npm - llm-cli-gateway - Version diff - 1.4.0 → 1.5.13 - Mend

llm-cli-gateway 1.4.0 → 1.5.13

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (62)

package/CHANGELOG.md +135 -1
package/README.md +358 -15
package/dist/approval-manager.d.ts +1 -1
package/dist/async-job-manager.d.ts +32 -2
package/dist/async-job-manager.js +101 -16
package/dist/auth.d.ts +15 -0
package/dist/auth.js +46 -0
package/dist/cli-updater.d.ts +19 -2
package/dist/cli-updater.js +110 -7
package/dist/codex-json-parser.d.ts +34 -0
package/dist/codex-json-parser.js +105 -0
package/dist/config.d.ts +30 -0
package/dist/config.js +167 -0
package/dist/doctor.d.ts +110 -0
package/dist/doctor.js +280 -0
package/dist/endpoint-exposure.d.ts +22 -0
package/dist/endpoint-exposure.js +231 -0
package/dist/entrypoint-url.d.ts +1 -0
package/dist/entrypoint-url.js +5 -0
package/dist/executor.d.ts +9 -1
package/dist/executor.js +52 -17
package/dist/flight-recorder.d.ts +3 -1
package/dist/flight-recorder.js +31 -2
package/dist/gateway-server.d.ts +2 -0
package/dist/gateway-server.js +1 -0
package/dist/gemini-json-parser.d.ts +21 -0
package/dist/gemini-json-parser.js +47 -0
package/dist/health.d.ts +7 -0
package/dist/health.js +22 -0
package/dist/http-transport.d.ts +22 -0
package/dist/http-transport.js +164 -0
package/dist/index.d.ts +186 -2
package/dist/index.js +2761 -1454
package/dist/job-store.d.ts +118 -2
package/dist/job-store.js +176 -5
package/dist/logger.d.ts +9 -0
package/dist/logger.js +14 -0
package/dist/model-registry.js +40 -6
package/dist/provider-login-guidance.d.ts +21 -0
package/dist/provider-login-guidance.js +98 -0
package/dist/provider-status.d.ts +41 -0
package/dist/provider-status.js +203 -0
package/dist/request-helpers.d.ts +484 -4
package/dist/request-helpers.js +613 -0
package/dist/resources.js +44 -0
package/dist/session-manager-pg.js +1 -0
package/dist/session-manager.d.ts +1 -1
package/dist/session-manager.js +2 -1
package/dist/upstream-contracts.d.ts +62 -0
package/dist/upstream-contracts.js +620 -0
package/dist/validation-normalizer.d.ts +23 -0
package/dist/validation-normalizer.js +79 -0
package/dist/validation-orchestrator.d.ts +47 -0
package/dist/validation-orchestrator.js +145 -0
package/dist/validation-prompts.d.ts +15 -0
package/dist/validation-prompts.js +52 -0
package/dist/validation-report.d.ts +57 -0
package/dist/validation-report.js +129 -0
package/dist/validation-tools.d.ts +7 -0
package/dist/validation-tools.js +198 -0
package/package.json +25 -10
package/setup/status.schema.json +271 -0

package/dist/index.js CHANGED

@@ -2,32 +2,42 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { randomUUID } from "crypto";
-import { readFileSync, readdirSync, realpathSync } from "fs";
+import { readFileSync, readdirSync } from "fs";
 import { dirname, join } from "path";
 import { fileURLToPath } from "url";
 import { z } from "zod";
 import { executeCli, killAllProcessGroups } from "./executor.js";
 import { parseStreamJson } from "./stream-json-parser.js";
+import { parseCodexJsonStream } from "./codex-json-parser.js";
+import { parseGeminiJson } from "./gemini-json-parser.js";
 import { createSessionManager } from "./session-manager.js";
 import { ResourceProvider } from "./resources.js";
 import { PerformanceMetrics } from "./metrics.js";
 import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
-import { loadConfig } from "./config.js";
+import { loadConfig, loadPersistenceConfig } from "./config.js";
 import { checkHealth } from "./health.js";
 import { getCliInfo, resolveModelAlias } from "./model-registry.js";
 import { AsyncJobManager } from "./async-job-manager.js";
-import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
+import { createJobStore } from "./job-store.js";
 import { ApprovalManager } from "./approval-manager.js";
 import { checkReviewIntegrity } from "./review-integrity.js";
 import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
-import { resolveSessionResumeArgs, resolveGrokSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
+import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
 import { createFlightRecorder } from "./flight-recorder.js";
 import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
+import { startHttpGateway } from "./http-transport.js";
+import { printDoctorJson } from "./doctor.js";
+import { registerValidationTools } from "./validation-tools.js";
+import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
+import { entrypointFileURL } from "./entrypoint-url.js";
 // Simple logger that writes to stderr (stdout is used for MCP protocol)
 const logger = {
     info: (message, ...args) => {
         console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
     },
+    warn: (message, ...args) => {
+        console.error(`[WARN] ${new Date().toISOString()} - ${message}`, ...args);
+    },
     error: (message, ...args) => {
         console.error(`[ERROR] ${new Date().toISOString()} - ${message}`, ...args);
     },
@@ -90,48 +100,87 @@ const loadedSkills = loadSkills();
 // system prompt at connection time. Covers key patterns + pointers to L2 resources.
 const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
-Tools: claude_request, codex_request, gemini_request, grok_request (sync) | *_request_async (async)
+Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
+Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
 Jobs: llm_job_status, llm_job_result, llm_job_cancel
 Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
-Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
+Other: list_models, cli_versions, upstream_contracts, cli_upgrade, approval_list, llm_process_health
 Key behaviors:
 - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
-- Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
+- Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (requires session_logging.enabled=true in ~/.vibe/config.toml), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
 - Approval gates: opt-in via approvalStrategy:"mcp_managed".
 - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
 Skills (full docs via MCP resources):
 ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
-const server = new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
+function newGatewayMcpServer() {
+    return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
+}
 // Global state (initialized asynchronously)
 let sessionManager;
 let db = null;
 const performanceMetrics = new PerformanceMetrics();
 let resourceProvider;
 const flightRecorder = createFlightRecorder(logger);
-// Durable job store: persists every async job to ~/.llm-cli-gateway/logs.db so callers
-// can collect results across long polling gaps and gateway restarts, and so repeated
-// identical requests dedup onto the running/completed job instead of starting over.
+// Resolved persistence config — single source of truth for the async-job backend.
+// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
+// When backend = "none", the JobStore is null AND *_request_async tools are not
+// registered (see createGatewayServer), making silent in-memory loss
+// structurally impossible.
+const persistenceConfig = loadPersistenceConfig(logger);
 const jobStore = (() => {
-    const dbPath = resolveJobStoreDbPath();
-    if (!dbPath) {
-        logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
-        return null;
-    }
     try {
-        return new JobStore(dbPath, logger);
+        return createJobStore(persistenceConfig, logger);
     }
     catch (err) {
-        logger.error("Failed to open durable job store; continuing in-memory only", err);
+        logger.error("Failed to open durable job store; async tools will be unavailable", err);
         return null;
     }
 })();
-const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
-    performanceMetrics.recordRequest(cli, durationMs, success);
-}, jobStore);
+function newAsyncJobManager(metrics, runtimeLogger, store = jobStore) {
+    return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
+        metrics.recordRequest(cli, durationMs, success);
+    }, store);
+}
+const asyncJobManager = newAsyncJobManager(performanceMetrics, logger);
 const approvalManager = new ApprovalManager(undefined, logger);
 const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
+// U22: Session-provider enum extended to five providers. The storage layer's
+// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
+// session_create / session_list / session_clear_all accept the fifth provider.
+export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
+export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
+let activeServer = null;
+let activeHttpGateway = null;
+function resolveGatewayServerRuntime(deps = {}, options = {}) {
+    const runtimeLogger = deps.logger ?? logger;
+    const runtimeSessionManager = deps.sessionManager ?? sessionManager;
+    const runtimePerformanceMetrics = deps.performanceMetrics ??
+        (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
+    const runtimeAsyncJobManager = deps.asyncJobManager ??
+        (options.isolateState
+            ? // Factory-created test/HTTP session servers must not mark another instance's
+                // durable jobs orphaned. Stdio startup injects the process-global manager.
+                newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
+            : asyncJobManager);
+    const runtimeApprovalManager = deps.approvalManager ??
+        (options.isolateState ? new ApprovalManager(undefined, runtimeLogger) : approvalManager);
+    return {
+        sessionManager: runtimeSessionManager,
+        resourceProvider: deps.resourceProvider ??
+            (options.isolateState
+                ? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
+                : resourceProvider),
+        db: "db" in deps ? (deps.db ?? null) : db,
+        performanceMetrics: runtimePerformanceMetrics,
+        asyncJobManager: runtimeAsyncJobManager,
+        approvalManager: runtimeApprovalManager,
+        flightRecorder: deps.flightRecorder ?? flightRecorder,
+        logger: runtimeLogger,
+        persistence: deps.persistence ?? persistenceConfig,
+    };
+}
 // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
 // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
 // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
@@ -140,6 +189,7 @@ const CLI_IDLE_TIMEOUTS = {
     codex: 600_000, // 10 minutes — Codex streams stderr progress
     gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
     grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
+    mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
 };
 function resolveIdleTimeout(cli, override) {
     if (override !== undefined)
@@ -151,40 +201,91 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
  * Start an async job and poll until completion or deadline.
  * Returns the job result if it finishes in time, or a deferral marker.
  */
-async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh) {
+async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete) {
+    // U26 fix: ownership of onComplete is a contract. Once this function returns
+    // OR throws, the caller MUST consider onComplete consumed — i.e. it has
+    // either been run, or the AsyncJobManager has taken ownership of it. The
+    // caller never needs to reclaim.
+    let onCompleteOwnedByCaller = onComplete !== undefined;
+    const consumeOnComplete = () => {
+        if (!onCompleteOwnedByCaller || !onComplete)
+            return;
+        onCompleteOwnedByCaller = false;
+        try {
+            onComplete();
+        }
+        catch (err) {
+            runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
+        }
+    };
+    try {
+        assertUpstreamCliArgs(cli, args);
+        assertUpstreamCliEnv(cli, env);
+    }
+    catch (err) {
+        consumeOnComplete();
+        throw err;
+    }
     if (SYNC_DEADLINE_MS === 0) {
         // Disabled — fall through to direct execution.
         // Note: direct execution bypasses dedup. forceRefresh is implied.
-        return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
+        const command = cli === "mistral" ? "vibe" : cli;
+        try {
+            return await executeCli(command, args, {
+                idleTimeout: idleTimeoutMs,
+                logger: runtime.logger,
+                env: env ? { ...process.env, ...env } : undefined,
+            });
+        }
+        finally {
+            // Direct-execution path completes inline; release per-request resources
+            // (e.g. outputSchema temp files) here.
+            consumeOnComplete();
+        }
+    }
+    let outcome;
+    try {
+        outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
+            idleTimeoutMs,
+            outputFormat,
+            forceRefresh,
+            env,
+            onComplete,
+        });
+        // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
+        // fireOnComplete on terminal status, or run inline immediately for dedup).
+        onCompleteOwnedByCaller = false;
+    }
+    catch (err) {
+        // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
+        // registered, so onComplete will never be called by the manager. Reclaim
+        // here so the temp file is not leaked.
+        consumeOnComplete();
+        throw err;
     }
-    const outcome = asyncJobManager.startJobWithDedup(cli, args, corrId, {
-        idleTimeoutMs,
-        outputFormat,
-        forceRefresh,
-    });
     const job = outcome.snapshot;
     if (outcome.deduped) {
-        logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
+        runtime.logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
     }
     const deadline = Date.now() + SYNC_DEADLINE_MS;
     while (Date.now() < deadline) {
-        const snapshot = asyncJobManager.getJobSnapshot(job.id);
+        const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
         if (snapshot && snapshot.status !== "running") {
             // Job finished within deadline — extract result
-            const result = asyncJobManager.getJobResult(job.id);
+            const result = runtime.asyncJobManager.getJobResult(job.id);
             if (!result) {
                 return { stdout: "", stderr: "Job result unavailable", code: 1 };
             }
             return {
                 stdout: result.stdout,
-                stderr: result.stderr,
+                stderr: result.stderr || result.error || "",
                 code: result.exitCode ?? 1,
             };
         }
         await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
     }
     // Deadline exceeded — return deferral
-    logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
+    runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
     return {
         deferred: true,
         jobId: job.id,
@@ -262,28 +363,60 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
 function extractUsageAndCost(cli, output, outputFormat) {
     if (cli === "claude" && outputFormat === "stream-json") {
         const parsed = parseStreamJson(output);
+        if (!parsed.usage) {
+            return { costUsd: parsed.costUsd ?? undefined };
+        }
         return {
-            inputTokens: parsed.usage?.inputTokens,
-            outputTokens: parsed.usage?.outputTokens,
+            inputTokens: parsed.usage.inputTokens,
+            outputTokens: parsed.usage.outputTokens,
+            cacheReadTokens: parsed.usage.cacheReadInputTokens || undefined,
+            cacheCreationTokens: parsed.usage.cacheCreationInputTokens || undefined,
             costUsd: parsed.costUsd ?? undefined,
         };
     }
+    if (cli === "codex" && outputFormat === "json") {
+        const parsed = parseCodexJsonStream(output);
+        if (!parsed.usage) {
+            return {};
+        }
+        return {
+            inputTokens: parsed.usage.input_tokens,
+            outputTokens: parsed.usage.output_tokens,
+            cacheReadTokens: parsed.usage.cache_read_tokens,
+            cacheCreationTokens: parsed.usage.cache_creation_tokens,
+            costUsd: parsed.usage.cost_usd,
+        };
+    }
+    if (cli === "gemini" && outputFormat === "json") {
+        const parsed = parseGeminiJson(output);
+        if (!parsed || !parsed.usage) {
+            return {};
+        }
+        return {
+            inputTokens: parsed.usage.input_tokens,
+            outputTokens: parsed.usage.output_tokens,
+            cacheReadTokens: parsed.usage.cache_read_tokens,
+        };
+    }
+    // Mistral/Vibe: does not surface usage in its stdout/stream-json output. A
+    // future unit can read it from `~/.vibe/logs/session/<id>/metadata.json`
+    // once we resolve the session id post-run.
     return {};
 }
-function safeFlightStart(entry) {
+function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
     try {
-        flightRecorder.logStart(entry);
+        runtime.flightRecorder.logStart(entry);
     }
     catch (error) {
-        logger.error("Flight recorder logStart failed", error);
+        runtime.logger.error("Flight recorder logStart failed", error);
     }
 }
-function safeFlightComplete(correlationId, result) {
+function safeFlightComplete(correlationId, result, runtime = resolveGatewayServerRuntime()) {
     try {
-        flightRecorder.logComplete(correlationId, result);
+        runtime.flightRecorder.logComplete(correlationId, result);
     }
     catch (error) {
-        logger.error("Flight recorder logComplete failed", error);
+        runtime.logger.error("Flight recorder logComplete failed", error);
     }
 }
 function createApprovalDeniedResponse(operation, decision) {
@@ -350,124 +483,146 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
 //──────────────────────────────────────────────────────────────────────────────
 // MCP Resources
 //──────────────────────────────────────────────────────────────────────────────
-// Register skill resources (L2: full docs, read on demand)
-for (const skill of loadedSkills) {
-    server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
-        title: skill.name,
-        description: skill.description,
-        mimeType: "text/markdown",
-    }, async () => ({
-        contents: [
-            {
-                uri: `skills://${skill.name}`,
-                mimeType: "text/markdown",
-                text: skill.content,
-            },
-        ],
-    }));
+function registerBaseResources(server, runtime) {
+    // Register skill resources (L2: full docs, read on demand)
+    for (const skill of loadedSkills) {
+        server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
+            title: skill.name,
+            description: skill.description,
+            mimeType: "text/markdown",
+        }, async () => ({
+            contents: [
+                {
+                    uri: `skills://${skill.name}`,
+                    mimeType: "text/markdown",
+                    text: skill.content,
+                },
+            ],
+        }));
+    }
+    runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
+    // Register all sessions resource
+    server.registerResource("all-sessions", "sessions://all", {
+        title: "📋 All Sessions",
+        description: "All conversation sessions across CLIs",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading all sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Claude sessions resource
+    server.registerResource("claude-sessions", "sessions://claude", {
+        title: "🤖 Claude Sessions",
+        description: "Claude conversation sessions",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Claude sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Codex sessions resource
+    server.registerResource("codex-sessions", "sessions://codex", {
+        title: "💻 Codex Sessions",
+        description: "Codex conversation sessions",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Codex sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Gemini sessions resource
+    server.registerResource("gemini-sessions", "sessions://gemini", {
+        title: "✨ Gemini Sessions",
+        description: "Gemini conversation sessions",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Gemini sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Grok sessions resource
+    server.registerResource("grok-sessions", "sessions://grok", {
+        title: "⚡ Grok Sessions",
+        description: "Grok conversation sessions",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Grok sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Mistral sessions resource
+    server.registerResource("mistral-sessions", "sessions://mistral", {
+        title: "🌬 Mistral Sessions",
+        description: "Mistral Vibe conversation sessions",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Mistral sessions resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Claude models resource
+    server.registerResource("claude-models", "models://claude", {
+        title: "🧠 Claude Models",
+        description: "Claude models and capabilities",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Claude models resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Codex models resource
+    server.registerResource("codex-models", "models://codex", {
+        title: "🔧 Codex Models",
+        description: "Codex models and capabilities",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Codex models resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Gemini models resource
+    server.registerResource("gemini-models", "models://gemini", {
+        title: "🌟 Gemini Models",
+        description: "Gemini models and capabilities",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Gemini models resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Grok models resource
+    server.registerResource("grok-models", "models://grok", {
+        title: "⚡ Grok Models",
+        description: "Grok models and capabilities",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Grok models resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register Mistral models resource
+    server.registerResource("mistral-models", "models://mistral", {
+        title: "🌬 Mistral Models",
+        description: "Mistral Vibe models and capabilities",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading Mistral models resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
+    // Register performance metrics resource
+    server.registerResource("performance-metrics", "metrics://performance", {
+        title: "📈 Performance Metrics",
+        description: "Request counts, latency, success/failure rates",
+        mimeType: "application/json",
+    }, async (uri) => {
+        runtime.logger.debug("Reading performance metrics resource");
+        const contents = await runtime.resourceProvider.readResource(uri.href);
+        return { contents: contents ? [contents] : [] };
+    });
 }
-logger.info(`Registered ${loadedSkills.length} skill resources`);
-// Register all sessions resource
-server.registerResource("all-sessions", "sessions://all", {
-    title: "📋 All Sessions",
-    description: "All conversation sessions across CLIs",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading all sessions resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Claude sessions resource
-server.registerResource("claude-sessions", "sessions://claude", {
-    title: "🤖 Claude Sessions",
-    description: "Claude conversation sessions",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Claude sessions resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Codex sessions resource
-server.registerResource("codex-sessions", "sessions://codex", {
-    title: "💻 Codex Sessions",
-    description: "Codex conversation sessions",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Codex sessions resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Gemini sessions resource
-server.registerResource("gemini-sessions", "sessions://gemini", {
-    title: "✨ Gemini Sessions",
-    description: "Gemini conversation sessions",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Gemini sessions resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Grok sessions resource
-server.registerResource("grok-sessions", "sessions://grok", {
-    title: "⚡ Grok Sessions",
-    description: "Grok conversation sessions",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Grok sessions resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Claude models resource
-server.registerResource("claude-models", "models://claude", {
-    title: "🧠 Claude Models",
-    description: "Claude models and capabilities",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Claude models resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Codex models resource
-server.registerResource("codex-models", "models://codex", {
-    title: "🔧 Codex Models",
-    description: "Codex models and capabilities",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Codex models resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Gemini models resource
-server.registerResource("gemini-models", "models://gemini", {
-    title: "🌟 Gemini Models",
-    description: "Gemini models and capabilities",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Gemini models resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register Grok models resource
-server.registerResource("grok-models", "models://grok", {
-    title: "⚡ Grok Models",
-    description: "Grok models and capabilities",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading Grok models resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-// Register performance metrics resource
-server.registerResource("performance-metrics", "metrics://performance", {
-    title: "📈 Performance Metrics",
-    description: "Request counts, latency, success/failure rates",
-    mimeType: "application/json",
-}, async (uri) => {
-    logger.debug("Reading performance metrics resource");
-    const contents = await resourceProvider.readResource(uri.href);
-    return { contents: contents ? [contents] : [] };
-});
-function prepareClaudeRequest(params) {
+export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
     const corrId = params.correlationId || randomUUID();
     const cliInfo = getCliInfo();
     const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
@@ -478,7 +633,7 @@ function prepareClaudeRequest(params) {
         disallowedTools: params.disallowedTools,
     });
     if (reviewIntegrity.violations.length > 0) {
-        logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
+        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
             cli: "claude",
             operation: params.operation,
             score: reviewIntegrity.totalScore,
@@ -498,7 +653,7 @@ function prepareClaudeRequest(params) {
     const mcpConfig = mcpConfigResolution.config;
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
-        approvalDecision = approvalManager.decide({
+        approvalDecision = runtime.approvalManager.decide({
             cli: "claude",
             operation: params.operation,
             prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -535,8 +690,15 @@ function prepareClaudeRequest(params) {
     if (params.approvalStrategy === "mcp_managed") {
         args.push("--permission-mode", "bypassPermissions");
     }
-    else if (params.dangerouslySkipPermissions) {
-        args.push("--permission-mode", "bypassPermissions");
+    else {
+        const permFlags = resolveClaudePermissionFlags({
+            permissionMode: params.permissionMode,
+            dangerouslySkipPermissions: params.dangerouslySkipPermissions,
+        });
+        if (permFlags.warning) {
+            runtime.logger.warn(`[${corrId}] ${permFlags.warning}`);
+        }
+        args.push(...permFlags.args);
     }
     if (params.strictMcpConfig || mcpConfig.enabled.length > 0) {
         args.push("--mcp-config", mcpConfig.path);
@@ -544,6 +706,26 @@ function prepareClaudeRequest(params) {
             args.push("--strict-mcp-config");
         }
     }
+    // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
+    let validatedAgents;
+    if (params.agents && Object.keys(params.agents).length > 0) {
+        const result = validateClaudeAgentsMap(params.agents);
+        if (!result.ok) {
+            return createErrorResponse("claude", 1, "", corrId, new Error(result.message));
+        }
+        validatedAgents = result.value;
+    }
+    args.push(...prepareClaudeHighImpactFlags({
+        agent: params.agent,
+        agents: validatedAgents,
+        forkSession: params.forkSession,
+        systemPrompt: params.systemPrompt,
+        appendSystemPrompt: params.appendSystemPrompt,
+        maxBudgetUsd: params.maxBudgetUsd,
+        maxTurns: params.maxTurns,
+        effort: params.effort,
+        excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
+    }));
     return {
         corrId,
         effectivePrompt,
@@ -555,14 +737,14 @@ function prepareClaudeRequest(params) {
         args,
     };
 }
-function prepareCodexRequest(params) {
+export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
     const corrId = params.correlationId || randomUUID();
     const cliInfo = getCliInfo();
     const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
     // Review integrity check on raw prompt (before optimization)
     const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
     if (reviewIntegrity.violations.length > 0) {
-        logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
+        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
             cli: "codex",
             operation: params.operation,
             score: reviewIntegrity.totalScore,
@@ -577,7 +759,7 @@ function prepareCodexRequest(params) {
     const requestedMcpServers = normalizeMcpServers(params.mcpServers);
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
-        approvalDecision = approvalManager.decide({
+        approvalDecision = runtime.approvalManager.decide({
             cli: "codex",
             operation: params.operation,
             prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -615,13 +797,73 @@ function prepareCodexRequest(params) {
     }
     if (resolvedModel)
         args.push("--model", resolvedModel);
-    if (sessionPlan.mode === "new" && params.fullAuto) {
-        args.push("--full-auto");
+    // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
+    // `codex exec resume` rejects all of these (the original session's policy is
+    // inherited), so we only emit them when starting a NEW session.
+    if (sessionPlan.mode === "new") {
+        const sandboxFlags = resolveCodexSandboxFlags({
+            sandboxMode: params.sandboxMode,
+            askForApproval: params.askForApproval,
+            fullAuto: params.fullAuto,
+            useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
+        });
+        if (sandboxFlags.warning) {
+            runtime.logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
+        }
+        args.push(...sandboxFlags.args);
     }
     if (params.dangerouslyBypassApprovalsAndSandbox) {
         args.push("--dangerously-bypass-approvals-and-sandbox");
     }
+    // U23 fix: emit `--json` when the caller asked for JSON output so the
+    // codex-json-parser actually receives JSONL events. This is what makes
+    // extractUsageAndCost() reachable from the tool surface; without it, the
+    // U23 parser is dead code.
+    if (params.outputFormat === "json") {
+        args.push("--json");
+    }
     args.push("--skip-git-repo-check");
+    // U26: High-impact feature flags. Some of these (`--output-schema`,
+    // `--search`, `-C`, `--add-dir`) are rejected by `codex exec resume`, so we
+    // only emit them on a NEW session. Images / ephemeral / profile /
+    // ignore-rules / ignore-user-config are allowed on resume per the audited
+    // CLI help; we emit them in both branches.
+    let highImpactCleanup;
+    if (sessionPlan.mode === "new") {
+        const high = prepareCodexHighImpactFlags({
+            outputSchema: params.outputSchema,
+            search: params.search,
+            profile: params.profile,
+            configOverrides: params.configOverrides,
+            ephemeral: params.ephemeral,
+            images: params.images,
+            ignoreUserConfig: params.ignoreUserConfig,
+            ignoreRules: params.ignoreRules,
+        });
+        if (high.missingImagePath) {
+            return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
+        }
+        args.push(...high.args);
+        highImpactCleanup = high.cleanup;
+    }
+    else {
+        // On resume, emit only the resume-safe subset (profile, ephemeral,
+        // images, ignoreUserConfig, ignoreRules). outputSchema, search, and
+        // configOverrides are dropped silently to mirror existing behavior for
+        // sandbox/ask-for-approval on resume.
+        const high = prepareCodexHighImpactFlags({
+            profile: params.profile,
+            ephemeral: params.ephemeral,
+            images: params.images,
+            ignoreUserConfig: params.ignoreUserConfig,
+            ignoreRules: params.ignoreRules,
+        });
+        if (high.missingImagePath) {
+            return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
+        }
+        args.push(...high.args);
+        highImpactCleanup = high.cleanup;
+    }
     if (sessionPlan.mode === "resume-by-id" && sessionPlan.sessionId) {
         args.push(sessionPlan.sessionId);
     }
@@ -634,9 +876,10 @@ function prepareCodexRequest(params) {
         approvalDecision,
         reviewIntegrity,
         args,
+        cleanup: highImpactCleanup,
     };
 }
-function prepareGeminiRequest(params) {
+export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
     const corrId = params.correlationId || randomUUID();
     const cliInfo = getCliInfo();
     const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
@@ -646,7 +889,7 @@ function prepareGeminiRequest(params) {
         allowedTools: params.allowedTools,
     });
     if (reviewIntegrity.violations.length > 0) {
-        logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
+        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
             cli: "gemini",
             operation: params.operation,
             score: reviewIntegrity.totalScore,
@@ -661,7 +904,7 @@ function prepareGeminiRequest(params) {
     const requestedMcpServers = normalizeMcpServers(params.mcpServers);
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
-        approvalDecision = approvalManager.decide({
+        approvalDecision = runtime.approvalManager.decide({
             cli: "gemini",
             operation: params.operation,
             prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -678,7 +921,29 @@ function prepareGeminiRequest(params) {
         }
     }
     const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
-    const args = [effectivePrompt];
+    // U27: Validate high-impact policy paths and prepend attachment tokens
+    // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
+    const highImpact = prepareGeminiHighImpactFlags({
+        sandbox: params.sandbox,
+        policyFiles: params.policyFiles,
+        adminPolicyFiles: params.adminPolicyFiles,
+    });
+    if (highImpact.missingPolicyPath) {
+        return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
+    }
+    if (params.attachments && params.attachments.length > 0) {
+        try {
+            effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
+        }
+        catch (err) {
+            return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
+        }
+    }
+    // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
+    // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
+    // the documented non-interactive flag and is robust against future CLI mode
+    // changes.
+    const args = ["-p", effectivePrompt];
     if (resolvedModel)
         args.push("--model", resolvedModel);
     if (effectiveApprovalMode)
@@ -695,6 +960,15 @@ function prepareGeminiRequest(params) {
         sanitizeCliArgValues(params.includeDirs, "includeDirs");
         params.includeDirs.forEach(dir => args.push("--include-directories", dir));
     }
+    // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
+    // existing flag set so positional ordering relative to `-p` is preserved.
+    args.push(...highImpact.args);
+    // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
+    // JSON parser is otherwise unreachable from the tool surface and the
+    // structured usageMetadata is silently dropped.
+    if (params.outputFormat === "json") {
+        args.push("-o", "json");
+    }
     return {
         corrId,
         effectivePrompt,
@@ -705,7 +979,7 @@ function prepareGeminiRequest(params) {
         args,
     };
 }
-function prepareGrokRequest(params) {
+function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
     const corrId = params.correlationId || randomUUID();
     const cliInfo = getCliInfo();
     const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
@@ -716,7 +990,7 @@ function prepareGrokRequest(params) {
         disallowedTools: params.disallowedTools,
     });
     if (reviewIntegrity.violations.length > 0) {
-        logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
+        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
             cli: "grok",
             operation: params.operation,
             score: reviewIntegrity.totalScore,
@@ -731,7 +1005,7 @@ function prepareGrokRequest(params) {
     const requestedMcpServers = normalizeMcpServers(params.mcpServers);
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
-        approvalDecision = approvalManager.decide({
+        approvalDecision = runtime.approvalManager.decide({
             cli: "grok",
             operation: params.operation,
             prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -780,6 +1054,78 @@ function prepareGrokRequest(params) {
         args,
     };
 }
+function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
+    const corrId = params.correlationId || randomUUID();
+    const cliInfo = getCliInfo();
+    const resolvedModel = resolveModelAlias("mistral", params.model, cliInfo) || "devstral-medium";
+    const reviewIntegrity = checkReviewIntegrity({
+        prompt: params.prompt,
+        allowedTools: params.allowedTools,
+        disallowedTools: params.disallowedTools,
+    });
+    if (reviewIntegrity.violations.length > 0) {
+        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
+            cli: "mistral",
+            operation: params.operation,
+            score: reviewIntegrity.totalScore,
+        });
+    }
+    let effectivePrompt = params.prompt;
+    if (params.optimizePrompt) {
+        const optimized = optimizePromptText(effectivePrompt);
+        logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
+        effectivePrompt = optimized;
+    }
+    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
+    let approvalDecision = null;
+    if (params.approvalStrategy === "mcp_managed") {
+        approvalDecision = runtime.approvalManager.decide({
+            cli: "mistral",
+            operation: params.operation,
+            prompt: params.prompt,
+            bypassRequested: params.permissionMode === "auto-approve",
+            fullAuto: false,
+            requestedMcpServers,
+            allowedTools: params.allowedTools,
+            disallowedTools: params.disallowedTools,
+            policy: params.approvalPolicy,
+            metadata: { model: resolvedModel, vibeActiveModelEnv: true },
+            reviewIntegrity,
+        });
+        if (approvalDecision.status !== "approved") {
+            return createApprovalDeniedResponse(params.operation, approvalDecision);
+        }
+    }
+    // Under mcp_managed, force --agent auto-approve so the approval gate's
+    // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
+    // forcing under mcp_managed).
+    const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
+        ? "auto-approve"
+        : (params.permissionMode ?? "auto-approve");
+    const prep = buildMistralCliInvocation({
+        prompt: effectivePrompt,
+        resolvedModel,
+        outputFormat: params.outputFormat,
+        permissionMode: effectivePermissionMode,
+        effort: params.effort,
+        reasoningEffort: params.reasoningEffort,
+        allowedTools: params.allowedTools,
+        disallowedTools: params.disallowedTools,
+    });
+    if (prep.ignoredDisallowedTools) {
+        runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
+    }
+    return {
+        corrId,
+        effectivePrompt,
+        resolvedModel,
+        requestedMcpServers,
+        approvalDecision,
+        reviewIntegrity,
+        args: prep.args,
+        mistralEnv: prep.env,
+    };
+}
 function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
     let finalStdout = stdout;
     // Skip response optimization for JSON output to prevent corrupting structured data
@@ -831,7 +1177,26 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
     }
     return response;
 }
+function resolveHandlerRuntime(deps) {
+    if (deps.runtime)
+        return deps.runtime;
+    const asyncDeps = deps;
+    // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
+    const depLogger = deps.logger;
+    const normalizedLogger = {
+        info: depLogger.info,
+        warn: depLogger.warn ?? ((msg, ...rest) => depLogger.info(`[WARN] ${msg}`, ...rest)),
+        error: depLogger.error,
+        debug: depLogger.debug,
+    };
+    return resolveGatewayServerRuntime({
+        sessionManager: deps.sessionManager,
+        logger: normalizedLogger,
+        asyncJobManager: asyncDeps.asyncJobManager,
+    });
+}
 export async function handleGeminiRequest(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
     const startTime = Date.now();
     const prep = prepareGeminiRequest({
         prompt: params.prompt,
@@ -845,7 +1210,12 @@ export async function handleGeminiRequest(deps, params) {
         correlationId: params.correlationId,
         optimizePrompt: params.optimizePrompt,
         operation: "gemini_request",
-    });
+        outputFormat: params.outputFormat,
+        sandbox: params.sandbox,
+        policyFiles: params.policyFiles,
+        adminPolicyFiles: params.adminPolicyFiles,
+        attachments: params.attachments,
+    }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args } = prep;
@@ -857,20 +1227,24 @@ export async function handleGeminiRequest(deps, params) {
         model: prep.resolvedModel || "default",
         prompt: params.prompt,
         sessionId: params.sessionId,
-    });
+    }, runtime);
     deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
     try {
-        // Session arg planning (pure, no I/O)
-        const sessionResult = resolveSessionResumeArgs({
+        // U27: Session arg planning. For fresh sessions, emit `--session-id <uuid>`
+        // so the gateway and Gemini agree on the session identifier from turn 1.
+        // For resume flows, fall back to `--resume <id>` (existing behavior).
+        const sessionPlan = resolveGeminiSessionPlan({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
             createNewSession: params.createNewSession,
         });
-        args.push(...sessionResult.resumeArgs);
-        const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
+        args.push(...sessionPlan.args);
+        const userProvidedSession = sessionPlan.resumed;
+        const effectiveSessionIdHint = sessionPlan.emittedSessionId ?? params.sessionId;
+        const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
         // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
-            return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
+            return buildDeferredToolResponse(result, effectiveSessionIdHint);
         }
         const { stdout, stderr, code } = result;
         durationMs = Math.max(0, Date.now() - startTime);
@@ -885,13 +1259,15 @@ export async function handleGeminiRequest(deps, params) {
                 exitCode: code,
                 errorMessage: stderr || `Exit code ${code}`,
                 status: "failed",
-            });
+            }, runtime);
             return createErrorResponse("gemini", code, stderr, corrId);
         }
         wasSuccessful = true;
-        // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
-        let effectiveSessionId = sessionResult.effectiveSessionId;
-        if (sessionResult.userProvidedSession && effectiveSessionId) {
+        // U27 Post-success session I/O. Mirror the gateway store 1:1 to whatever
+        // session id Gemini is using (either the user-supplied resume id or the
+        // deterministic --session-id we emitted).
+        let effectiveSessionId = effectiveSessionIdHint;
+        if (effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
             if (!existing) {
                 try {
@@ -905,12 +1281,9 @@ export async function handleGeminiRequest(deps, params) {
             }
             await deps.sessionManager.updateSessionUsage(effectiveSessionId);
         }
-        else if (!params.createNewSession && !effectiveSessionId) {
-            const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
-            effectiveSessionId = newSession.id;
-        }
         deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
-        const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
+        const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
+        const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
         safeFlightComplete(corrId, {
             response: stdout,
             durationMs,
@@ -920,7 +1293,12 @@ export async function handleGeminiRequest(deps, params) {
             optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
             exitCode: 0,
             status: "completed",
-        });
+            inputTokens: geminiUsage.inputTokens,
+            outputTokens: geminiUsage.outputTokens,
+            cacheReadTokens: geminiUsage.cacheReadTokens,
+            cacheCreationTokens: geminiUsage.cacheCreationTokens,
+            costUsd: geminiUsage.costUsd,
+        }, runtime);
         return response;
     }
     catch (error) {
@@ -935,15 +1313,16 @@ export async function handleGeminiRequest(deps, params) {
             exitCode: 1,
             errorMessage: error.message,
             status: "failed",
-        });
+        }, runtime);
         return createErrorResponse("gemini", 1, "", corrId, error);
     }
     finally {
         const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
-        performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
+        runtime.performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
     }
 }
 export async function handleGeminiRequestAsync(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
     const prep = prepareGeminiRequest({
         prompt: params.prompt,
         model: params.model,
@@ -956,21 +1335,26 @@ export async function handleGeminiRequestAsync(deps, params) {
         correlationId: params.correlationId,
         optimizePrompt: params.optimizePrompt,
         operation: "gemini_request_async",
-    });
+        outputFormat: params.outputFormat,
+        sandbox: params.sandbox,
+        policyFiles: params.policyFiles,
+        adminPolicyFiles: params.adminPolicyFiles,
+        attachments: params.attachments,
+    }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args, requestedMcpServers, approvalDecision } = prep;
     try {
-        // Session arg planning (pure, no I/O)
-        const sessionResult = resolveSessionResumeArgs({
+        // U27: Session arg planning with deterministic --session-id for fresh sessions.
+        const sessionPlan = resolveGeminiSessionPlan({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
             createNewSession: params.createNewSession,
         });
-        args.push(...sessionResult.resumeArgs);
+        args.push(...sessionPlan.args);
         // Pre-start session I/O (async handlers: prevent orphaned jobs)
-        let effectiveSessionId = sessionResult.effectiveSessionId;
-        if (sessionResult.userProvidedSession && effectiveSessionId) {
+        let effectiveSessionId = sessionPlan.emittedSessionId ?? params.sessionId;
+        if (effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
             if (!existing) {
                 try {
@@ -984,18 +1368,18 @@ export async function handleGeminiRequestAsync(deps, params) {
             }
             await deps.sessionManager.updateSessionUsage(effectiveSessionId);
         }
-        else if (!params.createNewSession && !effectiveSessionId) {
-            const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
-            effectiveSessionId = newSession.id;
-        }
-        // Start job only after all session I/O succeeds
-        const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
+        // Start job only after all session I/O succeeds. U23: forward outputFormat
+        // so AsyncJobManager records it in the durable store (the manager also
+        // surfaces it in the snapshot).
+        assertUpstreamCliArgs("gemini", args);
+        assertUpstreamCliEnv("gemini", undefined);
+        const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
         deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
         const asyncResponse = {
             success: true,
             job,
             sessionId: effectiveSessionId || null,
-            resumable: sessionResult.userProvidedSession,
+            resumable: sessionPlan.resumed,
             approval: approvalDecision,
             mcpServers: { requested: requestedMcpServers },
         };
@@ -1016,6 +1400,7 @@ export async function handleGeminiRequestAsync(deps, params) {
     }
 }
 export async function handleGrokRequest(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
     const startTime = Date.now();
     const prep = prepareGrokRequest({
         prompt: params.prompt,
@@ -1033,7 +1418,7 @@ export async function handleGrokRequest(deps, params) {
         correlationId: params.correlationId,
         optimizePrompt: params.optimizePrompt,
         operation: "grok_request",
-    });
+    }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args } = prep;
@@ -1045,7 +1430,7 @@ export async function handleGrokRequest(deps, params) {
         model: prep.resolvedModel || "default",
         prompt: params.prompt,
         sessionId: params.sessionId,
-    });
+    }, runtime);
     deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${params.prompt.length}`);
     try {
         // Session arg planning (pure, no I/O)
@@ -1055,7 +1440,7 @@ export async function handleGrokRequest(deps, params) {
             createNewSession: params.createNewSession,
         });
         args.push(...sessionResult.resumeArgs);
-        const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
+        const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
         // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -1073,7 +1458,7 @@ export async function handleGrokRequest(deps, params) {
                 exitCode: code,
                 errorMessage: stderr || `Exit code ${code}`,
                 status: "failed",
-            });
+            }, runtime);
             return createErrorResponse("grok", code, stderr, corrId);
         }
         wasSuccessful = true;
@@ -1108,7 +1493,7 @@ export async function handleGrokRequest(deps, params) {
             optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
             exitCode: 0,
             status: "completed",
-        });
+        }, runtime);
         return response;
     }
     catch (error) {
@@ -1123,15 +1508,16 @@ export async function handleGrokRequest(deps, params) {
             exitCode: 1,
             errorMessage: error.message,
             status: "failed",
-        });
+        }, runtime);
         return createErrorResponse("grok", 1, "", corrId, error);
     }
     finally {
         const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
-        performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
+        runtime.performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
     }
 }
 export async function handleGrokRequestAsync(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
     const prep = prepareGrokRequest({
         prompt: params.prompt,
         model: params.model,
@@ -1148,7 +1534,7 @@ export async function handleGrokRequestAsync(deps, params) {
         correlationId: params.correlationId,
         optimizePrompt: params.optimizePrompt,
         operation: "grok_request_async",
-    });
+    }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args, requestedMcpServers, approvalDecision } = prep;
@@ -1181,6 +1567,8 @@ export async function handleGrokRequestAsync(deps, params) {
             effectiveSessionId = newSession.id;
         }
         // Start job only after all session I/O succeeds
+        assertUpstreamCliArgs("grok", args);
+        assertUpstreamCliEnv("grok", undefined);
         const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
         deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
         const asyncResponse = {
@@ -1207,694 +1595,288 @@ export async function handleGrokRequestAsync(deps, params) {
         return createErrorResponse("grok_request_async", 1, "", corrId, error);
     }
 }
-export async function handleCodexRequestAsync(deps, params) {
-    const prep = prepareCodexRequest({
+export async function handleMistralRequest(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
+    const startTime = Date.now();
+    const prep = prepareMistralRequest({
         prompt: params.prompt,
         model: params.model,
-        fullAuto: params.fullAuto,
-        dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
+        outputFormat: params.outputFormat,
+        permissionMode: params.permissionMode,
+        effort: params.effort,
+        reasoningEffort: params.reasoningEffort,
+        allowedTools: params.allowedTools,
+        disallowedTools: params.disallowedTools,
         approvalStrategy: params.approvalStrategy,
         approvalPolicy: params.approvalPolicy,
         mcpServers: params.mcpServers,
-        sessionId: params.sessionId,
-        resumeLatest: params.resumeLatest,
-        createNewSession: params.createNewSession,
         correlationId: params.correlationId,
         optimizePrompt: params.optimizePrompt,
-        operation: "codex_request_async",
-    });
+        operation: "mistral_request",
+    }, runtime);
     if (!("args" in prep))
         return prep;
-    const { corrId, args, requestedMcpServers, approvalDecision } = prep;
+    const { corrId, args, mistralEnv } = prep;
+    let durationMs = 0;
+    let wasSuccessful = false;
+    safeFlightStart({
+        correlationId: corrId,
+        cli: "mistral",
+        model: prep.resolvedModel || "default",
+        prompt: params.prompt,
+        sessionId: params.sessionId,
+    }, runtime);
+    deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${params.prompt.length}`);
     try {
-        // Pre-start session I/O (async handlers: prevent orphaned jobs)
-        let effectiveSessionId = params.sessionId;
-        if (!params.createNewSession && !params.sessionId) {
-            const activeSession = await deps.sessionManager.getActiveSession("codex");
-            if (activeSession) {
-                effectiveSessionId = activeSession.id;
-            }
-            else {
-                const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
-                effectiveSessionId = newSession.id;
-            }
-        }
-        else if (params.sessionId) {
-            await deps.sessionManager.updateSessionUsage(params.sessionId);
+        const sessionResult = resolveMistralSessionArgs({
+            sessionId: params.sessionId,
+            resumeLatest: params.resumeLatest,
+            createNewSession: params.createNewSession,
+        });
+        args.push(...sessionResult.resumeArgs);
+        const result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv);
+        if (isDeferredResponse(result)) {
+            return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
         }
-        else if (params.createNewSession) {
-            const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
-            effectiveSessionId = newSession.id;
-        }
-        // Start job only after all session I/O succeeds
-        const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), undefined, params.forceRefresh);
-        deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
-        const asyncResponse = {
-            success: true,
-            job,
-            sessionId: effectiveSessionId || null,
-            approval: approvalDecision,
-            mcpServers: { requested: requestedMcpServers },
-        };
-        if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
-            asyncResponse.reviewIntegrity = prep.reviewIntegrity;
-        }
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify(asyncResponse, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("codex_request_async", 1, "", corrId, error);
-    }
-}
-//──────────────────────────────────────────────────────────────────────────────
-// Claude Code Tool
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("claude_request", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Claude"),
-    model: z
-        .string()
-        .optional()
-        .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
-    outputFormat: z
-        .enum(["text", "json", "stream-json"])
-        .default("text")
-        .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
-    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
-    continueSession: z.boolean().default(false).describe("Continue active session"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
-    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
-    dangerouslySkipPermissions: z
-        .boolean()
-        .default(false)
-        .describe("Bypass permissions (sandbox only)"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP servers exposed to Claude"),
-    strictMcpConfig: z
-        .boolean()
-        .default(false)
-        .describe("Restrict Claude to provided MCP config only"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
-    const startTime = Date.now();
-    const prep = prepareClaudeRequest({
-        prompt,
-        model,
-        outputFormat,
-        allowedTools,
-        disallowedTools,
-        dangerouslySkipPermissions,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        strictMcpConfig,
-        correlationId,
-        optimizePrompt,
-        operation: "claude_request",
-    });
-    if (!("args" in prep))
-        return prep;
-    const { corrId, args } = prep;
-    let durationMs = 0;
-    let wasSuccessful = false;
-    safeFlightStart({
-        correlationId: corrId,
-        cli: "claude",
-        model: prep.resolvedModel || "default",
-        prompt,
-        sessionId,
-    });
-    logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
-    try {
-        // Session management
-        let effectiveSessionId = sessionId;
-        let useContinue = continueSession;
-        const activeSession = await sessionManager.getActiveSession("claude");
-        if (!createNewSession && !continueSession && !sessionId && activeSession) {
-            effectiveSessionId = activeSession.id;
-            useContinue = true;
-        }
-        if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
-            useContinue = true;
-        }
-        if (useContinue) {
-            args.push("--continue");
-        }
-        else if (effectiveSessionId) {
-            args.push("--session-id", effectiveSessionId);
-            await sessionManager.updateSessionUsage(effectiveSessionId);
-        }
-        // Idle timeout only for stream-json (text/json produce no output until done)
-        const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
-        const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh);
-        // Deferred — job still running, return async reference
-        if (isDeferredResponse(result)) {
-            return buildDeferredToolResponse(result, effectiveSessionId);
-        }
-        const { stdout, stderr, code } = result;
-        durationMs = Math.max(0, Date.now() - startTime);
-        if (code !== 0) {
-            logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
-            safeFlightComplete(corrId, {
-                response: stderr || "",
-                durationMs,
-                retryCount: 0,
-                circuitBreakerState: "closed",
-                optimizationApplied: optimizePrompt || optimizeResponse,
-                exitCode: code,
-                errorMessage: stderr || `Exit code ${code}`,
-                status: "failed",
-            });
-            return createErrorResponse("claude", code, stderr, corrId);
+        const { stdout, stderr, code } = result;
+        durationMs = Math.max(0, Date.now() - startTime);
+        if (code !== 0) {
+            deps.logger.info(`[${corrId}] mistral_request failed in ${durationMs}ms`);
+            safeFlightComplete(corrId, {
+                response: stderr || "",
+                durationMs,
+                retryCount: 0,
+                circuitBreakerState: "closed",
+                optimizationApplied: false,
+                exitCode: code,
+                errorMessage: stderr || `Exit code ${code}`,
+                status: "failed",
+            }, runtime);
+            return createErrorResponse("mistral", code, stderr, corrId);
         }
         wasSuccessful = true;
-        // If we used a session ID and it's not tracked yet, create a session record
-        if (effectiveSessionId) {
-            const existingSession = await sessionManager.getSession(effectiveSessionId);
-            if (!existingSession) {
-                await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
+        let effectiveSessionId = sessionResult.effectiveSessionId;
+        if (sessionResult.userProvidedSession && effectiveSessionId) {
+            const existing = await deps.sessionManager.getSession(effectiveSessionId);
+            if (!existing) {
+                try {
+                    await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
+                }
+                catch {
+                    const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
+                    if (!rechecked)
+                        throw new Error(`Failed to create or find session ${effectiveSessionId}`);
+                }
             }
+            await deps.sessionManager.updateSessionUsage(effectiveSessionId);
         }
-        logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
-        // Parse stream-json NDJSON output to extract result text
-        if (outputFormat === "stream-json") {
-            const parsed = parseStreamJson(stdout);
-            if (parsed.costUsd !== null) {
-                logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
-            }
-            safeFlightComplete(corrId, {
-                response: parsed.text,
-                inputTokens: parsed.usage?.inputTokens,
-                outputTokens: parsed.usage?.outputTokens,
-                durationMs,
-                retryCount: 0,
-                circuitBreakerState: "closed",
-                costUsd: parsed.costUsd ?? undefined,
-                optimizationApplied: optimizePrompt || optimizeResponse,
-                exitCode: 0,
-                status: "completed",
-            });
-            return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+        else if (!params.createNewSession && !effectiveSessionId) {
+            const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
+            effectiveSessionId = newSession.id;
         }
+        deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
+        const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
         safeFlightComplete(corrId, {
             response: stdout,
             durationMs,
             retryCount: 0,
             circuitBreakerState: "closed",
-            optimizationApplied: optimizePrompt || optimizeResponse,
+            approvalDecision: prep.approvalDecision?.status,
+            optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
             exitCode: 0,
             status: "completed",
-        });
-        return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+        }, runtime);
+        return response;
     }
     catch (error) {
         const elapsedMs = Math.max(0, Date.now() - startTime);
-        logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
+        deps.logger.info(`[${corrId}] mistral_request threw exception after ${elapsedMs}ms`);
         safeFlightComplete(corrId, {
             response: "",
             durationMs: elapsedMs,
             retryCount: 0,
             circuitBreakerState: "closed",
-            optimizationApplied: optimizePrompt || optimizeResponse,
+            optimizationApplied: false,
             exitCode: 1,
             errorMessage: error.message,
             status: "failed",
-        });
-        return createErrorResponse("claude", 1, "", corrId, error);
+        }, runtime);
+        return createErrorResponse("mistral", 1, "", corrId, error);
     }
     finally {
         const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
-        performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
+        runtime.performanceMetrics.recordRequest("mistral", finalizedDurationMs, wasSuccessful);
     }
-});
-//──────────────────────────────────────────────────────────────────────────────
-// Codex Tool
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("codex_request", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Codex"),
-    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
-    fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
-    dangerouslyBypassApprovalsAndSandbox: z
-        .boolean()
-        .default(false)
-        .describe("Run Codex without approvals/sandbox"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
-    sessionId: z
-        .string()
-        .optional()
-        .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
-    resumeLatest: z
-        .boolean()
-        .default(false)
-        .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
-    createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
-    const startTime = Date.now();
-    const prep = prepareCodexRequest({
-        prompt,
-        model,
-        fullAuto,
-        dangerouslyBypassApprovalsAndSandbox,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        correlationId,
-        optimizePrompt,
-        operation: "codex_request",
-    });
+}
+export async function handleMistralRequestAsync(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
+    const prep = prepareMistralRequest({
+        prompt: params.prompt,
+        model: params.model,
+        outputFormat: params.outputFormat,
+        permissionMode: params.permissionMode,
+        effort: params.effort,
+        reasoningEffort: params.reasoningEffort,
+        allowedTools: params.allowedTools,
+        disallowedTools: params.disallowedTools,
+        approvalStrategy: params.approvalStrategy,
+        approvalPolicy: params.approvalPolicy,
+        mcpServers: params.mcpServers,
+        correlationId: params.correlationId,
+        optimizePrompt: params.optimizePrompt,
+        operation: "mistral_request_async",
+    }, runtime);
     if (!("args" in prep))
         return prep;
-    const { corrId, args } = prep;
-    let durationMs = 0;
-    let wasSuccessful = false;
-    safeFlightStart({
-        correlationId: corrId,
-        cli: "codex",
-        model: prep.resolvedModel || "default",
-        prompt,
-        sessionId,
-    });
-    logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
+    const { corrId, args, requestedMcpServers, approvalDecision, mistralEnv } = prep;
     try {
-        const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, forceRefresh);
-        // Deferred — job still running, return async reference
-        if (isDeferredResponse(result)) {
-            return buildDeferredToolResponse(result, sessionId);
-        }
-        const { stdout, stderr, code } = result;
-        durationMs = Math.max(0, Date.now() - startTime);
-        if (code !== 0) {
-            logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
-            safeFlightComplete(corrId, {
-                response: stderr || "",
-                durationMs,
-                retryCount: 0,
-                circuitBreakerState: "closed",
-                optimizationApplied: optimizePrompt || optimizeResponse,
-                exitCode: code,
-                errorMessage: stderr || `Exit code ${code}`,
-                status: "failed",
-            });
-            return createErrorResponse("codex", code, stderr, corrId);
-        }
-        wasSuccessful = true;
-        // Track session usage
-        let effectiveSessionId = sessionId;
-        if (!createNewSession && !sessionId) {
-            const activeSession = await sessionManager.getActiveSession("codex");
-            if (activeSession) {
-                effectiveSessionId = activeSession.id;
-            }
-            else {
-                const newSession = await sessionManager.createSession("codex", "Codex Session");
-                effectiveSessionId = newSession.id;
+        const sessionResult = resolveMistralSessionArgs({
+            sessionId: params.sessionId,
+            resumeLatest: params.resumeLatest,
+            createNewSession: params.createNewSession,
+        });
+        args.push(...sessionResult.resumeArgs);
+        let effectiveSessionId = sessionResult.effectiveSessionId;
+        if (sessionResult.userProvidedSession && effectiveSessionId) {
+            const existing = await deps.sessionManager.getSession(effectiveSessionId);
+            if (!existing) {
+                try {
+                    await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
+                }
+                catch {
+                    const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
+                    if (!rechecked)
+                        throw new Error(`Failed to create or find session ${effectiveSessionId}`);
+                }
             }
+            await deps.sessionManager.updateSessionUsage(effectiveSessionId);
         }
-        else if (sessionId) {
-            await sessionManager.updateSessionUsage(sessionId);
-        }
-        else if (createNewSession) {
-            const newSession = await sessionManager.createSession("codex", "Codex Session");
+        else if (!params.createNewSession && !effectiveSessionId) {
+            const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
             effectiveSessionId = newSession.id;
         }
-        logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
-        safeFlightComplete(corrId, {
-            response: stdout,
-            durationMs,
-            retryCount: 0,
-            circuitBreakerState: "closed",
-            optimizationApplied: optimizePrompt || optimizeResponse,
-            exitCode: 0,
-            status: "completed",
-        });
-        return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
+        assertUpstreamCliArgs("mistral", args);
+        assertUpstreamCliEnv("mistral", mistralEnv);
+        const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
+        deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
+        const asyncResponse = {
+            success: true,
+            job,
+            sessionId: effectiveSessionId || null,
+            resumable: sessionResult.userProvidedSession,
+            approval: approvalDecision,
+            mcpServers: { requested: requestedMcpServers },
+        };
+        if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
+            asyncResponse.reviewIntegrity = prep.reviewIntegrity;
+        }
+        return {
+            content: [
+                {
+                    type: "text",
+                    text: JSON.stringify(asyncResponse, null, 2),
+                },
+            ],
+        };
     }
     catch (error) {
-        const elapsedMs = Math.max(0, Date.now() - startTime);
-        logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
-        safeFlightComplete(corrId, {
-            response: "",
-            durationMs: elapsedMs,
-            retryCount: 0,
-            circuitBreakerState: "closed",
-            optimizationApplied: optimizePrompt || optimizeResponse,
-            exitCode: 1,
-            errorMessage: error.message,
-            status: "failed",
-        });
-        return createErrorResponse("codex", 1, "", corrId, error);
+        return createErrorResponse("mistral_request_async", 1, "", corrId, error);
     }
-    finally {
-        const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
-        performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
-    }
-});
-//──────────────────────────────────────────────────────────────────────────────
-// Gemini Tool
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("gemini_request", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Gemini"),
-    model: z
-        .string()
-        .optional()
-        .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
-    sessionId: z.string().optional().describe("Session ID or 'latest'"),
-    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    approvalMode: z
-        .enum(["default", "auto_edit", "yolo"])
-        .optional()
-        .describe("Approval: default|auto_edit|yolo"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed tools (['Write','Edit','Bash'])"),
-    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
-    return handleGeminiRequest({ sessionManager, logger }, {
-        prompt,
-        model,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        approvalMode,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        allowedTools,
-        includeDirs,
-        correlationId,
-        optimizePrompt,
-        optimizeResponse,
-        idleTimeoutMs,
-        forceRefresh,
-    });
-});
-//──────────────────────────────────────────────────────────────────────────────
-// Grok Tool
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("grok_request", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Grok"),
-    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
-    outputFormat: z
-        .enum(["plain", "json", "streaming-json"])
-        .optional()
-        .describe("Output format (plain|json|streaming-json). Grok default is plain."),
-    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
-    resumeLatest: z
-        .boolean()
-        .default(false)
-        .describe("Resume most recent Grok session in cwd (--continue)"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    alwaysApprove: z
-        .boolean()
-        .default(false)
-        .describe("Auto-approve all tool executions (--always-approve)"),
-    permissionMode: z
-        .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
-        .optional()
-        .describe("Grok permission mode"),
-    effort: z
-        .enum(["low", "medium", "high", "xhigh", "max"])
-        .optional()
-        .describe("Grok effort level"),
-    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed built-in tools (passed as --tools comma list)"),
-    disallowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
-    return handleGrokRequest({ sessionManager, logger }, {
-        prompt,
-        model,
-        outputFormat,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        alwaysApprove,
-        permissionMode,
-        effort,
-        reasoningEffort,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        allowedTools,
-        disallowedTools,
-        correlationId,
-        optimizePrompt,
-        optimizeResponse,
-        idleTimeoutMs,
-        forceRefresh,
-    });
-});
-//──────────────────────────────────────────────────────────────────────────────
-// Async Long-Running Job Tools (No Time-Bound LLM Execution)
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("claude_request_async", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Claude"),
-    model: z
-        .string()
-        .optional()
-        .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
-    outputFormat: z
-        .enum(["text", "json", "stream-json"])
-        .default("text")
-        .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
-    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
-    continueSession: z.boolean().default(false).describe("Continue active session"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
-    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
-    dangerouslySkipPermissions: z
-        .boolean()
-        .default(false)
-        .describe("Bypass permissions (sandbox only)"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP servers exposed to Claude"),
-    strictMcpConfig: z
-        .boolean()
-        .default(false)
-        .describe("Restrict Claude to provided MCP config only"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
-    const prep = prepareClaudeRequest({
-        prompt,
-        model,
-        outputFormat,
-        allowedTools,
-        disallowedTools,
-        dangerouslySkipPermissions,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        strictMcpConfig,
-        correlationId,
-        optimizePrompt,
-        operation: "claude_request_async",
-    });
+}
+export async function handleCodexRequestAsync(deps, params) {
+    const runtime = resolveHandlerRuntime(deps);
+    const prep = prepareCodexRequest({
+        prompt: params.prompt,
+        model: params.model,
+        fullAuto: params.fullAuto,
+        sandboxMode: params.sandboxMode,
+        askForApproval: params.askForApproval,
+        useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
+        dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
+        approvalStrategy: params.approvalStrategy,
+        approvalPolicy: params.approvalPolicy,
+        mcpServers: params.mcpServers,
+        sessionId: params.sessionId,
+        resumeLatest: params.resumeLatest,
+        createNewSession: params.createNewSession,
+        correlationId: params.correlationId,
+        optimizePrompt: params.optimizePrompt,
+        operation: "codex_request_async",
+        outputFormat: params.outputFormat,
+        outputSchema: params.outputSchema,
+        search: params.search,
+        profile: params.profile,
+        configOverrides: params.configOverrides,
+        ephemeral: params.ephemeral,
+        images: params.images,
+        ignoreUserConfig: params.ignoreUserConfig,
+        ignoreRules: params.ignoreRules,
+    }, runtime);
     if (!("args" in prep))
         return prep;
-    const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
+    const { corrId, args, requestedMcpServers, approvalDecision } = prep;
+    // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
+    // exactly one place at a time: this scope until startJob succeeds, then
+    // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
+    // the job is registered. Any code path that fails to hand it off MUST run
+    // it locally.
+    const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
+    let prepCleanupOwnedHere = prepCleanup !== undefined;
+    const runPrepCleanupLocally = () => {
+        if (!prepCleanupOwnedHere || !prepCleanup)
+            return;
+        prepCleanupOwnedHere = false;
+        try {
+            prepCleanup();
+        }
+        catch (err) {
+            deps.logger.error(`[${corrId}] codex_request_async outputSchema cleanup threw`, err);
+        }
+    };
     try {
-        // Session management (before job start for async)
-        let effectiveSessionId = sessionId;
-        let useContinue = continueSession;
-        const activeSession = await sessionManager.getActiveSession("claude");
-        if (!createNewSession && !continueSession && !sessionId && activeSession) {
-            effectiveSessionId = activeSession.id;
-            useContinue = true;
+        // Pre-start session I/O (async handlers: prevent orphaned jobs)
+        let effectiveSessionId = params.sessionId;
+        if (!params.createNewSession && !params.sessionId) {
+            const activeSession = await deps.sessionManager.getActiveSession("codex");
+            if (activeSession) {
+                effectiveSessionId = activeSession.id;
+            }
+            else {
+                const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
+                effectiveSessionId = newSession.id;
+            }
         }
-        if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
-            useContinue = true;
+        else if (params.sessionId) {
+            await deps.sessionManager.updateSessionUsage(params.sessionId);
         }
-        if (useContinue) {
-            args.push("--continue");
+        else if (params.createNewSession) {
+            const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
+            effectiveSessionId = newSession.id;
         }
-        else if (effectiveSessionId) {
-            args.push("--session-id", effectiveSessionId);
-            await sessionManager.updateSessionUsage(effectiveSessionId);
+        // Start job only after all session I/O succeeds. If startJob throws before
+        // registering the record, ownership stays here and we run it in the catch.
+        assertUpstreamCliArgs("codex", args);
+        assertUpstreamCliEnv("codex", undefined);
+        let job;
+        try {
+            job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
+            // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
+            // status. Release our local ownership claim so the catch path doesn't
+            // double-fire.
+            prepCleanupOwnedHere = false;
         }
-        if (effectiveSessionId) {
-            const existingSession = await sessionManager.getSession(effectiveSessionId);
-            if (!existingSession) {
-                await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
-            }
+        catch (startErr) {
+            // startJob never stored the record → manager won't call onComplete. We
+            // still own the cleanup; let the outer catch run it.
+            throw startErr;
         }
-        // Idle timeout only for stream-json (text/json produce no output until done)
-        const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
-        const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
-        logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
+        deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
         const asyncResponse = {
             success: true,
             job,
-            sessionId: effectiveSessionId || activeSession?.id || null,
+            sessionId: effectiveSessionId || null,
             approval: approvalDecision,
-            mcpServers: {
-                requested: requestedMcpServers,
-                enabled: mcpConfig?.enabled,
-                missing: mcpConfig?.missing,
-            },
+            mcpServers: { requested: requestedMcpServers },
         };
         if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
             asyncResponse.reviewIntegrity = prep.reviewIntegrity;
@@ -1909,667 +1891,1928 @@ server.tool("claude_request_async", {
         };
     }
     catch (error) {
-        return createErrorResponse("claude_request_async", 1, "", corrId, error);
-    }
-});
-server.tool("codex_request_async", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Codex"),
-    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
-    fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
-    dangerouslyBypassApprovalsAndSandbox: z
-        .boolean()
-        .default(false)
-        .describe("Run Codex without approvals/sandbox"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
-    sessionId: z
-        .string()
-        .optional()
-        .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
-    resumeLatest: z
-        .boolean()
-        .default(false)
-        .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
-    createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
-    return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
-        prompt,
-        model,
-        fullAuto,
-        dangerouslyBypassApprovalsAndSandbox,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        correlationId,
-        optimizePrompt,
-        idleTimeoutMs,
-        forceRefresh,
-    });
-});
-server.tool("gemini_request_async", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Gemini"),
-    model: z
-        .string()
-        .optional()
-        .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
-    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
-    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    approvalMode: z
-        .enum(["default", "auto_edit", "yolo"])
-        .optional()
-        .describe("Approval: default|auto_edit|yolo"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed tools (['Write','Edit','Bash'])"),
-    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
-    return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
-        prompt,
-        model,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        approvalMode,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        allowedTools,
-        includeDirs,
-        correlationId,
-        optimizePrompt,
-        idleTimeoutMs,
-        forceRefresh,
-    });
-});
-server.tool("grok_request_async", {
-    prompt: z
-        .string()
-        .min(1, "Prompt cannot be empty")
-        .max(100000, "Prompt too long (max 100k chars)")
-        .describe("Prompt text for Grok"),
-    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
-    outputFormat: z
-        .enum(["plain", "json", "streaming-json"])
-        .optional()
-        .describe("Output format (plain|json|streaming-json). Grok default is plain."),
-    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
-    resumeLatest: z
-        .boolean()
-        .default(false)
-        .describe("Resume most recent Grok session in cwd (--continue)"),
-    createNewSession: z.boolean().default(false).describe("Force new session"),
-    alwaysApprove: z
-        .boolean()
-        .default(false)
-        .describe("Auto-approve all tool executions (--always-approve)"),
-    permissionMode: z
-        .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
-        .optional()
-        .describe("Grok permission mode"),
-    effort: z
-        .enum(["low", "medium", "high", "xhigh", "max"])
-        .optional()
-        .describe("Grok effort level"),
-    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
-    approvalStrategy: z
-        .enum(["legacy", "mcp_managed"])
-        .default("legacy")
-        .describe("Approval strategy"),
-    approvalPolicy: z
-        .enum(["strict", "balanced", "permissive"])
-        .optional()
-        .describe("Approval policy override"),
-    mcpServers: z
-        .array(MCP_SERVER_ENUM)
-        .default(["sqry"])
-        .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
-    allowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Allowed built-in tools (passed as --tools comma list)"),
-    disallowedTools: z
-        .array(z.string())
-        .optional()
-        .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
-    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
-    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
-    idleTimeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
-    forceRefresh: z
-        .boolean()
-        .default(false)
-        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
-    return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger }, {
-        prompt,
-        model,
-        outputFormat,
-        sessionId,
-        resumeLatest,
-        createNewSession,
-        alwaysApprove,
-        permissionMode,
-        effort,
-        reasoningEffort,
-        approvalStrategy,
-        approvalPolicy,
-        mcpServers,
-        allowedTools,
-        disallowedTools,
-        correlationId,
-        optimizePrompt,
-        idleTimeoutMs,
-        forceRefresh,
-    });
-});
-server.tool("llm_job_status", {
-    jobId: z.string().describe("Async job ID from *_request_async"),
-}, async ({ jobId }) => {
-    const job = asyncJobManager.getJobSnapshot(jobId);
-    if (!job) {
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: false,
-                        error: "Job not found",
-                        jobId,
-                    }, null, 2),
-                },
-            ],
-            isError: true,
-        };
-    }
-    return {
-        content: [
-            {
-                type: "text",
-                text: JSON.stringify({
-                    success: true,
-                    job,
-                }, null, 2),
-            },
-        ],
-    };
-});
-server.tool("llm_job_result", {
-    jobId: z.string().describe("Async job ID from *_request_async"),
-    maxChars: z
-        .number()
-        .int()
-        .min(1000)
-        .max(2000000)
-        .default(200000)
-        .describe("Max chars returned per stream"),
-}, async ({ jobId, maxChars }) => {
-    const result = asyncJobManager.getJobResult(jobId, maxChars);
-    if (!result) {
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: false,
-                        error: "Job not found",
-                        jobId,
-                    }, null, 2),
-                },
-            ],
-            isError: true,
-        };
-    }
-    // Parse stream-json output for Claude async jobs
-    const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
-    let parsed;
-    if (outputFormat === "stream-json" && result.stdout) {
-        parsed = parseStreamJson(result.stdout);
-    }
-    return {
-        content: [
-            {
-                type: "text",
-                text: JSON.stringify({
-                    success: true,
-                    result,
-                    ...(parsed
-                        ? {
-                            parsed: {
-                                text: parsed.text,
-                                costUsd: parsed.costUsd,
-                                usage: parsed.usage,
-                                model: parsed.model,
-                                numTurns: parsed.numTurns,
-                            },
-                        }
-                        : {}),
-                }, null, 2),
-            },
-        ],
-    };
-});
-server.tool("llm_job_cancel", {
-    jobId: z.string().describe("Async job ID from *_request_async"),
-}, async ({ jobId }) => {
-    const cancel = asyncJobManager.cancelJob(jobId);
-    if (!cancel.canceled) {
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: false,
-                        jobId,
-                        reason: cancel.reason || "Unable to cancel",
-                    }, null, 2),
-                },
-            ],
-            isError: true,
-        };
+        // Pre-start failure: either session I/O threw, or startJob threw before
+        // registering the record. In either case the manager will NOT fire
+        // prepCleanup, so we must run it here.
+        runPrepCleanupLocally();
+        return createErrorResponse("codex_request_async", 1, "", corrId, error);
     }
-    return {
-        content: [
-            {
-                type: "text",
-                text: JSON.stringify({
-                    success: true,
-                    jobId,
-                }, null, 2),
-            },
-        ],
-    };
-});
-server.tool("llm_process_health", {}, async () => {
-    const health = asyncJobManager.getJobHealth();
-    return {
-        content: [
-            {
-                type: "text",
-                text: JSON.stringify({ success: true, ...health }, null, 2),
-            },
-        ],
-    };
-});
+}
 //──────────────────────────────────────────────────────────────────────────────
-// Approval Audit Tools
+// Claude Code Tool
 //──────────────────────────────────────────────────────────────────────────────
-server.tool("approval_list", {
-    limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
-    cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter"),
-}, async ({ limit, cli }) => {
-    const approvals = approvalManager.list(limit, cli);
-    return {
-        content: [
-            {
-                type: "text",
-                text: JSON.stringify({
+export function createGatewayServer(deps = {}) {
+    const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
+    const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
+    // Structural invariant: tools register iff ALL THREE conditions hold:
+    //   (1) persistence.backend !== "none"  — the operator/config has not
+    //       explicitly disabled durable persistence;
+    //   (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
+    //       agrees (loadPersistenceConfig sets this iff backend is one of
+    //       sqlite/postgres/memory);
+    //   (3) asyncJobManager.hasStore() === true — the runtime manager
+    //       actually has a store attached (isolate-mode runtimes use null).
+    //
+    // Each guard closes a distinct re-entry path for the silent-loss footgun:
+    //   - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
+    //     and re-advertise the async tools while reporting backend='none' in
+    //     llm_process_health — exactly contradicting SPEC CLAIM 4f.
+    //   - Without (2), config that opts out is ignored.
+    //   - Without (3), a null-store manager (isolate-mode / HTTP per-session)
+    //     accepts registrations that have nowhere to persist results.
+    const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
+    const server = newGatewayMcpServer();
+    registerBaseResources(server, runtime);
+    registerValidationTools(server, { asyncJobManager });
+    server.tool("claude_request", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for Claude"),
+        model: z
+            .string()
+            .optional()
+            .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
+        outputFormat: z
+            .enum(["text", "json", "stream-json"])
+            .default("text")
+            .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
+        sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
+        continueSession: z.boolean().default(false).describe("Continue active session"),
+        createNewSession: z.boolean().default(false).describe("Force new session"),
+        allowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
+        disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
+        dangerouslySkipPermissions: z
+            .boolean()
+            .default(false)
+            .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
+        permissionMode: z
+            .enum(CLAUDE_PERMISSION_MODES)
+            .optional()
+            .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
+        // U25 — Claude high-impact features
+        agent: z
+            .string()
+            .optional()
+            .describe("Claude --agent: dispatch to a named single sub-agent."),
+        agents: z
+            .record(z.record(z.unknown()))
+            .optional()
+            .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
+        forkSession: z
+            .boolean()
+            .optional()
+            .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
+        systemPrompt: z
+            .string()
+            .optional()
+            .describe("Claude --system-prompt: replace the system prompt entirely."),
+        appendSystemPrompt: z
+            .string()
+            .optional()
+            .describe("Claude --append-system-prompt: append to the existing system prompt."),
+        maxBudgetUsd: z
+            .number()
+            .positive()
+            .optional()
+            .describe("Claude --max-budget-usd: spend cap for this request in USD."),
+        maxTurns: z
+            .number()
+            .int()
+            .positive()
+            .optional()
+            .describe("Claude --max-turns: cap on agent loop iterations."),
+        effort: z
+            .enum(CLAUDE_EFFORT_LEVELS)
+            .optional()
+            .describe("Claude --effort: low|medium|high|xhigh|max."),
+        excludeDynamicSystemPromptSections: z
+            .boolean()
+            .optional()
+            .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
+        approvalStrategy: z
+            .enum(["legacy", "mcp_managed"])
+            .default("legacy")
+            .describe("Approval strategy"),
+        approvalPolicy: z
+            .enum(["strict", "balanced", "permissive"])
+            .optional()
+            .describe("Approval policy override"),
+        mcpServers: z
+            .array(MCP_SERVER_ENUM)
+            .default(["sqry"])
+            .describe("MCP servers exposed to Claude"),
+        strictMcpConfig: z
+            .boolean()
+            .default(false)
+            .describe("Restrict Claude to provided MCP config only"),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+        forceRefresh: z
+            .boolean()
+            .default(false)
+            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+    }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+        const startTime = Date.now();
+        if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
+            return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
+        }
+        const prep = prepareClaudeRequest({
+            prompt,
+            model,
+            outputFormat,
+            allowedTools,
+            disallowedTools,
+            dangerouslySkipPermissions,
+            permissionMode,
+            approvalStrategy,
+            approvalPolicy,
+            mcpServers,
+            strictMcpConfig,
+            correlationId,
+            optimizePrompt,
+            operation: "claude_request",
+            agent,
+            agents,
+            forkSession,
+            systemPrompt,
+            appendSystemPrompt,
+            maxBudgetUsd,
+            maxTurns,
+            effort,
+            excludeDynamicSystemPromptSections,
+        }, runtime);
+        if (!("args" in prep))
+            return prep;
+        const { corrId, args } = prep;
+        let durationMs = 0;
+        let wasSuccessful = false;
+        safeFlightStart({
+            correlationId: corrId,
+            cli: "claude",
+            model: prep.resolvedModel || "default",
+            prompt,
+            sessionId,
+        }, runtime);
+        logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
+        try {
+            // Session management
+            let effectiveSessionId = sessionId;
+            let useContinue = continueSession;
+            const activeSession = await sessionManager.getActiveSession("claude");
+            if (!createNewSession && !continueSession && !sessionId && activeSession) {
+                effectiveSessionId = activeSession.id;
+                useContinue = true;
+            }
+            if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
+                useContinue = true;
+            }
+            if (useContinue) {
+                args.push("--continue");
+            }
+            else if (effectiveSessionId) {
+                args.push("--session-id", effectiveSessionId);
+                await sessionManager.updateSessionUsage(effectiveSessionId);
+            }
+            // Idle timeout only for stream-json (text/json produce no output until done)
+            const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
+            const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime);
+            // Deferred — job still running, return async reference
+            if (isDeferredResponse(result)) {
+                return buildDeferredToolResponse(result, effectiveSessionId);
+            }
+            const { stdout, stderr, code } = result;
+            durationMs = Math.max(0, Date.now() - startTime);
+            if (code !== 0) {
+                logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
+                safeFlightComplete(corrId, {
+                    response: stderr || "",
+                    durationMs,
+                    retryCount: 0,
+                    circuitBreakerState: "closed",
+                    optimizationApplied: optimizePrompt || optimizeResponse,
+                    exitCode: code,
+                    errorMessage: stderr || `Exit code ${code}`,
+                    status: "failed",
+                }, runtime);
+                return createErrorResponse("claude", code, stderr, corrId);
+            }
+            wasSuccessful = true;
+            // If we used a session ID and it's not tracked yet, create a session record
+            if (effectiveSessionId) {
+                const existingSession = await sessionManager.getSession(effectiveSessionId);
+                if (!existingSession) {
+                    await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
+                }
+            }
+            logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
+            // Parse stream-json NDJSON output to extract result text
+            if (outputFormat === "stream-json") {
+                const parsed = parseStreamJson(stdout);
+                if (parsed.costUsd !== null) {
+                    logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
+                }
+                safeFlightComplete(corrId, {
+                    response: parsed.text,
+                    inputTokens: parsed.usage?.inputTokens,
+                    outputTokens: parsed.usage?.outputTokens,
+                    cacheReadTokens: parsed.usage?.cacheReadInputTokens || undefined,
+                    cacheCreationTokens: parsed.usage?.cacheCreationInputTokens || undefined,
+                    durationMs,
+                    retryCount: 0,
+                    circuitBreakerState: "closed",
+                    costUsd: parsed.costUsd ?? undefined,
+                    optimizationApplied: optimizePrompt || optimizeResponse,
+                    exitCode: 0,
+                    status: "completed",
+                }, runtime);
+                return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+            }
+            safeFlightComplete(corrId, {
+                response: stdout,
+                durationMs,
+                retryCount: 0,
+                circuitBreakerState: "closed",
+                optimizationApplied: optimizePrompt || optimizeResponse,
+                exitCode: 0,
+                status: "completed",
+            }, runtime);
+            return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+        }
+        catch (error) {
+            const elapsedMs = Math.max(0, Date.now() - startTime);
+            logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
+            safeFlightComplete(corrId, {
+                response: "",
+                durationMs: elapsedMs,
+                retryCount: 0,
+                circuitBreakerState: "closed",
+                optimizationApplied: optimizePrompt || optimizeResponse,
+                exitCode: 1,
+                errorMessage: error.message,
+                status: "failed",
+            }, runtime);
+            return createErrorResponse("claude", 1, "", corrId, error);
+        }
+        finally {
+            const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
+            performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
+        }
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Codex Tool
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("codex_request", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for Codex"),
+        model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
+        fullAuto: z
+            .boolean()
+            .default(false)
+            .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
+        sandboxMode: z
+            .enum(CODEX_SANDBOX_MODES)
+            .optional()
+            .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
+        askForApproval: z
+            .enum(CODEX_ASK_FOR_APPROVAL_MODES)
+            .optional()
+            .describe("Codex --ask-for-approval: untrusted|on-request|never."),
+        useLegacyFullAutoFlag: z
+            .boolean()
+            .default(false)
+            .describe("Escape hatch: emit `--full-auto` directly instead of expanding (deprecated)."),
+        dangerouslyBypassApprovalsAndSandbox: z
+            .boolean()
+            .default(false)
+            .describe("Run Codex without approvals/sandbox"),
+        approvalStrategy: z
+            .enum(["legacy", "mcp_managed"])
+            .default("legacy")
+            .describe("Approval strategy"),
+        approvalPolicy: z
+            .enum(["strict", "balanced", "permissive"])
+            .optional()
+            .describe("Approval policy override"),
+        mcpServers: z
+            .array(MCP_SERVER_ENUM)
+            .default(["sqry"])
+            .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
+        sessionId: z
+            .string()
+            .optional()
+            .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
+        resumeLatest: z
+            .boolean()
+            .default(false)
+            .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
+        createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+        forceRefresh: z
+            .boolean()
+            .default(false)
+            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+        // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
+        // tokens (and any cost) through extractUsageAndCost. Without "json", the
+        // parser is unreachable and Codex usage is never reported.
+        outputFormat: z
+            .enum(["text", "json"])
+            .default("text")
+            .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
+        // U26: high-impact feature flags. All optional.
+        outputSchema: z
+            .union([z.string(), z.record(z.unknown())])
+            .optional()
+            .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object; object is materialised to a 0o600 temp file under os.tmpdir() and deleted after the run."),
+        search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
+        profile: z
+            .string()
+            .optional()
+            .describe("Codex --profile <name>: select a profile from ~/.codex/config.toml."),
+        configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
+        ephemeral: z
+            .boolean()
+            .optional()
+            .describe("Codex --ephemeral: do not persist the session to disk."),
+        images: z
+            .array(z.string())
+            .optional()
+            .describe("Codex -i <path>: image attachments. Each path must exist; missing paths fail fast."),
+        ignoreUserConfig: z
+            .boolean()
+            .optional()
+            .describe("Codex --ignore-user-config: ignore ~/.codex/config.toml for this run."),
+        ignoreRules: z
+            .boolean()
+            .optional()
+            .describe("Codex --ignore-rules: skip project rule files for this run."),
+    }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
+        const startTime = Date.now();
+        const prep = prepareCodexRequest({
+            prompt,
+            model,
+            fullAuto,
+            sandboxMode,
+            askForApproval,
+            useLegacyFullAutoFlag,
+            dangerouslyBypassApprovalsAndSandbox,
+            approvalStrategy,
+            approvalPolicy,
+            mcpServers,
+            sessionId,
+            resumeLatest,
+            createNewSession,
+            correlationId,
+            optimizePrompt,
+            operation: "codex_request",
+            outputFormat,
+            outputSchema,
+            search,
+            profile,
+            configOverrides,
+            ephemeral,
+            images,
+            ignoreUserConfig,
+            ignoreRules,
+        }, runtime);
+        if (!("args" in prep))
+            return prep;
+        const { corrId, args } = prep;
+        let durationMs = 0;
+        let wasSuccessful = false;
+        safeFlightStart({
+            correlationId: corrId,
+            cli: "codex",
+            model: prep.resolvedModel || "default",
+            prompt,
+            sessionId,
+        }, runtime);
+        logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
+        // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
+        // guarantees the cleanup runs exactly once — inline for direct
+        // execution, on terminal status for the job-backed path (sync
+        // completion or deferred). The outer finally MUST NOT clean again.
+        const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
+        try {
+            const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup);
+            // Deferred — job still running, return async reference. Cleanup
+            // ownership belongs to AsyncJobManager via onComplete.
+            if (isDeferredResponse(result)) {
+                return buildDeferredToolResponse(result, sessionId);
+            }
+            const { stdout, stderr, code } = result;
+            durationMs = Math.max(0, Date.now() - startTime);
+            if (code !== 0) {
+                logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
+                safeFlightComplete(corrId, {
+                    response: stderr || "",
+                    durationMs,
+                    retryCount: 0,
+                    circuitBreakerState: "closed",
+                    optimizationApplied: optimizePrompt || optimizeResponse,
+                    exitCode: code,
+                    errorMessage: stderr || `Exit code ${code}`,
+                    status: "failed",
+                }, runtime);
+                return createErrorResponse("codex", code, stderr, corrId);
+            }
+            wasSuccessful = true;
+            // Track session usage
+            let effectiveSessionId = sessionId;
+            if (!createNewSession && !sessionId) {
+                const activeSession = await sessionManager.getActiveSession("codex");
+                if (activeSession) {
+                    effectiveSessionId = activeSession.id;
+                }
+                else {
+                    const newSession = await sessionManager.createSession("codex", "Codex Session");
+                    effectiveSessionId = newSession.id;
+                }
+            }
+            else if (sessionId) {
+                await sessionManager.updateSessionUsage(sessionId);
+            }
+            else if (createNewSession) {
+                const newSession = await sessionManager.createSession("codex", "Codex Session");
+                effectiveSessionId = newSession.id;
+            }
+            logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
+            const codexUsage = extractUsageAndCost("codex", stdout, outputFormat);
+            safeFlightComplete(corrId, {
+                response: stdout,
+                durationMs,
+                retryCount: 0,
+                circuitBreakerState: "closed",
+                optimizationApplied: optimizePrompt || optimizeResponse,
+                exitCode: 0,
+                status: "completed",
+                inputTokens: codexUsage.inputTokens,
+                outputTokens: codexUsage.outputTokens,
+                cacheReadTokens: codexUsage.cacheReadTokens,
+                cacheCreationTokens: codexUsage.cacheCreationTokens,
+                costUsd: codexUsage.costUsd,
+            }, runtime);
+            return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+        }
+        catch (error) {
+            const elapsedMs = Math.max(0, Date.now() - startTime);
+            logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
+            safeFlightComplete(corrId, {
+                response: "",
+                durationMs: elapsedMs,
+                retryCount: 0,
+                circuitBreakerState: "closed",
+                optimizationApplied: optimizePrompt || optimizeResponse,
+                exitCode: 1,
+                errorMessage: error.message,
+                status: "failed",
+            }, runtime);
+            return createErrorResponse("codex", 1, "", corrId, error);
+        }
+        finally {
+            const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
+            performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
+            // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
+        }
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("codex_fork_session", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for the forked Codex session"),
+        sessionId: z
+            .string()
+            .optional()
+            .describe("Codex session UUID to fork from. Mutually exclusive with `forkLast`."),
+        forkLast: z
+            .boolean()
+            .optional()
+            .describe("Fork from the most recent Codex session. Mutually exclusive with `sessionId`."),
+        model: z.string().optional().describe("Model name or alias (e.g. gpt-5.5, latest)"),
+        sandboxMode: z
+            .enum(CODEX_SANDBOX_MODES)
+            .optional()
+            .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
+        askForApproval: z
+            .enum(CODEX_ASK_FOR_APPROVAL_MODES)
+            .optional()
+            .describe("Codex --ask-for-approval: untrusted|on-request|never."),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+    }, async ({ prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs, }) => {
+        const corrId = correlationId || randomUUID();
+        const startTime = Date.now();
+        let durationMs = 0;
+        let wasSuccessful = false;
+        // Enforce mutual exclusion at tool boundary (Zod records the params but
+        // the SDK's `.tool(...)` does not accept top-level refines).
+        if (sessionId && forkLast) {
+            return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
+        }
+        if (!sessionId && !forkLast) {
+            return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("one of sessionId or forkLast is required"));
+        }
+        let forkArgs;
+        try {
+            forkArgs = prepareCodexForkRequest({ prompt, sessionId, forkLast }).args;
+        }
+        catch (err) {
+            return createErrorResponse("codex_fork_session", 1, "", corrId, err);
+        }
+        const cliInfo = getCliInfo();
+        const resolvedModel = resolveModelAlias("codex", model, cliInfo);
+        // Compose argv: forkArgs already starts with `fork`. Inject model and
+        // sandbox/approval flags BEFORE the positional <sessionId|--last> +
+        // prompt to keep them as flags rather than positionals. forkArgs layout
+        // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
+        // we splice flags right after "fork".
+        const flagSegment = [];
+        if (resolvedModel)
+            flagSegment.push("--model", resolvedModel);
+        const sandboxFlags = resolveCodexSandboxFlags({
+            sandboxMode,
+            askForApproval,
+        });
+        if (sandboxFlags.warning) {
+            logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
+        }
+        flagSegment.push(...sandboxFlags.args);
+        const finalArgs = [forkArgs[0], ...flagSegment, ...forkArgs.slice(1)];
+        logger.info(`[${corrId}] codex_fork_session invoked (forkLast=${Boolean(forkLast)}, sessionId=${sessionId ? "set" : "unset"})`);
+        try {
+            const result = await awaitJobOrDefer("codex", finalArgs, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, false, runtime);
+            if (isDeferredResponse(result)) {
+                return buildDeferredToolResponse(result, sessionId);
+            }
+            const { stdout, stderr, code } = result;
+            durationMs = Math.max(0, Date.now() - startTime);
+            if (code !== 0) {
+                return createErrorResponse("codex", code, stderr, corrId);
+            }
+            wasSuccessful = true;
+            return {
+                content: [{ type: "text", text: stdout }],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("codex_fork_session", 1, "", corrId, error);
+        }
+        finally {
+            const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
+            performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
+        }
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Gemini Tool
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("gemini_request", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for Gemini"),
+        model: z
+            .string()
+            .optional()
+            .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
+        sessionId: z.string().optional().describe("Session ID or 'latest'"),
+        resumeLatest: z.boolean().default(false).describe("Resume latest session"),
+        createNewSession: z.boolean().default(false).describe("Force new session"),
+        approvalMode: z
+            .enum(GEMINI_APPROVAL_MODES)
+            .optional()
+            .describe("Approval: default|auto_edit|yolo|plan"),
+        approvalStrategy: z
+            .enum(["legacy", "mcp_managed"])
+            .default("legacy")
+            .describe("Approval strategy"),
+        approvalPolicy: z
+            .enum(["strict", "balanced", "permissive"])
+            .optional()
+            .describe("Approval policy override"),
+        mcpServers: z
+            .array(MCP_SERVER_ENUM)
+            .default(["sqry"])
+            .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
+        allowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Allowed tools (['Write','Edit','Bash'])"),
+        includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+        forceRefresh: z
+            .boolean()
+            .default(false)
+            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+        // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
+        // remains text so existing callers see no behavior change.
+        outputFormat: z
+            .enum(["text", "json"])
+            .default("text")
+            .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
+        sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
+        policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
+        adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
+        attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
+    }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
+        return handleGeminiRequest({ sessionManager, logger, runtime }, {
+            prompt,
+            model,
+            sessionId,
+            resumeLatest,
+            createNewSession,
+            approvalMode,
+            approvalStrategy,
+            approvalPolicy,
+            mcpServers,
+            allowedTools,
+            includeDirs,
+            correlationId,
+            optimizePrompt,
+            optimizeResponse,
+            idleTimeoutMs,
+            forceRefresh,
+            outputFormat,
+            sandbox,
+            policyFiles,
+            adminPolicyFiles,
+            attachments,
+        });
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Grok Tool
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("grok_request", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for Grok"),
+        model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
+        outputFormat: z
+            .enum(["plain", "json", "streaming-json"])
+            .optional()
+            .describe("Output format (plain|json|streaming-json). Grok default is plain."),
+        sessionId: z
+            .string()
+            .optional()
+            .describe("Session ID (user-provided CLI handle for --resume)"),
+        resumeLatest: z
+            .boolean()
+            .default(false)
+            .describe("Resume most recent Grok session in cwd (--continue)"),
+        createNewSession: z.boolean().default(false).describe("Force new session"),
+        alwaysApprove: z
+            .boolean()
+            .default(false)
+            .describe("Auto-approve all tool executions (--always-approve)"),
+        permissionMode: z
+            .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
+            .optional()
+            .describe("Grok permission mode"),
+        effort: z
+            .enum(["low", "medium", "high", "xhigh", "max"])
+            .optional()
+            .describe("Grok effort level"),
+        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
+        approvalStrategy: z
+            .enum(["legacy", "mcp_managed"])
+            .default("legacy")
+            .describe("Approval strategy"),
+        approvalPolicy: z
+            .enum(["strict", "balanced", "permissive"])
+            .optional()
+            .describe("Approval policy override"),
+        mcpServers: z
+            .array(MCP_SERVER_ENUM)
+            .default(["sqry"])
+            .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
+        allowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Allowed built-in tools (passed as --tools comma list)"),
+        disallowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+        forceRefresh: z
+            .boolean()
+            .default(false)
+            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+    }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+        return handleGrokRequest({ sessionManager, logger, runtime }, {
+            prompt,
+            model,
+            outputFormat,
+            sessionId,
+            resumeLatest,
+            createNewSession,
+            alwaysApprove,
+            permissionMode,
+            effort,
+            reasoningEffort,
+            approvalStrategy,
+            approvalPolicy,
+            mcpServers,
+            allowedTools,
+            disallowedTools,
+            correlationId,
+            optimizePrompt,
+            optimizeResponse,
+            idleTimeoutMs,
+            forceRefresh,
+        });
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Mistral Vibe Tool
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("mistral_request", {
+        prompt: z
+            .string()
+            .min(1, "Prompt cannot be empty")
+            .max(100000, "Prompt too long (max 100k chars)")
+            .describe("Prompt text for Mistral Vibe"),
+        model: z
+            .string()
+            .optional()
+            .describe("Model alias (e.g. devstral-medium, devstral-large, latest). Resolved alias is injected via VIBE_ACTIVE_MODEL env var — Vibe has no --model flag."),
+        outputFormat: z
+            .enum(["plain", "json", "stream-json"])
+            .optional()
+            .describe("Output format (plain|json|stream-json). Vibe default is plain."),
+        sessionId: z
+            .string()
+            .optional()
+            .describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
+        resumeLatest: z
+            .boolean()
+            .default(false)
+            .describe("Resume most recent Vibe session in cwd (--continue)"),
+        createNewSession: z.boolean().default(false).describe("Force new session"),
+        permissionMode: z
+            .enum(MISTRAL_AGENT_MODES)
+            .optional()
+            .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
+        effort: z
+            .enum(["low", "medium", "high", "xhigh", "max"])
+            .optional()
+            .describe("Vibe effort level"),
+        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
+        approvalStrategy: z
+            .enum(["legacy", "mcp_managed"])
+            .default("legacy")
+            .describe("Approval strategy"),
+        approvalPolicy: z
+            .enum(["strict", "balanced", "permissive"])
+            .optional()
+            .describe("Approval policy override"),
+        mcpServers: z
+            .array(MCP_SERVER_ENUM)
+            .default(["sqry"])
+            .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
+        allowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
+        disallowedTools: z
+            .array(z.string())
+            .optional()
+            .describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
+        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
+        idleTimeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+        forceRefresh: z
+            .boolean()
+            .default(false)
+            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+    }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+        return handleMistralRequest({ sessionManager, logger, runtime }, {
+            prompt,
+            model,
+            outputFormat,
+            sessionId,
+            resumeLatest,
+            createNewSession,
+            permissionMode,
+            effort,
+            reasoningEffort,
+            approvalStrategy,
+            approvalPolicy,
+            mcpServers,
+            allowedTools,
+            disallowedTools,
+            correlationId,
+            optimizePrompt,
+            optimizeResponse,
+            idleTimeoutMs,
+            forceRefresh,
+        });
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Async Long-Running Job Tools (No Time-Bound LLM Execution)
+    //
+    // STRUCTURAL INVARIANT: these tools are only registered when a real job
+    // store is attached (`persistence.asyncJobsEnabled === true`). When the
+    // operator has configured `[persistence].backend = "none"`, none of the
+    // *_request_async / llm_job_* tools exist in the MCP tool list at all —
+    // orchestrating agents get a clean "tool not found" signal at connect
+    // time instead of silent in-memory loss after the 1-hour TTL.
+    //──────────────────────────────────────────────────────────────────────────────
+    if (asyncJobsEnabled) {
+        server.tool("claude_request_async", { // Registers the claude_request_async tool: argument schema first, then the handler.
+            prompt: z
+                .string()
+                .min(1, "Prompt cannot be empty")
+                .max(100000, "Prompt too long (max 100k chars)")
+                .describe("Prompt text for Claude"),
+            model: z
+                .string()
+                .optional()
+                .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
+            outputFormat: z
+                .enum(["text", "json", "stream-json"])
+                .default("text")
+                .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
+            sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
+            continueSession: z.boolean().default(false).describe("Continue active session"),
+            createNewSession: z.boolean().default(false).describe("Force new session"),
+            allowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
+            disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
+            dangerouslySkipPermissions: z
+                .boolean()
+                .default(false)
+                .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
+            permissionMode: z
+                .enum(CLAUDE_PERMISSION_MODES)
+                .optional()
+                .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
+            // U25 — Claude high-impact features
+            agent: z
+                .string()
+                .optional()
+                .describe("Claude --agent: dispatch to a named single sub-agent."),
+            agents: z
+                .record(z.record(z.unknown()))
+                .optional()
+                .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
+            forkSession: z
+                .boolean()
+                .optional()
+                .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
+            systemPrompt: z
+                .string()
+                .optional()
+                .describe("Claude --system-prompt: replace the system prompt entirely."),
+            appendSystemPrompt: z
+                .string()
+                .optional()
+                .describe("Claude --append-system-prompt: append to the existing system prompt."),
+            maxBudgetUsd: z
+                .number()
+                .positive()
+                .optional()
+                .describe("Claude --max-budget-usd: spend cap for this request in USD."),
+            maxTurns: z
+                .number()
+                .int()
+                .positive()
+                .optional()
+                .describe("Claude --max-turns: cap on agent loop iterations."),
+            effort: z
+                .enum(CLAUDE_EFFORT_LEVELS)
+                .optional()
+                .describe("Claude --effort: low|medium|high|xhigh|max."),
+            excludeDynamicSystemPromptSections: z
+                .boolean()
+                .optional()
+                .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
+            approvalStrategy: z
+                .enum(["legacy", "mcp_managed"])
+                .default("legacy")
+                .describe("Approval strategy"),
+            approvalPolicy: z
+                .enum(["strict", "balanced", "permissive"])
+                .optional()
+                .describe("Approval policy override"),
+            mcpServers: z
+                .array(MCP_SERVER_ENUM)
+                .default(["sqry"])
+                .describe("MCP servers exposed to Claude"),
+            strictMcpConfig: z
+                .boolean()
+                .default(false)
+                .describe("Restrict Claude to provided MCP config only"),
+            correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+            optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+            idleTimeoutMs: z
+                .number()
+                .int()
+                .min(30_000)
+                .max(3_600_000)
+                .optional()
+                .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+            forceRefresh: z
+                .boolean()
+                .default(false)
+                .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+        }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
+            if (systemPrompt !== undefined && appendSystemPrompt !== undefined) { // Reject requests that set both system-prompt options.
+                return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
+            }
+            const prep = prepareClaudeRequest({ // Session and timeout fields are not passed here; they are handled below.
+                prompt,
+                model,
+                outputFormat,
+                allowedTools,
+                disallowedTools,
+                dangerouslySkipPermissions,
+                permissionMode,
+                approvalStrategy,
+                approvalPolicy,
+                mcpServers,
+                strictMcpConfig,
+                correlationId,
+                optimizePrompt,
+                operation: "claude_request_async",
+                agent,
+                agents,
+                forkSession,
+                systemPrompt,
+                appendSystemPrompt,
+                maxBudgetUsd,
+                maxTurns,
+                effort,
+                excludeDynamicSystemPromptSections,
+            }, runtime);
+            if (!("args" in prep))
+                return prep; // No "args" on the result: return it unchanged (presumably an error response — TODO confirm).
+            const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
+            try {
+                // Session management (before job start for async)
+                let effectiveSessionId = sessionId;
+                let useContinue = continueSession;
+                const activeSession = await sessionManager.getActiveSession("claude");
+                if (!createNewSession && !continueSession && !sessionId && activeSession) { // No explicit session choice: fall back to the active session and continue it.
+                    effectiveSessionId = activeSession.id;
+                    useContinue = true;
+                }
+                if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) { // An explicit sessionId equal to the active session is also continued.
+                    useContinue = true;
+                }
+                if (useContinue) {
+                    args.push("--continue");
+                }
+                else if (effectiveSessionId) { // Explicit, non-active session: pass its ID and record usage.
+                    args.push("--session-id", effectiveSessionId);
+                    await sessionManager.updateSessionUsage(effectiveSessionId);
+                }
+                if (effectiveSessionId) { // Create a session record for this ID if getSession finds none.
+                    const existingSession = await sessionManager.getSession(effectiveSessionId);
+                    if (!existingSession) {
+                        await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
+                    }
+                }
+                // Idle timeout only for stream-json (text/json produce no output until done)
+                const effectiveIdleTimeout = outputFormat === "stream-json"
+                    ? resolveIdleTimeout("claude", idleTimeoutMs)
+                    : undefined;
+                assertUpstreamCliArgs("claude", args);
+                assertUpstreamCliEnv("claude", undefined);
+                const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
+                logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
+                const asyncResponse = { // Payload serialized into the tool's text response below.
                     success: true,
-                    count: approvals.length,
-                    approvals,
-                }, null, 2),
-            },
-        ],
-    };
-});
-//──────────────────────────────────────────────────────────────────────────────
-// List Models Tool
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("list_models", {
-    cli: z
-        .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
-        .describe("CLI filter (claude|codex|gemini)"),
-}, async ({ cli }) => {
-    const cliInfo = getCliInfo();
-    const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
-    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
-});
-server.tool("cli_versions", {
-    cli: z
-        .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
-        .describe("CLI filter (claude|codex|gemini)"),
-}, async ({ cli }) => {
-    const versions = await getCliVersions(cli);
-    return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
-});
-server.tool("cli_upgrade", {
-    cli: z.enum(["claude", "codex", "gemini"]).describe("CLI to upgrade"),
-    target: z
-        .string()
-        .min(1)
-        .default("latest")
-        .describe("Package tag/version/target to install (default: latest)"),
-    dryRun: z
-        .boolean()
-        .default(true)
-        .describe("When true, return the upgrade plan without running it"),
-    timeoutMs: z
-        .number()
-        .int()
-        .min(30_000)
-        .max(3_600_000)
-        .optional()
-        .describe("Upgrade timeout in ms when dryRun=false"),
-}, async ({ cli, target, dryRun, timeoutMs }) => {
-    try {
-        const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: true,
-                        ...result,
-                    }, null, 2),
-                },
-            ],
+                    job,
+                    sessionId: effectiveSessionId || activeSession?.id || null, // Falls back to the active session ID when no effective ID was chosen above.
+                    approval: approvalDecision,
+                    mcpServers: {
+                        requested: requestedMcpServers,
+                        enabled: mcpConfig?.enabled,
+                        missing: mcpConfig?.missing,
+                    },
+                };
+                if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) { // Attach reviewIntegrity only when it reports at least one violation.
+                    asyncResponse.reviewIntegrity = prep.reviewIntegrity;
+                }
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify(asyncResponse, null, 2),
+                        },
+                    ],
+                };
+            }
+            catch (error) { // Any failure in session handling or job start becomes an error response.
+                return createErrorResponse("claude_request_async", 1, "", corrId, error);
+            }
+        });
+        server.tool("codex_request_async", {
+            prompt: z
+                .string()
+                .min(1, "Prompt cannot be empty")
+                .max(100000, "Prompt too long (max 100k chars)")
+                .describe("Prompt text for Codex"),
+            model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
+            fullAuto: z
+                .boolean()
+                .default(false)
+                .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
+            sandboxMode: z
+                .enum(CODEX_SANDBOX_MODES)
+                .optional()
+                .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
+            askForApproval: z
+                .enum(CODEX_ASK_FOR_APPROVAL_MODES)
+                .optional()
+                .describe("Codex --ask-for-approval: untrusted|on-request|never."),
+            useLegacyFullAutoFlag: z
+                .boolean()
+                .default(false)
+                .describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
+            dangerouslyBypassApprovalsAndSandbox: z
+                .boolean()
+                .default(false)
+                .describe("Run Codex without approvals/sandbox"),
+            approvalStrategy: z
+                .enum(["legacy", "mcp_managed"])
+                .default("legacy")
+                .describe("Approval strategy"),
+            approvalPolicy: z
+                .enum(["strict", "balanced", "permissive"])
+                .optional()
+                .describe("Approval policy override"),
+            mcpServers: z
+                .array(MCP_SERVER_ENUM)
+                .default(["sqry"])
+                .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
+            sessionId: z
+                .string()
+                .optional()
+                .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
+            resumeLatest: z
+                .boolean()
+                .default(false)
+                .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
+            createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
+            correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+            optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+            idleTimeoutMs: z
+                .number()
+                .int()
+                .min(30_000)
+                .max(3_600_000)
+                .optional()
+                .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+            forceRefresh: z
+                .boolean()
+                .default(false)
+                .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+            // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
+            outputFormat: z
+                .enum(["text", "json"])
+                .default("text")
+                .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
+            // U26: high-impact feature flags. All optional.
+            outputSchema: z
+                .union([z.string(), z.record(z.unknown())])
+                .optional()
+                .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
+            search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
+            profile: z.string().optional().describe("Codex --profile <name>."),
+            configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
+            ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
+            images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
+            ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
+            ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
+        }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
+            return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
+                prompt,
+                model,
+                fullAuto,
+                sandboxMode,
+                askForApproval,
+                useLegacyFullAutoFlag,
+                dangerouslyBypassApprovalsAndSandbox,
+                approvalStrategy,
+                approvalPolicy,
+                mcpServers,
+                sessionId,
+                resumeLatest,
+                createNewSession,
+                correlationId,
+                optimizePrompt,
+                idleTimeoutMs,
+                forceRefresh,
+                outputFormat,
+                outputSchema,
+                search,
+                profile,
+                configOverrides,
+                ephemeral,
+                images,
+                ignoreUserConfig,
+                ignoreRules,
+            });
+        });
+        server.tool("gemini_request_async", {
+            prompt: z
+                .string()
+                .min(1, "Prompt cannot be empty")
+                .max(100000, "Prompt too long (max 100k chars)")
+                .describe("Prompt text for Gemini"),
+            model: z
+                .string()
+                .optional()
+                .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
+            sessionId: z
+                .string()
+                .optional()
+                .describe("Session ID (user-provided CLI handle for --resume)"),
+            resumeLatest: z.boolean().default(false).describe("Resume latest session"),
+            createNewSession: z.boolean().default(false).describe("Force new session"),
+            approvalMode: z
+                .enum(GEMINI_APPROVAL_MODES)
+                .optional()
+                .describe("Approval: default|auto_edit|yolo|plan"),
+            approvalStrategy: z
+                .enum(["legacy", "mcp_managed"])
+                .default("legacy")
+                .describe("Approval strategy"),
+            approvalPolicy: z
+                .enum(["strict", "balanced", "permissive"])
+                .optional()
+                .describe("Approval policy override"),
+            mcpServers: z
+                .array(MCP_SERVER_ENUM)
+                .default(["sqry"])
+                .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
+            allowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Allowed tools (['Write','Edit','Bash'])"),
+            includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
+            correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+            optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+            idleTimeoutMs: z
+                .number()
+                .int()
+                .min(30_000)
+                .max(3_600_000)
+                .optional()
+                .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+            forceRefresh: z
+                .boolean()
+                .default(false)
+                .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+            // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
+            // remains text so existing callers see no behavior change.
+            outputFormat: z
+                .enum(["text", "json"])
+                .default("text")
+                .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
+            sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
+            policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
+            adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
+            attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
+        }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
+            return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
+                prompt,
+                model,
+                sessionId,
+                resumeLatest,
+                createNewSession,
+                approvalMode,
+                approvalStrategy,
+                approvalPolicy,
+                mcpServers,
+                allowedTools,
+                includeDirs,
+                correlationId,
+                optimizePrompt,
+                idleTimeoutMs,
+                forceRefresh,
+                outputFormat,
+                sandbox,
+                policyFiles,
+                adminPolicyFiles,
+                attachments,
+            });
+        });
+        server.tool("grok_request_async", {
+            prompt: z
+                .string()
+                .min(1, "Prompt cannot be empty")
+                .max(100000, "Prompt too long (max 100k chars)")
+                .describe("Prompt text for Grok"),
+            model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
+            outputFormat: z
+                .enum(["plain", "json", "streaming-json"])
+                .optional()
+                .describe("Output format (plain|json|streaming-json). Grok default is plain."),
+            sessionId: z
+                .string()
+                .optional()
+                .describe("Session ID (user-provided CLI handle for --resume)"),
+            resumeLatest: z
+                .boolean()
+                .default(false)
+                .describe("Resume most recent Grok session in cwd (--continue)"),
+            createNewSession: z.boolean().default(false).describe("Force new session"),
+            alwaysApprove: z
+                .boolean()
+                .default(false)
+                .describe("Auto-approve all tool executions (--always-approve)"),
+            permissionMode: z
+                .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
+                .optional()
+                .describe("Grok permission mode"),
+            effort: z
+                .enum(["low", "medium", "high", "xhigh", "max"])
+                .optional()
+                .describe("Grok effort level"),
+            reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
+            approvalStrategy: z
+                .enum(["legacy", "mcp_managed"])
+                .default("legacy")
+                .describe("Approval strategy"),
+            approvalPolicy: z
+                .enum(["strict", "balanced", "permissive"])
+                .optional()
+                .describe("Approval policy override"),
+            mcpServers: z
+                .array(MCP_SERVER_ENUM)
+                .default(["sqry"])
+                .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
+            allowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Allowed built-in tools (passed as --tools comma list)"),
+            disallowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
+            correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+            optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+            idleTimeoutMs: z
+                .number()
+                .int()
+                .min(30_000)
+                .max(3_600_000)
+                .optional()
+                .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+            forceRefresh: z
+                .boolean()
+                .default(false)
+                .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+        }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
+            return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
+                prompt,
+                model,
+                outputFormat,
+                sessionId,
+                resumeLatest,
+                createNewSession,
+                alwaysApprove,
+                permissionMode,
+                effort,
+                reasoningEffort,
+                approvalStrategy,
+                approvalPolicy,
+                mcpServers,
+                allowedTools,
+                disallowedTools,
+                correlationId,
+                optimizePrompt,
+                idleTimeoutMs,
+                forceRefresh,
+            });
+        });
+        server.tool("mistral_request_async", { // Registers the async Mistral Vibe request tool; this object is its zod input schema.
+            prompt: z
+                .string()
+                .min(1, "Prompt cannot be empty")
+                .max(100000, "Prompt too long (max 100k chars)")
+                .describe("Prompt text for Mistral Vibe"),
+            model: z
+                .string()
+                .optional()
+                .describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
+            outputFormat: z
+                .enum(["plain", "json", "stream-json"])
+                .optional()
+                .describe("Output format (plain|json|stream-json). Vibe default is plain."),
+            sessionId: z
+                .string()
+                .optional()
+                .describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
+            resumeLatest: z
+                .boolean()
+                .default(false)
+                .describe("Resume most recent Vibe session in cwd (--continue)"),
+            createNewSession: z.boolean().default(false).describe("Force new session"),
+            permissionMode: z
+                .enum(MISTRAL_AGENT_MODES) // Allowed values come from MISTRAL_AGENT_MODES, defined outside this chunk.
+                .optional()
+                .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
+            effort: z
+                .enum(["low", "medium", "high", "xhigh", "max"])
+                .optional()
+                .describe("Vibe effort level"),
+            reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"), // Free-form string, unlike the enum-constrained `effort` above.
+            approvalStrategy: z
+                .enum(["legacy", "mcp_managed"])
+                .default("legacy")
+                .describe("Approval strategy"),
+            approvalPolicy: z
+                .enum(["strict", "balanced", "permissive"])
+                .optional()
+                .describe("Approval policy override"),
+            mcpServers: z
+                .array(MCP_SERVER_ENUM) // Element schema MCP_SERVER_ENUM is defined outside this chunk.
+                .default(["sqry"])
+                .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
+            allowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
+            disallowedTools: z
+                .array(z.string())
+                .optional()
+                .describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
+            correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
+            optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
+            idleTimeoutMs: z
+                .number()
+                .int()
+                .min(30_000) // 30 seconds, matching the "min 30s" in the description below.
+                .max(3_600_000) // 1 hour, matching the "max 1h" in the description below.
+                .optional()
+                .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
+            forceRefresh: z
+                .boolean()
+                .default(false)
+                .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
+        }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => { // Handler: receives the schema-validated arguments.
+            return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, { // First argument bundles shared dependencies; second passes every request field through unchanged.
+                prompt,
+                model,
+                outputFormat,
+                sessionId,
+                resumeLatest,
+                createNewSession,
+                permissionMode,
+                effort,
+                reasoningEffort,
+                approvalStrategy,
+                approvalPolicy,
+                mcpServers,
+                allowedTools,
+                disallowedTools,
+                correlationId,
+                optimizePrompt,
+                idleTimeoutMs,
+                forceRefresh,
+            });
+        });
+        server.tool("llm_job_status", { // Registers a tool that returns the snapshot of one async job as pretty-printed JSON text.
+            jobId: z.string().describe("Async job ID from *_request_async"),
+        }, async ({ jobId }) => {
+            const job = asyncJobManager.getJobSnapshot(jobId);
+            if (!job) { // A falsy snapshot is reported as "Job not found" and flagged with isError.
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                error: "Job not found",
+                                jobId, // Echo the requested ID so the caller can correlate the failure.
+                            }, null, 2),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            return { // Success path: the snapshot is embedded as-is under "job".
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            job,
+                        }, null, 2),
+                    },
+                ],
+            };
+        });
+        server.tool("llm_job_result", { // Registers a tool that returns an async job's result, plus a parsed summary for stream-json output.
+            jobId: z.string().describe("Async job ID from *_request_async"),
+            maxChars: z
+                .number()
+                .int()
+                .min(1000)
+                .max(2000000)
+                .default(200000)
+                .describe("Max chars returned per stream"),
+        }, async ({ jobId, maxChars }) => {
+            const result = asyncJobManager.getJobResult(jobId, maxChars); // maxChars is forwarded to the job manager, which is expected to apply the limit (not visible here).
+            if (!result) { // A falsy result is reported as "Job not found" and flagged with isError.
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                error: "Job not found",
+                                jobId,
+                            }, null, 2),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            // Parse stream-json output for Claude async jobs
+            const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
+            let parsed; // Stays undefined unless the stream-json branch below runs.
+            if (outputFormat === "stream-json" && result.stdout) { // The check looks only at the recorded format and non-empty stdout, not at which CLI ran the job.
+                parsed = parseStreamJson(result.stdout);
+            }
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            result,
+                            ...(parsed // Adds the "parsed" key only when parsing produced a truthy value.
+                                ? {
+                                    parsed: { // Only these five fields of the parse result are exposed.
+                                        text: parsed.text,
+                                        costUsd: parsed.costUsd,
+                                        usage: parsed.usage,
+                                        model: parsed.model,
+                                        numTurns: parsed.numTurns,
+                                    },
+                                }
+                                : {}),
+                        }, null, 2),
+                    },
+                ],
+            };
+        });
+        server.tool("llm_job_cancel", { // Registers a tool that asks the job manager to cancel one async job.
+            jobId: z.string().describe("Async job ID from *_request_async"),
+        }, async ({ jobId }) => {
+            const cancel = asyncJobManager.cancelJob(jobId); // Result object; this handler reads its `canceled` and `reason` fields.
+            if (!cancel.canceled) { // Cancellation did not happen: report why and flag the response with isError.
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                jobId,
+                                reason: cancel.reason || "Unable to cancel", // Generic fallback when the manager gives no (or an empty) reason.
+                            }, null, 2),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            return { // Success path: confirms the cancellation and echoes the job ID.
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            jobId,
+                        }, null, 2),
+                    },
+                ],
+            };
+        });
+    } // end if (asyncJobsEnabled)
+    server.tool("llm_process_health", {}, async () => {
+        const health = asyncJobManager.getJobHealth();
+        const persistenceBlock = {
+            backend: persistence.backend,
+            dbPath: persistence.path,
+            dsn: persistence.dsn ? "[redacted]" : null,
+            retentionDays: persistence.retentionDays,
+            dedupWindowMs: persistence.dedupWindowMs,
+            asyncJobsEnabled: persistence.asyncJobsEnabled,
+            acknowledgeEphemeral: persistence.acknowledgeEphemeral,
+            sources: persistence.sources,
+            warning: persistence.asyncJobsEnabled
+                ? null
+                : "Async job persistence is disabled (backend = 'none'). *_request_async tools are NOT registered on this gateway. Set [persistence].backend = 'sqlite' (or 'memory' + acknowledgeEphemeral = true) to enable them.",
         };
-    }
-    catch (error) {
-        const message = error instanceof Error ? error.message : String(error);
         return {
             content: [
                 {
                     type: "text",
-                    text: JSON.stringify({
-                        success: false,
-                        error: message,
-                    }, null, 2),
+                    text: JSON.stringify({ success: true, ...health, persistence: persistenceBlock }, null, 2),
                 },
             ],
-            isError: true,
         };
-    }
-});
-//──────────────────────────────────────────────────────────────────────────────
-// Session Management Tools
-//──────────────────────────────────────────────────────────────────────────────
-server.tool("session_create", {
-    cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
-    description: z.string().optional().describe("Session description"),
-    setAsActive: z.boolean().default(true).describe("Set as active session"),
-}, async ({ cli, description, setAsActive }) => {
-    try {
-        const session = await sessionManager.createSession(cli, description);
-        if (setAsActive) {
-            await sessionManager.setActiveSession(cli, session.id);
-        }
-        logger.info(`Created new ${cli} session: ${session.id}`);
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Approval Audit Tools
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("approval_list", {
+        limit: z
+            .number()
+            .int()
+            .min(1)
+            .max(500)
+            .default(50)
+            .describe("Max number of approval records"),
+        cli: z
+            .enum(["claude", "codex", "gemini", "grok", "mistral"])
+            .optional()
+            .describe("Optional CLI filter"),
+    }, async ({ limit, cli }) => {
+        const approvals = approvalManager.list(limit, cli);
         return {
             content: [
                 {
                     type: "text",
                     text: JSON.stringify({
                         success: true,
-                        session: {
-                            id: session.id,
-                            cli: session.cli,
-                            description: session.description,
-                            createdAt: session.createdAt,
-                            isActive: setAsActive,
-                        },
-                    }, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("session_create", 1, "", undefined, error);
-    }
-});
-server.tool("session_list", {
-    cli: z
-        .enum(["claude", "codex", "gemini"])
-        .optional()
-        .describe("CLI filter (claude|codex|gemini)"),
-}, async ({ cli }) => {
-    try {
-        const sessions = await sessionManager.listSessions(cli);
-        const activeSessions = {
-            claude: await sessionManager.getActiveSession("claude"),
-            codex: await sessionManager.getActiveSession("codex"),
-            gemini: await sessionManager.getActiveSession("gemini"),
-            grok: await sessionManager.getActiveSession("grok"),
-        };
-        const sessionList = sessions.map(s => ({
-            id: s.id,
-            cli: s.cli,
-            description: s.description,
-            createdAt: s.createdAt,
-            lastUsedAt: s.lastUsedAt,
-            isActive: activeSessions[s.cli]?.id === s.id,
-        }));
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        total: sessionList.length,
-                        sessions: sessionList,
-                        activeSessions: {
-                            claude: activeSessions.claude?.id || null,
-                            codex: activeSessions.codex?.id || null,
-                            gemini: activeSessions.gemini?.id || null,
-                            grok: activeSessions.grok?.id || null,
-                        },
+                        count: approvals.length,
+                        approvals,
                     }, null, 2),
                 },
             ],
         };
-    }
-    catch (error) {
-        return createErrorResponse("session_list", 1, "", undefined, error);
-    }
-});
-server.tool("session_set_active", {
-    cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
-    sessionId: z.string().nullable().describe("Session ID (null to clear)"),
-}, async ({ cli, sessionId }) => {
-    try {
-        const success = await sessionManager.setActiveSession(cli, sessionId || null);
-        if (!success) {
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // List Models Tool
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("list_models", { // Registers a tool that returns CLI info from getCliInfo(), optionally narrowed to one CLI.
+        cli: z
+            .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional()) // Empty string and null are mapped to undefined so the optional enum treats them as "no filter".
+            .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
+    }, async ({ cli }) => {
+        const cliInfo = getCliInfo();
+        const result = cli ? { [cli]: cliInfo[cli] } : cliInfo; // With a filter, return a single-key object for that CLI; otherwise return everything.
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+    });
+    server.tool("cli_versions", { // Registers a tool that returns the result of getCliVersions() wrapped as { versions }.
+        cli: z
+            .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional()) // Empty string and null are mapped to undefined so the optional enum treats them as "no filter".
+            .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
+    }, async ({ cli }) => {
+        const versions = await getCliVersions(cli); // cli may be undefined here; how that is interpreted is up to getCliVersions (not visible here).
+        return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
+    });
+    server.tool("upstream_contracts", { // Registers a tool that returns the report built by buildUpstreamContractReport().
+        cli: z
+            .preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional()) // Empty string and null are mapped to undefined; valid values come from SESSION_PROVIDER_ENUM (defined outside this chunk).
+            .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
+        probeInstalled: z
+            .boolean()
+            .default(false)
+            .describe("When true, run local --help probes and compare advertised flags"),
+    }, async ({ cli, probeInstalled }) => {
+        const report = buildUpstreamContractReport({ cli, probeInstalled }); // Called without await; its return value is serialized directly.
+        return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
+    });
+    server.tool("cli_upgrade", {
+        cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
+        target: z
+            .string()
+            .min(1)
+            .default("latest")
+            .describe("Package tag/version/target to install (default: latest)"),
+        dryRun: z
+            .boolean()
+            .default(true)
+            .describe("When true, return the upgrade plan without running it"),
+        timeoutMs: z
+            .number()
+            .int()
+            .min(30_000)
+            .max(3_600_000)
+            .optional()
+            .describe("Upgrade timeout in ms when dryRun=false"),
+    }, async ({ cli, target, dryRun, timeoutMs }) => {
+        try {
+            const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
             return {
                 content: [
                     {
                         type: "text",
                         text: JSON.stringify({
-                            success: false,
-                            error: "Session not found or does not belong to the specified CLI",
+                            success: true,
+                            ...result,
                         }, null, 2),
                     },
                 ],
-                isError: true,
             };
         }
-        logger.info(`Set active ${cli} session to: ${sessionId}`);
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: true,
-                        cli,
-                        activeSessionId: sessionId,
-                    }, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("session_set_active", 1, "", undefined, error);
-    }
-});
-server.tool("session_delete", {
-    sessionId: z.string().describe("Session ID"),
-}, async ({ sessionId }) => {
-    try {
-        const session = await sessionManager.getSession(sessionId);
-        if (!session) {
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
             return {
                 content: [
                     {
                         type: "text",
                         text: JSON.stringify({
                             success: false,
-                            error: "Session not found",
+                            error: message,
                         }, null, 2),
                     },
                 ],
                 isError: true,
             };
         }
-        const success = await sessionManager.deleteSession(sessionId);
-        logger.info(`Deleted session: ${sessionId}`);
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success,
-                        deletedSession: {
-                            id: session.id,
-                            cli: session.cli,
-                            description: session.description,
+    });
+    //──────────────────────────────────────────────────────────────────────────────
+    // Session Management Tools
+    //──────────────────────────────────────────────────────────────────────────────
+    server.tool("session_create", { // Registers a tool that creates a session for a CLI and, by default, marks it active.
+        cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
+        description: z.string().optional().describe("Session description"),
+        setAsActive: z.boolean().default(true).describe("Set as active session"),
+    }, async ({ cli, description, setAsActive }) => {
+        try {
+            const session = await sessionManager.createSession(cli, description);
+            if (setAsActive) {
+                await sessionManager.setActiveSession(cli, session.id); // The return value is not checked here.
+            }
+            logger.info(`Created new ${cli} session: ${session.id}`);
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            session: {
+                                id: session.id,
+                                cli: session.cli,
+                                description: session.description,
+                                createdAt: session.createdAt,
+                                isActive: setAsActive, // Reflects the request flag, not a lookup of the stored active session.
+                            },
+                        }, null, 2),
+                    },
+                ],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("session_create", 1, "", undefined, error); // Any thrown error is converted to a tool response; argument meanings are defined by createErrorResponse (not visible here).
+        }
+    });
+    server.tool("session_list", { // Registers a tool that lists sessions (optionally for one CLI) together with the active session ID per CLI.
+        cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
+    }, async ({ cli }) => {
+        try {
+            const sessions = await sessionManager.listSessions(cli);
+            const activeSessions = { // Active session is fetched for all five CLIs, one await after another, regardless of the `cli` filter.
+                claude: await sessionManager.getActiveSession("claude"),
+                codex: await sessionManager.getActiveSession("codex"),
+                gemini: await sessionManager.getActiveSession("gemini"),
+                grok: await sessionManager.getActiveSession("grok"),
+                mistral: await sessionManager.getActiveSession("mistral"),
+            };
+            const sessionList = sessions.map(s => ({ // Projects each session to the fields exposed by this tool.
+                id: s.id,
+                cli: s.cli,
+                description: s.description,
+                createdAt: s.createdAt,
+                lastUsedAt: s.lastUsedAt,
+                isActive: activeSessions[s.cli]?.id === s.id, // True only when this session is the active one for its own CLI.
+            }));
+            return { // Note: unlike the other session tools, this payload carries no "success" field.
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            total: sessionList.length,
+                            sessions: sessionList,
+                            activeSessions: { // `|| null` keeps every key in the JSON output; an undefined value would be dropped by JSON.stringify.
+                                claude: activeSessions.claude?.id || null,
+                                codex: activeSessions.codex?.id || null,
+                                gemini: activeSessions.gemini?.id || null,
+                                grok: activeSessions.grok?.id || null,
+                                mistral: activeSessions.mistral?.id || null,
+                            },
+                        }, null, 2),
+                    },
+                ],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("session_list", 1, "", undefined, error); // Any thrown error is converted to a tool response; argument meanings are defined by createErrorResponse (not visible here).
+        }
+    });
+    server.tool("session_set_active", {
+        cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
+        sessionId: z.string().nullable().describe("Session ID (null to clear)"),
+    }, async ({ cli, sessionId }) => {
+        try {
+            const success = await sessionManager.setActiveSession(cli, sessionId || null);
+            if (!success) {
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                error: "Session not found or does not belong to the specified CLI",
+                            }, null, 2),
                         },
-                    }, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("session_delete", 1, "", undefined, error);
-    }
-});
-server.tool("session_get", {
-    sessionId: z.string().describe("Session ID"),
-}, async ({ sessionId }) => {
-    try {
-        const session = await sessionManager.getSession(sessionId);
-        if (!session) {
+                    ],
+                    isError: true,
+                };
+            }
+            logger.info(`Set active ${cli} session to: ${sessionId}`);
             return {
                 content: [
                     {
                         type: "text",
                         text: JSON.stringify({
-                            success: false,
-                            error: "Session not found",
+                            success: true,
+                            cli,
+                            activeSessionId: sessionId,
                         }, null, 2),
                     },
                 ],
-                isError: true,
             };
         }
-        const activeSession = await sessionManager.getActiveSession(session.cli);
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: true,
-                        session: {
-                            ...session,
-                            isActive: activeSession?.id === session.id,
+        catch (error) {
+            return createErrorResponse("session_set_active", 1, "", undefined, error);
+        }
+    });
+    server.tool("session_delete", {
+        sessionId: z.string().describe("Session ID"),
+    }, async ({ sessionId }) => {
+        try {
+            const session = await sessionManager.getSession(sessionId);
+            if (!session) {
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                error: "Session not found",
+                            }, null, 2),
                         },
-                    }, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("session_get", 1, "", undefined, error);
-    }
-});
-server.tool("session_clear_all", {
-    cli: z
-        .enum(["claude", "codex", "gemini"])
-        .optional()
-        .describe("CLI filter (claude|codex|gemini)"),
-}, async ({ cli }) => {
-    try {
-        const count = await sessionManager.clearAllSessions(cli);
-        logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify({
-                        success: true,
-                        deletedCount: count,
-                        cli: cli || "all",
-                    }, null, 2),
-                },
-            ],
-        };
-    }
-    catch (error) {
-        return createErrorResponse("session_clear_all", 1, "", undefined, error);
-    }
-});
+                    ],
+                    isError: true,
+                };
+            }
+            const success = await sessionManager.deleteSession(sessionId);
+            logger.info(`Deleted session: ${sessionId}`);
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success,
+                            deletedSession: {
+                                id: session.id,
+                                cli: session.cli,
+                                description: session.description,
+                            },
+                        }, null, 2),
+                    },
+                ],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("session_delete", 1, "", undefined, error);
+        }
+    });
+    server.tool("session_get", { // Registers a tool that returns one session by ID, annotated with whether it is the active session for its CLI.
+        sessionId: z.string().describe("Session ID"),
+    }, async ({ sessionId }) => {
+        try {
+            const session = await sessionManager.getSession(sessionId);
+            if (!session) { // A falsy lookup result is reported as "Session not found" and flagged with isError.
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                success: false,
+                                error: "Session not found",
+                            }, null, 2),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            const activeSession = await sessionManager.getActiveSession(session.cli); // Looked up for the session's own CLI.
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            session: {
+                                ...session, // All stored session fields are returned; isActive is added on top.
+                                isActive: activeSession?.id === session.id,
+                            },
+                        }, null, 2),
+                    },
+                ],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("session_get", 1, "", undefined, error); // Any thrown error is converted to a tool response; argument meanings are defined by createErrorResponse (not visible here).
+        }
+    });
+    server.tool("session_clear_all", { // Registers a tool that clears sessions via sessionManager.clearAllSessions and reports the returned count.
+        cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
+    }, async ({ cli }) => {
+        try {
+            const count = await sessionManager.clearAllSessions(cli); // cli may be undefined; the response below labels that case "all".
+            logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: JSON.stringify({
+                            success: true,
+                            deletedCount: count,
+                            cli: cli || "all", // Reports "all" when no CLI filter was given.
+                        }, null, 2),
+                    },
+                ],
+            };
+        }
+        catch (error) {
+            return createErrorResponse("session_clear_all", 1, "", undefined, error); // Any thrown error is converted to a tool response; argument meanings are defined by createErrorResponse (not visible here).
+        }
+    });
+    return server;
+}
 //──────────────────────────────────────────────────────────────────────────────
 // Async Initialization
 //──────────────────────────────────────────────────────────────────────────────
@@ -2592,7 +3835,7 @@ async function initializeSessionManager() {
 //──────────────────────────────────────────────────────────────────────────────
 // Health Check Resource (only if using PostgreSQL)
 //──────────────────────────────────────────────────────────────────────────────
-function registerHealthResource() {
+function registerHealthResource(server) {
     if (db) {
         server.registerResource("health", "health://status", {
             title: "🏥 Health Status",
@@ -2640,8 +3883,16 @@ async function shutdown(signal) {
         // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
         await killAllProcessGroups();
         logger.info("All process groups terminated");
-        await server.close();
-        logger.info("MCP server closed");
+        if (activeHttpGateway) {
+            await activeHttpGateway.close();
+            logger.info("HTTP MCP transport closed");
+            activeHttpGateway = null;
+        }
+        if (activeServer) {
+            await activeServer.close();
+            logger.info("MCP server closed");
+            activeServer = null;
+        }
         if (db) {
             await db.disconnect();
             logger.info("Database connections closed");
@@ -2661,18 +3912,74 @@ process.on("SIGINT", () => shutdown("SIGINT"));
 // Server Startup
 //──────────────────────────────────────────────────────────────────────────────
 async function main() {
-    logger.info("Starting llm-cli-gateway MCP server");
+    const args = process.argv.slice(2);
+    if (args[0] === "doctor") {
+        if (args.includes("--json")) {
+            printDoctorJson();
+            return;
+        }
+        process.stderr.write("Only doctor --json is supported in this layer.\n");
+        process.exit(2);
+    }
+    if (args[0] === "contracts") {
+        if (args.includes("--json")) {
+            const cliArg = args.find(arg => arg.startsWith("--cli="))?.split("=")[1];
+            const cli = SESSION_PROVIDER_VALUES.includes(cliArg)
+                ? cliArg
+                : undefined;
+            if (cliArg && !cli) {
+                process.stderr.write(`Unsupported --cli value: ${cliArg}\n`);
+                process.exit(2);
+            }
+            const probeInstalled = args.includes("--probe-installed");
+            process.stdout.write(JSON.stringify(buildUpstreamContractReport({ cli, probeInstalled }), null, 2) + "\n");
+            return;
+        }
+        process.stderr.write("Usage: llm-cli-gateway contracts --json [--cli=claude|codex|gemini|grok|mistral] [--probe-installed]\n");
+        process.exit(2);
+    }
+    const transportArg = args.find(arg => arg.startsWith("--transport="));
+    const transportMode = transportArg?.split("=")[1] ||
+        process.env.LLM_GATEWAY_TRANSPORT ||
+        process.env.MCP_TRANSPORT ||
+        "stdio";
+    logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
     // Initialize session manager first
     await initializeSessionManager();
+    const serverDeps = {
+        sessionManager,
+        resourceProvider,
+        db,
+        performanceMetrics,
+        asyncJobManager,
+        approvalManager,
+        flightRecorder,
+        logger,
+    };
+    if (transportMode === "http") {
+        activeHttpGateway = await startHttpGateway({
+            deps: serverDeps,
+            createGatewayServer,
+            logger,
+        });
+        logger.info(`llm-cli-gateway HTTP MCP server connected and ready at ${activeHttpGateway.url}`);
+        return;
+    }
+    if (transportMode !== "stdio") {
+        throw new Error(`Unsupported transport: ${transportMode}`);
+    }
+    activeServer = createGatewayServer({
+        ...serverDeps,
+    });
     // Register health check resource if using PostgreSQL
-    registerHealthResource();
+    registerHealthResource(activeServer);
     const transport = new StdioServerTransport();
-    await server.connect(transport);
+    await activeServer.connect(transport);
     logger.info("llm-cli-gateway MCP server connected and ready");
 }
 // Guard: only auto-start when run directly (not imported for testing)
 // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
-const __entryUrl = process.argv[1] ? new URL(realpathSync(process.argv[1]), "file://").href : "";
+const __entryUrl = entrypointFileURL(process.argv[1]);
 if (__entryUrl === import.meta.url) {
     main().catch(error => {
         logger.error("Fatal server error:", error);