npm - llm-cli-gateway - Versions diffs - 2.5.0 → 2.6.3 - Mend

llm-cli-gateway 2.5.0 → 2.6.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

package/CHANGELOG.md +63 -0
package/README.md +16 -18
package/dist/async-job-manager.d.ts +3 -0
package/dist/async-job-manager.js +44 -3
package/dist/cache-stats.d.ts +2 -0
package/dist/cache-stats.js +17 -4
package/dist/cli-updater.js +22 -13
package/dist/executor.d.ts +1 -0
package/dist/executor.js +7 -0
package/dist/flight-recorder.d.ts +1 -0
package/dist/flight-recorder.js +14 -2
package/dist/index.d.ts +1 -0
package/dist/index.js +116 -281
package/dist/prompt-parts.js +5 -2
package/dist/provider-codegen.d.ts +27 -0
package/dist/provider-codegen.js +335 -0
package/dist/provider-login-guidance.js +9 -9
package/dist/provider-status.js +5 -5
package/dist/request-helpers.js +2 -2
package/dist/upstream-contracts.js +70 -95
package/npm-shrinkwrap.json +2 -2
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -7,7 +7,7 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, renameSync, unlinkSyn
 import { dirname, join } from "path";
 import { fileURLToPath } from "url";
 import { z } from "zod/v3";
-import { executeCli, killAllProcessGroups } from "./executor.js";
+import { executeCli, killAllProcessGroups, providerCommandName } from "./executor.js";
 import { parseStreamJson } from "./stream-json-parser.js";
 import { parseCodexJsonStream } from "./codex-json-parser.js";
 import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js";
@@ -27,7 +27,7 @@ import { createJobStore } from "./job-store.js";
 import { ApprovalManager } from "./approval-manager.js";
 import { checkReviewIntegrity } from "./review-integrity.js";
 import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
-import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
+import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
 import { createFlightRecorder } from "./flight-recorder.js";
 import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
 import { computeSessionCacheStats, computeTtlRemaining, readPersistedRequest, PERSISTED_REQUEST_DEFAULT_MAX_CHARS, } from "./cache-stats.js";
@@ -38,7 +38,8 @@ import { printDoctorJson } from "./doctor.js";
 import { createWorkspace, describeWorkspace, getWorkspace, loadWorkspaceRegistry, registerExistingWorkspace, resolveWorkspaceForProvider, validatePathInsideWorkspace, } from "./workspace-registry.js";
 import { generateSecret, hashSecret } from "./oauth.js";
 import { registerValidationTools } from "./validation-tools.js";
-import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildProviderSubcommandsCompactCatalog, buildUpstreamContractReport, getCliSubcommandContract, probeInstalledCliContract, serializeCliSubcommandContract, } from "./upstream-contracts.js";
+import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildProviderSubcommandsCompactCatalog, buildUpstreamContractReport, getCliSubcommandContract, probeInstalledCliContract, serializeCliSubcommandContract, UPSTREAM_CLI_CONTRACTS, } from "./upstream-contracts.js";
+import { buildArgvFromGeneration, deriveZodShapeFromGeneration, GROK_FLAG_GENERATION, GROK_GEN_OUTPUT_FORMAT, GROK_GEN_MAIN, GROK_GEN_PROMPT_FILE, GROK_GEN_SINGLE, GROK_GEN_TAIL, } from "./provider-codegen.js";
 import { entrypointFileURL } from "./entrypoint-url.js";
 const logger = {
     info: (message, ...args) => {
@@ -162,7 +163,7 @@ Other: list_models, cli_versions, upstream_contracts, provider_subcommands_* (re
 Key behaviors:
 ${deferralLine}
-- Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
+- Sessions: Claude --continue, Gemini (Antigravity) --conversation <id>/--continue, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
 - Approval gates: opt-in via approvalStrategy:"mcp_managed".
 - Upstream drift detection: After upgrading any provider CLI (especially grok), use upstream_contracts with probeInstalled:true and provider_subcommand_drift for declared subcommand help surfaces. Probes are safe, read-only --help checks.
 - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
@@ -230,6 +231,7 @@ function getApprovalManager(runtimeLogger = logger) {
 const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
 const CLI_TYPE_ENUM = z.enum(CLI_TYPES);
 export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
+const GROK_GENERATED_SHAPE = deriveZodShapeFromGeneration(UPSTREAM_CLI_CONTRACTS.grok, GROK_FLAG_GENERATION);
 export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
 export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
 export const WORKTREE_SCHEMA = z
@@ -340,7 +342,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
         runtime.persistence.asyncJobsEnabled &&
         runtime.asyncJobManager.hasStore();
     if (SYNC_DEADLINE_MS === 0 || !deferralAvailable) {
-        const command = cli === "mistral" ? "vibe" : cli;
+        const command = providerCommandName(cli);
         try {
             return await executeCli(command, args, {
                 idleTimeout: idleTimeoutMs,
@@ -687,6 +689,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
         content: [{ type: "text", text: errorMessage }],
         isError: true,
         structuredContent: {
+            response: errorMessage,
             correlationId: correlationId || null,
             cli,
             exitCode: code,
@@ -758,6 +761,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
             stablePrefixHash: prep.stablePrefixHash ?? undefined,
             stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
             cacheControlBlocks: prep.cacheControlBlocks,
+            cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
         },
         extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
     };
@@ -1102,7 +1106,18 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
     const ccEarly = params.promptParts?.cacheControl;
     const cacheControlRequestedEarly = !!(ccEarly &&
         (ccEarly.system || ccEarly.tools || ccEarly.context));
-    if (params.optimizePrompt && cacheControlRequestedEarly) {
+    const explicitCacheControlBlockCount = params.promptParts && ccEarly
+        ? (ccEarly.system && params.promptParts.system && params.promptParts.system.length > 0
+            ? 1
+            : 0) +
+            (ccEarly.tools && params.promptParts.tools && params.promptParts.tools.length > 0 ? 1 : 0) +
+            (ccEarly.context && params.promptParts.context && params.promptParts.context.length > 0
+                ? 1
+                : 0)
+        : 0;
+    const effectiveExplicitCacheControl = explicitCacheControlBlockCount > 0;
+    const cacheControlNoop = cacheControlRequestedEarly && !effectiveExplicitCacheControl;
+    if (params.optimizePrompt && effectiveExplicitCacheControl) {
         return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
     }
     let effectivePrompt = assembledPrompt;
@@ -1111,7 +1126,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
         effectivePrompt = optimized;
     }
-    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
+    const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
     const mcpConfigResolution = resolveClaudeMcpConfig(params.operation, corrId, requestedMcpServers, params.strictMcpConfig);
     if ("errorResponse" in mcpConfigResolution) {
         return mcpConfigResolution.errorResponse;
@@ -1137,7 +1152,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         }
     }
     let autoEmittedCacheControlBlock = null;
-    if (!cacheControlRequestedEarly &&
+    if (!effectiveExplicitCacheControl &&
         runtime.cacheAwareness.emitAnthropicCacheControl &&
         !params.optimizePrompt &&
         params.outputFormat === "stream-json" &&
@@ -1161,7 +1176,14 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         }
     }
     const warnings = [];
-    if (!cacheControlRequestedEarly &&
+    if (cacheControlNoop) {
+        warnings.push({
+            code: "cache_control_noop",
+            message: "promptParts.cacheControl only marked empty or omitted stable parts; no cache_control breakpoint will be emitted from the explicit marker.",
+            reason: "cacheControl marker did not match a non-empty stable block",
+        });
+    }
+    if (!effectiveExplicitCacheControl &&
         autoEmittedCacheControlBlock === null &&
         params.promptParts &&
         stablePrefixTokens !== null) {
@@ -1181,9 +1203,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             });
         }
     }
-    const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
+    const cacheControlRequested = effectiveExplicitCacheControl || autoEmittedCacheControlBlock !== null;
     let stdinPayload;
     let cacheControlBlocks;
+    let cacheControlTtlSeconds;
     if (cacheControlRequested) {
         if (params.outputFormat !== "stream-json") {
             return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
@@ -1200,6 +1223,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         const built = assembleClaudeCacheBlocks(effectiveParts);
         stdinPayload = `${JSON.stringify(built.payload)}\n`;
         cacheControlBlocks = built.markedBlockCount;
+        cacheControlTtlSeconds = built.markedBlockCount > 0 ? 3600 : undefined;
     }
     const args = cacheControlRequested
         ? [
@@ -1288,6 +1312,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         stablePrefixTokens,
         stdinPayload,
         cacheControlBlocks,
+        cacheControlTtlSeconds,
         warnings: warnings.length > 0 ? warnings : undefined,
     };
 }
@@ -1320,7 +1345,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
         logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
         effectivePrompt = optimized;
     }
-    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
+    const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
         approvalDecision = runtime.approvalManager.decide({
@@ -1478,7 +1503,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
         logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
         effectivePrompt = optimized;
     }
-    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
+    const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
     let approvalDecision = null;
     if (params.approvalStrategy === "mcp_managed") {
         approvalDecision = runtime.approvalManager.decide({
@@ -1498,51 +1523,45 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
         }
     }
     const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
-    const highImpact = prepareGeminiHighImpactFlags({
-        sandbox: params.sandbox,
-        policyFiles: params.policyFiles,
-        adminPolicyFiles: params.adminPolicyFiles,
-    });
-    if (highImpact.missingPolicyPath) {
-        return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
-    }
-    if (params.attachments && params.attachments.length > 0) {
-        try {
-            effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
-        }
-        catch (err) {
-            return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
-        }
-    }
-    const args = ["-p", effectivePrompt];
-    if (resolvedModel)
-        args.push("--model", resolvedModel);
-    if (effectiveApprovalMode)
-        args.push("--approval-mode", effectiveApprovalMode);
-    if (params.yolo && effectiveApprovalMode !== "yolo") {
-        args.push("--yolo");
+    const unsupported = (field, detail) => createErrorResponse(params.operation, 1, "", corrId, new Error(`${field} is not supported by Antigravity CLI (agy): ${detail}`));
+    if (effectiveApprovalMode &&
+        effectiveApprovalMode !== "default" &&
+        effectiveApprovalMode !== "yolo") {
+        return unsupported("approvalMode", "use 'default' for prompted execution or 'yolo'/yolo=true for --dangerously-skip-permissions");
     }
     if (params.allowedTools && params.allowedTools.length > 0) {
-        sanitizeCliArgValues(params.allowedTools, "allowedTools");
-        params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
+        return unsupported("allowedTools", "agy has no non-interactive allowed-tools flag");
     }
     if (requestedMcpServers.length > 0) {
-        sanitizeCliArgValues(requestedMcpServers, "mcpServers");
-        requestedMcpServers.forEach(serverName => args.push("--allowed-mcp-server-names", serverName));
+        return unsupported("mcpServers", "agy has no non-interactive allowed MCP server allowlist flag");
     }
-    if (params.includeDirs && params.includeDirs.length > 0) {
-        sanitizeCliArgValues(params.includeDirs, "includeDirs");
-        params.includeDirs.forEach(dir => args.push("--include-directories", dir));
+    if (params.outputFormat && params.outputFormat !== "text") {
+        return unsupported("outputFormat", "agy print mode currently emits text only");
     }
-    args.push(...highImpact.args);
-    if (params.outputFormat === "json") {
-        args.push("-o", "json");
+    if (params.policyFiles && params.policyFiles.length > 0) {
+        return unsupported("policyFiles", "agy has no --policy flag");
     }
-    else if (params.outputFormat === "stream-json") {
-        args.push("-o", "stream-json");
+    if (params.adminPolicyFiles && params.adminPolicyFiles.length > 0) {
+        return unsupported("adminPolicyFiles", "agy has no --admin-policy flag");
+    }
+    if (params.attachments && params.attachments.length > 0) {
+        return unsupported("attachments", "agy has no documented @path attachment-token contract");
     }
     if (params.skipTrust) {
-        args.push("--skip-trust");
+        return unsupported("skipTrust", "agy has no --skip-trust flag");
+    }
+    const args = ["--print", effectivePrompt];
+    if (resolvedModel)
+        args.push("--model", resolvedModel);
+    if (params.includeDirs && params.includeDirs.length > 0) {
+        sanitizeCliArgValues(params.includeDirs, "includeDirs");
+        params.includeDirs.forEach(dir => args.push("--add-dir", dir));
+    }
+    if (params.sandbox) {
+        args.push("--sandbox");
+    }
+    if (params.yolo || effectiveApprovalMode === "yolo") {
+        args.push("--dangerously-skip-permissions");
     }
     return {
         corrId,
@@ -1611,76 +1630,19 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
         }
     }
     const effectiveAlwaysApprove = params.approvalStrategy === "mcp_managed" ? true : Boolean(params.alwaysApprove);
+    const grokContract = UPSTREAM_CLI_CONTRACTS.grok;
+    const genParams = params;
     const args = ["-p", effectivePrompt];
     if (resolvedModel)
         args.push("--model", resolvedModel);
-    if (params.outputFormat)
-        args.push("--output-format", params.outputFormat);
+    args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_OUTPUT_FORMAT, genParams));
     if (effectiveAlwaysApprove) {
         args.push("--always-approve");
     }
     else if (params.permissionMode) {
         args.push("--permission-mode", params.permissionMode);
     }
-    if (params.effort)
-        args.push("--effort", params.effort);
-    if (params.reasoningEffort)
-        args.push("--reasoning-effort", params.reasoningEffort);
-    if (params.allowedTools && params.allowedTools.length > 0) {
-        args.push("--tools", params.allowedTools.join(","));
-    }
-    if (params.disallowedTools && params.disallowedTools.length > 0) {
-        args.push("--disallowed-tools", params.disallowedTools.join(","));
-    }
-    if (params.maxTurns !== undefined) {
-        args.push("--max-turns", String(params.maxTurns));
-    }
-    if (params.workingDir) {
-        args.push("--cwd", params.workingDir);
-    }
-    if (params.sandbox) {
-        args.push("--sandbox", params.sandbox);
-    }
-    if (params.rules) {
-        args.push("--rules", params.rules);
-    }
-    if (params.systemPromptOverride) {
-        args.push("--system-prompt-override", params.systemPromptOverride);
-    }
-    if (params.allow && params.allow.length > 0) {
-        for (const rule of params.allow) {
-            args.push("--allow", rule);
-        }
-    }
-    if (params.deny && params.deny.length > 0) {
-        for (const rule of params.deny) {
-            args.push("--deny", rule);
-        }
-    }
-    if (params.compactionMode) {
-        args.push("--compaction-mode", params.compactionMode);
-    }
-    if (params.compactionDetail) {
-        args.push("--compaction-detail", params.compactionDetail);
-    }
-    if (params.agent) {
-        args.push("--agent", params.agent);
-    }
-    if (params.bestOfN !== undefined) {
-        args.push("--best-of-n", String(params.bestOfN));
-    }
-    if (params.check) {
-        args.push("--check");
-    }
-    if (params.disableWebSearch) {
-        args.push("--disable-web-search");
-    }
-    if (params.todoGate) {
-        args.push("--todo-gate");
-    }
-    if (params.verbatim) {
-        args.push("--verbatim");
-    }
+    args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_MAIN, genParams));
     if (params.agents !== undefined) {
         if (typeof params.agents === "string") {
             if (!params.agents.trim()) {
@@ -1696,9 +1658,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
             args.push("--agents", JSON.stringify(agentsResult.value));
         }
     }
-    if (params.promptFile) {
-        args.push("--prompt-file", params.promptFile);
-    }
+    args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_PROMPT_FILE, genParams));
     if (params.promptJson !== undefined) {
         const promptJsonValue = typeof params.promptJson === "string" ? params.promptJson : JSON.stringify(params.promptJson);
         if (!promptJsonValue.trim()) {
@@ -1706,33 +1666,8 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
         }
         args.push("--prompt-json", promptJsonValue);
     }
-    if (params.single) {
-        args.push("--single", params.single);
-    }
-    if (params.experimentalMemory) {
-        args.push("--experimental-memory");
-    }
-    if (params.noAltScreen) {
-        args.push("--no-alt-screen");
-    }
-    if (params.noMemory) {
-        args.push("--no-memory");
-    }
-    if (params.noPlan) {
-        args.push("--no-plan");
-    }
-    if (params.noSubagents) {
-        args.push("--no-subagents");
-    }
-    if (params.oauth) {
-        args.push("--oauth");
-    }
-    if (params.restoreCode) {
-        args.push("--restore-code");
-    }
-    if (params.leaderSocket) {
-        args.push("--leader-socket", params.leaderSocket);
-    }
+    args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_SINGLE, genParams));
+    args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_TAIL, genParams));
     if (params.nativeWorktree === true) {
         args.push("--worktree");
     }
@@ -1889,6 +1824,7 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
     const response = {
         content: [{ type: "text", text: finalStdout }],
         structuredContent: {
+            response: finalStdout,
             model: prep.resolvedModel || "default",
             cli,
             correlationId: corrId,
@@ -2024,6 +1960,7 @@ function buildGrokApiToolResponse(args) {
     const response = {
         content: [{ type: "text", text }],
         structuredContent: {
+            response: text,
             provider: "grok-api",
             cli: "grok-api",
             model: args.result.model || args.prep.resolvedModel,
@@ -3468,6 +3405,7 @@ export function createGatewayServer(deps = {}) {
             stablePrefixHash: prep.stablePrefixHash ?? undefined,
             stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
             cacheControlBlocks: prep.cacheControlBlocks,
+            cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
         }, runtime);
         logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
         try {
@@ -3967,23 +3905,23 @@ export function createGatewayServer(deps = {}) {
             performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
         }
     });
-    server.tool("gemini_request", "Run a Google Gemini CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
+    server.tool("gemini_request", "Run a Google Antigravity CLI (`agy`) request through the Gemini-compatible gateway tool synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
         prompt: z
             .string()
             .min(1, "Prompt cannot be empty")
             .max(100000, "Prompt too long (max 100k chars)")
             .optional()
-            .describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
+            .describe("Prompt text for Antigravity CLI (mutually exclusive with promptParts)"),
         promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
         model: z
             .string()
             .optional()
-            .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
+            .describe("Model name or alias passed to agy --model (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
         sessionId: z
             .string()
             .optional()
-            .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
-        resumeLatest: z.boolean().default(false).describe("Resume latest session"),
+            .describe("Antigravity conversation ID to resume (emits --conversation <id>)"),
+        resumeLatest: z.boolean().default(false).describe("Continue the most recent conversation"),
         createNewSession: z.boolean().default(false).describe("Force new session"),
         approvalMode: z
             .enum(GEMINI_APPROVAL_MODES)
@@ -3999,13 +3937,16 @@ export function createGatewayServer(deps = {}) {
             .describe("Approval policy override"),
         mcpServers: z
             .array(MCP_SERVER_ENUM)
-            .default(["sqry"])
-            .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
+            .default([])
+            .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
         allowedTools: z
             .array(z.string())
             .optional()
-            .describe("Allowed tools (['Write','Edit','Bash'])"),
-        includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
+            .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
+        includeDirs: z
+            .array(z.string())
+            .optional()
+            .describe("Additional workspace directories passed as --add-dir"),
         correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
         optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
         optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
@@ -4023,19 +3964,19 @@ export function createGatewayServer(deps = {}) {
         outputFormat: z
             .enum(["text", "json", "stream-json"])
             .default("text")
-            .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
-        sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
-        policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
-        adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
-        attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
+            .describe("Antigravity CLI currently supports text output only through the gateway; json and stream-json are rejected."),
+        sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Antigravity in sandbox mode (--sandbox)"),
+        policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
+        adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
+        attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
         skipTrust: z
             .boolean()
             .default(false)
-            .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
+            .describe("Unsupported for Antigravity CLI; true is rejected."),
         yolo: z
             .boolean()
             .optional()
-            .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
+            .describe("Emit `--dangerously-skip-permissions` to auto-approve all actions. Routed through the same approval gate. Under mcp_managed the gate still decides."),
         workspace: WORKSPACE_ALIAS_SCHEMA.optional(),
         worktree: WORKTREE_SCHEMA.optional(),
     }, {
@@ -4083,10 +4024,7 @@ export function createGatewayServer(deps = {}) {
             .describe("Prompt text for Grok (mutually exclusive with promptParts)"),
         promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
         model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
-        outputFormat: z
-            .enum(["plain", "json", "streaming-json"])
-            .optional()
-            .describe("Output format (plain|json|streaming-json). Grok default is plain."),
+        ...GROK_GENERATED_SHAPE,
         sessionId: z
             .string()
             .optional()
@@ -4104,11 +4042,6 @@ export function createGatewayServer(deps = {}) {
             .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
             .optional()
             .describe("Grok permission mode"),
-        effort: z
-            .enum(["low", "medium", "high", "xhigh", "max"])
-            .optional()
-            .describe("Grok effort level"),
-        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
         approvalStrategy: z
             .enum(["legacy", "mcp_managed"])
             .default("legacy")
@@ -4121,14 +4054,6 @@ export function createGatewayServer(deps = {}) {
             .array(MCP_SERVER_ENUM)
             .default(["sqry"])
             .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
-        allowedTools: z
-            .array(z.string())
-            .optional()
-            .describe("Allowed built-in tools (passed as --tools comma list)"),
-        disallowedTools: z
-            .array(z.string())
-            .optional()
-            .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
         correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
         optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
         optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
@@ -4143,107 +4068,14 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
-        workingDir: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
-        sandbox: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --sandbox <PROFILE>: sandbox profile for filesystem and network access. Freeform per `grok --help` (no enum constraint on Grok 0.1.210); also settable via GROK_SANDBOX env var. Caller responsibility to pass a valid profile name."),
-        rules: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --rules <RULES>: extra rules to append to the system prompt. Supports `@file` prefix per `grok --help` to load from a file; gateway passes the value verbatim and lets Grok parse the prefix."),
-        systemPromptOverride: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --system-prompt-override <PROMPT>: replace the agent's system prompt entirely. Distinct from Claude's --system-prompt / --append-system-prompt (Grok has only one override flag, not a pair)."),
-        allow: z
-            .array(z.string())
-            .optional()
-            .describe('Grok --allow <RULE>: permission allow rules. Each entry is emitted as its own --allow instance (per `grok --help`: "Repeat to add multiple rules").'),
-        deny: z
-            .array(z.string())
-            .optional()
-            .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
-        compactionMode: z
-            .enum(["summary", "transcript", "segments"])
-            .optional()
-            .describe("Grok --compaction-mode: summary (default; no pointer) | transcript (points at the raw transcript) | segments (persists per-segment markdown to grep). Sets GROK_COMPACTION_MODE."),
-        compactionDetail: z
-            .enum(["none", "minimal", "balanced", "verbose"])
-            .optional()
-            .describe("Grok --compaction-detail: verbatim segment detail (none|minimal|balanced|verbose, default verbose). Only affects `--compaction-mode segments`. Sets GROK_COMPACTION_DETAIL."),
-        agent: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --agent <NAME>: agent name or definition file path."),
-        bestOfN: MAX_TURNS_SCHEMA.optional().describe("Grok --best-of-n <N>: run the task N ways in parallel and pick the best (headless only)."),
-        check: z
-            .boolean()
-            .optional()
-            .describe("Grok --check: append a self-verification loop to the prompt (headless only)."),
-        disableWebSearch: z
-            .boolean()
-            .optional()
-            .describe("Grok --disable-web-search: disable web search and remote retrieval tools."),
-        todoGate: z
-            .boolean()
-            .optional()
-            .describe("Grok --todo-gate: enable runtime turn-end TodoGate for this session (session-scoped, not persisted)."),
-        verbatim: z
-            .boolean()
-            .optional()
-            .describe("Grok --verbatim: send the prompt exactly as given. Also skips gateway optimizePrompt when true."),
         agents: z
             .union([z.string().min(1), z.record(z.string(), z.record(z.string(), z.unknown()))])
             .optional()
             .describe("Grok --agents <JSON>: inline subagent definitions (JSON string or name → { description, prompt, … } map)."),
-        promptFile: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --prompt-file <PATH>: single-turn prompt loaded from a file."),
         promptJson: z
             .union([z.string(), z.array(z.unknown()), z.record(z.string(), z.unknown())])
             .optional()
             .describe("Grok --prompt-json <JSON>: single-turn prompt JSON blocks (string or serializable value)."),
-        single: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok --single <PROMPT>: single-turn prompt (in addition to gateway -p)."),
-        experimentalMemory: z
-            .boolean()
-            .optional()
-            .describe("Grok --experimental-memory: enable cross-session memory."),
-        noAltScreen: z
-            .boolean()
-            .optional()
-            .describe("Grok --no-alt-screen: run inline without alt screen."),
-        noMemory: z.boolean().optional().describe("Grok --no-memory: disable cross-session memory."),
-        noPlan: z.boolean().optional().describe("Grok --no-plan: disable plan mode."),
-        noSubagents: z
-            .boolean()
-            .optional()
-            .describe("Grok --no-subagents: disable subagent spawning."),
-        oauth: z.boolean().optional().describe("Grok --oauth: use OAuth during authentication."),
-        restoreCode: z
-            .boolean()
-            .optional()
-            .describe("Grok --restore-code: check out the original session commit when resuming."),
-        leaderSocket: z
-            .string()
-            .min(1)
-            .optional()
-            .describe("Grok 0.2.32+ --leader-socket <PATH>: custom leader socket path (default ~/.grok/leader.sock). Targets an isolated leader process, e.g. a local/branch Grok build; name it ~/.grok/leader-*.sock to keep `grok leader list/kill` discovery working."),
         nativeWorktree: z
             .union([z.boolean(), z.string().min(1)])
             .optional()
@@ -4835,23 +4667,23 @@ export function createGatewayServer(deps = {}) {
                 worktree,
             });
         });
-        server.tool("gemini_request_async", "Start a Google Gemini CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
+        server.tool("gemini_request_async", "Start a Google Antigravity CLI (`agy`) request as a durable background job through the Gemini-compatible gateway tool. Poll with llm_job_status, collect with llm_job_result.", {
             prompt: z
                 .string()
                 .min(1, "Prompt cannot be empty")
                 .max(100000, "Prompt too long (max 100k chars)")
                 .optional()
-                .describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
+                .describe("Prompt text for Antigravity CLI (mutually exclusive with promptParts)"),
             promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
             model: z
                 .string()
                 .optional()
-                .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
+                .describe("Model name or alias passed to agy --model (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
             sessionId: z
                 .string()
                 .optional()
-                .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
-            resumeLatest: z.boolean().default(false).describe("Resume latest session"),
+                .describe("Antigravity conversation ID to resume (emits --conversation <id>)"),
+            resumeLatest: z.boolean().default(false).describe("Continue the most recent conversation"),
             createNewSession: z.boolean().default(false).describe("Force new session"),
             approvalMode: z
                 .enum(GEMINI_APPROVAL_MODES)
@@ -4867,13 +4699,16 @@ export function createGatewayServer(deps = {}) {
                 .describe("Approval policy override"),
             mcpServers: z
                 .array(MCP_SERVER_ENUM)
-                .default(["sqry"])
-                .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
+                .default([])
+                .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
             allowedTools: z
                 .array(z.string())
                 .optional()
-                .describe("Allowed tools (['Write','Edit','Bash'])"),
-            includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
+                .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
+            includeDirs: z
+                .array(z.string())
+                .optional()
+                .describe("Additional workspace directories passed as --add-dir"),
             correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
             optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
             idleTimeoutMs: z
@@ -4890,19 +4725,19 @@ export function createGatewayServer(deps = {}) {
             outputFormat: z
                 .enum(["text", "json", "stream-json"])
                 .default("text")
-                .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
-            sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
-            policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
-            adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
-            attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
+                .describe("Antigravity CLI currently supports text output only through the gateway; json and stream-json are rejected."),
+            sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Antigravity in sandbox mode (--sandbox)"),
+            policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
+            adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
+            attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
             skipTrust: z
                 .boolean()
                 .default(false)
-                .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
+                .describe("Unsupported for Antigravity CLI; true is rejected."),
             yolo: z
                 .boolean()
                 .optional()
-                .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
+                .describe("Emit `--dangerously-skip-permissions` to auto-approve all actions. Routed through the same approval gate. Under mcp_managed the gate still decides."),
             workspace: WORKSPACE_ALIAS_SCHEMA.optional(),
             worktree: WORKTREE_SCHEMA.optional(),
         }, {