llm-cli-gateway 2.5.0 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, renameSync, unlinkSyn
7
7
  import { dirname, join } from "path";
8
8
  import { fileURLToPath } from "url";
9
9
  import { z } from "zod/v3";
10
- import { executeCli, killAllProcessGroups } from "./executor.js";
10
+ import { executeCli, killAllProcessGroups, providerCommandName } from "./executor.js";
11
11
  import { parseStreamJson } from "./stream-json-parser.js";
12
12
  import { parseCodexJsonStream } from "./codex-json-parser.js";
13
13
  import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js";
@@ -27,7 +27,7 @@ import { createJobStore } from "./job-store.js";
27
27
  import { ApprovalManager } from "./approval-manager.js";
28
28
  import { checkReviewIntegrity } from "./review-integrity.js";
29
29
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
30
- import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
30
+ import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
31
31
  import { createFlightRecorder } from "./flight-recorder.js";
32
32
  import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
33
33
  import { computeSessionCacheStats, computeTtlRemaining, readPersistedRequest, PERSISTED_REQUEST_DEFAULT_MAX_CHARS, } from "./cache-stats.js";
@@ -38,7 +38,8 @@ import { printDoctorJson } from "./doctor.js";
38
38
  import { createWorkspace, describeWorkspace, getWorkspace, loadWorkspaceRegistry, registerExistingWorkspace, resolveWorkspaceForProvider, validatePathInsideWorkspace, } from "./workspace-registry.js";
39
39
  import { generateSecret, hashSecret } from "./oauth.js";
40
40
  import { registerValidationTools } from "./validation-tools.js";
41
- import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildProviderSubcommandsCompactCatalog, buildUpstreamContractReport, getCliSubcommandContract, probeInstalledCliContract, serializeCliSubcommandContract, } from "./upstream-contracts.js";
41
+ import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildProviderSubcommandsCompactCatalog, buildUpstreamContractReport, getCliSubcommandContract, probeInstalledCliContract, serializeCliSubcommandContract, UPSTREAM_CLI_CONTRACTS, } from "./upstream-contracts.js";
42
+ import { buildArgvFromGeneration, deriveZodShapeFromGeneration, GROK_FLAG_GENERATION, GROK_GEN_OUTPUT_FORMAT, GROK_GEN_MAIN, GROK_GEN_PROMPT_FILE, GROK_GEN_SINGLE, GROK_GEN_TAIL, } from "./provider-codegen.js";
42
43
  import { entrypointFileURL } from "./entrypoint-url.js";
43
44
  const logger = {
44
45
  info: (message, ...args) => {
@@ -162,7 +163,7 @@ Other: list_models, cli_versions, upstream_contracts, provider_subcommands_* (re
162
163
 
163
164
  Key behaviors:
164
165
  ${deferralLine}
165
- - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
166
+ - Sessions: Claude --continue, Gemini (Antigravity) --conversation <id>/--continue, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
166
167
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
167
168
  - Upstream drift detection: After upgrading any provider CLI (especially grok), use upstream_contracts with probeInstalled:true and provider_subcommand_drift for declared subcommand help surfaces. Probes are safe, read-only --help checks.
168
169
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
@@ -230,6 +231,7 @@ function getApprovalManager(runtimeLogger = logger) {
230
231
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
231
232
  const CLI_TYPE_ENUM = z.enum(CLI_TYPES);
232
233
  export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
234
+ const GROK_GENERATED_SHAPE = deriveZodShapeFromGeneration(UPSTREAM_CLI_CONTRACTS.grok, GROK_FLAG_GENERATION);
233
235
  export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
234
236
  export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
235
237
  export const WORKTREE_SCHEMA = z
@@ -340,7 +342,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
340
342
  runtime.persistence.asyncJobsEnabled &&
341
343
  runtime.asyncJobManager.hasStore();
342
344
  if (SYNC_DEADLINE_MS === 0 || !deferralAvailable) {
343
- const command = cli === "mistral" ? "vibe" : cli;
345
+ const command = providerCommandName(cli);
344
346
  try {
345
347
  return await executeCli(command, args, {
346
348
  idleTimeout: idleTimeoutMs,
@@ -687,6 +689,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
687
689
  content: [{ type: "text", text: errorMessage }],
688
690
  isError: true,
689
691
  structuredContent: {
692
+ response: errorMessage,
690
693
  correlationId: correlationId || null,
691
694
  cli,
692
695
  exitCode: code,
@@ -758,6 +761,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
758
761
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
759
762
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
760
763
  cacheControlBlocks: prep.cacheControlBlocks,
764
+ cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
761
765
  },
762
766
  extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
763
767
  };
@@ -1102,7 +1106,18 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1102
1106
  const ccEarly = params.promptParts?.cacheControl;
1103
1107
  const cacheControlRequestedEarly = !!(ccEarly &&
1104
1108
  (ccEarly.system || ccEarly.tools || ccEarly.context));
1105
- if (params.optimizePrompt && cacheControlRequestedEarly) {
1109
+ const explicitCacheControlBlockCount = params.promptParts && ccEarly
1110
+ ? (ccEarly.system && params.promptParts.system && params.promptParts.system.length > 0
1111
+ ? 1
1112
+ : 0) +
1113
+ (ccEarly.tools && params.promptParts.tools && params.promptParts.tools.length > 0 ? 1 : 0) +
1114
+ (ccEarly.context && params.promptParts.context && params.promptParts.context.length > 0
1115
+ ? 1
1116
+ : 0)
1117
+ : 0;
1118
+ const effectiveExplicitCacheControl = explicitCacheControlBlockCount > 0;
1119
+ const cacheControlNoop = cacheControlRequestedEarly && !effectiveExplicitCacheControl;
1120
+ if (params.optimizePrompt && effectiveExplicitCacheControl) {
1106
1121
  return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
1107
1122
  }
1108
1123
  let effectivePrompt = assembledPrompt;
@@ -1111,7 +1126,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1111
1126
  logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1112
1127
  effectivePrompt = optimized;
1113
1128
  }
1114
- const requestedMcpServers = normalizeMcpServers(params.mcpServers);
1129
+ const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
1115
1130
  const mcpConfigResolution = resolveClaudeMcpConfig(params.operation, corrId, requestedMcpServers, params.strictMcpConfig);
1116
1131
  if ("errorResponse" in mcpConfigResolution) {
1117
1132
  return mcpConfigResolution.errorResponse;
@@ -1137,7 +1152,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1137
1152
  }
1138
1153
  }
1139
1154
  let autoEmittedCacheControlBlock = null;
1140
- if (!cacheControlRequestedEarly &&
1155
+ if (!effectiveExplicitCacheControl &&
1141
1156
  runtime.cacheAwareness.emitAnthropicCacheControl &&
1142
1157
  !params.optimizePrompt &&
1143
1158
  params.outputFormat === "stream-json" &&
@@ -1161,7 +1176,14 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1161
1176
  }
1162
1177
  }
1163
1178
  const warnings = [];
1164
- if (!cacheControlRequestedEarly &&
1179
+ if (cacheControlNoop) {
1180
+ warnings.push({
1181
+ code: "cache_control_noop",
1182
+ message: "promptParts.cacheControl only marked empty or omitted stable parts; no cache_control breakpoint will be emitted from the explicit marker.",
1183
+ reason: "cacheControl marker did not match a non-empty stable block",
1184
+ });
1185
+ }
1186
+ if (!effectiveExplicitCacheControl &&
1165
1187
  autoEmittedCacheControlBlock === null &&
1166
1188
  params.promptParts &&
1167
1189
  stablePrefixTokens !== null) {
@@ -1181,9 +1203,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1181
1203
  });
1182
1204
  }
1183
1205
  }
1184
- const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
1206
+ const cacheControlRequested = effectiveExplicitCacheControl || autoEmittedCacheControlBlock !== null;
1185
1207
  let stdinPayload;
1186
1208
  let cacheControlBlocks;
1209
+ let cacheControlTtlSeconds;
1187
1210
  if (cacheControlRequested) {
1188
1211
  if (params.outputFormat !== "stream-json") {
1189
1212
  return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
@@ -1200,6 +1223,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1200
1223
  const built = assembleClaudeCacheBlocks(effectiveParts);
1201
1224
  stdinPayload = `${JSON.stringify(built.payload)}\n`;
1202
1225
  cacheControlBlocks = built.markedBlockCount;
1226
+ cacheControlTtlSeconds = built.markedBlockCount > 0 ? 3600 : undefined;
1203
1227
  }
1204
1228
  const args = cacheControlRequested
1205
1229
  ? [
@@ -1288,6 +1312,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1288
1312
  stablePrefixTokens,
1289
1313
  stdinPayload,
1290
1314
  cacheControlBlocks,
1315
+ cacheControlTtlSeconds,
1291
1316
  warnings: warnings.length > 0 ? warnings : undefined,
1292
1317
  };
1293
1318
  }
@@ -1320,7 +1345,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1320
1345
  logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1321
1346
  effectivePrompt = optimized;
1322
1347
  }
1323
- const requestedMcpServers = normalizeMcpServers(params.mcpServers);
1348
+ const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
1324
1349
  let approvalDecision = null;
1325
1350
  if (params.approvalStrategy === "mcp_managed") {
1326
1351
  approvalDecision = runtime.approvalManager.decide({
@@ -1478,7 +1503,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1478
1503
  logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1479
1504
  effectivePrompt = optimized;
1480
1505
  }
1481
- const requestedMcpServers = normalizeMcpServers(params.mcpServers);
1506
+ const requestedMcpServers = params.mcpServers ? [...new Set(params.mcpServers)] : [];
1482
1507
  let approvalDecision = null;
1483
1508
  if (params.approvalStrategy === "mcp_managed") {
1484
1509
  approvalDecision = runtime.approvalManager.decide({
@@ -1498,51 +1523,45 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1498
1523
  }
1499
1524
  }
1500
1525
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
1501
- const highImpact = prepareGeminiHighImpactFlags({
1502
- sandbox: params.sandbox,
1503
- policyFiles: params.policyFiles,
1504
- adminPolicyFiles: params.adminPolicyFiles,
1505
- });
1506
- if (highImpact.missingPolicyPath) {
1507
- return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
1508
- }
1509
- if (params.attachments && params.attachments.length > 0) {
1510
- try {
1511
- effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
1512
- }
1513
- catch (err) {
1514
- return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
1515
- }
1516
- }
1517
- const args = ["-p", effectivePrompt];
1518
- if (resolvedModel)
1519
- args.push("--model", resolvedModel);
1520
- if (effectiveApprovalMode)
1521
- args.push("--approval-mode", effectiveApprovalMode);
1522
- if (params.yolo && effectiveApprovalMode !== "yolo") {
1523
- args.push("--yolo");
1526
+ const unsupported = (field, detail) => createErrorResponse(params.operation, 1, "", corrId, new Error(`${field} is not supported by Antigravity CLI (agy): ${detail}`));
1527
+ if (effectiveApprovalMode &&
1528
+ effectiveApprovalMode !== "default" &&
1529
+ effectiveApprovalMode !== "yolo") {
1530
+ return unsupported("approvalMode", "use 'default' for prompted execution or 'yolo'/yolo=true for --dangerously-skip-permissions");
1524
1531
  }
1525
1532
  if (params.allowedTools && params.allowedTools.length > 0) {
1526
- sanitizeCliArgValues(params.allowedTools, "allowedTools");
1527
- params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
1533
+ return unsupported("allowedTools", "agy has no non-interactive allowed-tools flag");
1528
1534
  }
1529
1535
  if (requestedMcpServers.length > 0) {
1530
- sanitizeCliArgValues(requestedMcpServers, "mcpServers");
1531
- requestedMcpServers.forEach(serverName => args.push("--allowed-mcp-server-names", serverName));
1536
+ return unsupported("mcpServers", "agy has no non-interactive allowed MCP server allowlist flag");
1532
1537
  }
1533
- if (params.includeDirs && params.includeDirs.length > 0) {
1534
- sanitizeCliArgValues(params.includeDirs, "includeDirs");
1535
- params.includeDirs.forEach(dir => args.push("--include-directories", dir));
1538
+ if (params.outputFormat && params.outputFormat !== "text") {
1539
+ return unsupported("outputFormat", "agy print mode currently emits text only");
1536
1540
  }
1537
- args.push(...highImpact.args);
1538
- if (params.outputFormat === "json") {
1539
- args.push("-o", "json");
1541
+ if (params.policyFiles && params.policyFiles.length > 0) {
1542
+ return unsupported("policyFiles", "agy has no --policy flag");
1540
1543
  }
1541
- else if (params.outputFormat === "stream-json") {
1542
- args.push("-o", "stream-json");
1544
+ if (params.adminPolicyFiles && params.adminPolicyFiles.length > 0) {
1545
+ return unsupported("adminPolicyFiles", "agy has no --admin-policy flag");
1546
+ }
1547
+ if (params.attachments && params.attachments.length > 0) {
1548
+ return unsupported("attachments", "agy has no documented @path attachment-token contract");
1543
1549
  }
1544
1550
  if (params.skipTrust) {
1545
- args.push("--skip-trust");
1551
+ return unsupported("skipTrust", "agy has no --skip-trust flag");
1552
+ }
1553
+ const args = ["--print", effectivePrompt];
1554
+ if (resolvedModel)
1555
+ args.push("--model", resolvedModel);
1556
+ if (params.includeDirs && params.includeDirs.length > 0) {
1557
+ sanitizeCliArgValues(params.includeDirs, "includeDirs");
1558
+ params.includeDirs.forEach(dir => args.push("--add-dir", dir));
1559
+ }
1560
+ if (params.sandbox) {
1561
+ args.push("--sandbox");
1562
+ }
1563
+ if (params.yolo || effectiveApprovalMode === "yolo") {
1564
+ args.push("--dangerously-skip-permissions");
1546
1565
  }
1547
1566
  return {
1548
1567
  corrId,
@@ -1611,76 +1630,19 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1611
1630
  }
1612
1631
  }
1613
1632
  const effectiveAlwaysApprove = params.approvalStrategy === "mcp_managed" ? true : Boolean(params.alwaysApprove);
1633
+ const grokContract = UPSTREAM_CLI_CONTRACTS.grok;
1634
+ const genParams = params;
1614
1635
  const args = ["-p", effectivePrompt];
1615
1636
  if (resolvedModel)
1616
1637
  args.push("--model", resolvedModel);
1617
- if (params.outputFormat)
1618
- args.push("--output-format", params.outputFormat);
1638
+ args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_OUTPUT_FORMAT, genParams));
1619
1639
  if (effectiveAlwaysApprove) {
1620
1640
  args.push("--always-approve");
1621
1641
  }
1622
1642
  else if (params.permissionMode) {
1623
1643
  args.push("--permission-mode", params.permissionMode);
1624
1644
  }
1625
- if (params.effort)
1626
- args.push("--effort", params.effort);
1627
- if (params.reasoningEffort)
1628
- args.push("--reasoning-effort", params.reasoningEffort);
1629
- if (params.allowedTools && params.allowedTools.length > 0) {
1630
- args.push("--tools", params.allowedTools.join(","));
1631
- }
1632
- if (params.disallowedTools && params.disallowedTools.length > 0) {
1633
- args.push("--disallowed-tools", params.disallowedTools.join(","));
1634
- }
1635
- if (params.maxTurns !== undefined) {
1636
- args.push("--max-turns", String(params.maxTurns));
1637
- }
1638
- if (params.workingDir) {
1639
- args.push("--cwd", params.workingDir);
1640
- }
1641
- if (params.sandbox) {
1642
- args.push("--sandbox", params.sandbox);
1643
- }
1644
- if (params.rules) {
1645
- args.push("--rules", params.rules);
1646
- }
1647
- if (params.systemPromptOverride) {
1648
- args.push("--system-prompt-override", params.systemPromptOverride);
1649
- }
1650
- if (params.allow && params.allow.length > 0) {
1651
- for (const rule of params.allow) {
1652
- args.push("--allow", rule);
1653
- }
1654
- }
1655
- if (params.deny && params.deny.length > 0) {
1656
- for (const rule of params.deny) {
1657
- args.push("--deny", rule);
1658
- }
1659
- }
1660
- if (params.compactionMode) {
1661
- args.push("--compaction-mode", params.compactionMode);
1662
- }
1663
- if (params.compactionDetail) {
1664
- args.push("--compaction-detail", params.compactionDetail);
1665
- }
1666
- if (params.agent) {
1667
- args.push("--agent", params.agent);
1668
- }
1669
- if (params.bestOfN !== undefined) {
1670
- args.push("--best-of-n", String(params.bestOfN));
1671
- }
1672
- if (params.check) {
1673
- args.push("--check");
1674
- }
1675
- if (params.disableWebSearch) {
1676
- args.push("--disable-web-search");
1677
- }
1678
- if (params.todoGate) {
1679
- args.push("--todo-gate");
1680
- }
1681
- if (params.verbatim) {
1682
- args.push("--verbatim");
1683
- }
1645
+ args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_MAIN, genParams));
1684
1646
  if (params.agents !== undefined) {
1685
1647
  if (typeof params.agents === "string") {
1686
1648
  if (!params.agents.trim()) {
@@ -1696,9 +1658,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1696
1658
  args.push("--agents", JSON.stringify(agentsResult.value));
1697
1659
  }
1698
1660
  }
1699
- if (params.promptFile) {
1700
- args.push("--prompt-file", params.promptFile);
1701
- }
1661
+ args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_PROMPT_FILE, genParams));
1702
1662
  if (params.promptJson !== undefined) {
1703
1663
  const promptJsonValue = typeof params.promptJson === "string" ? params.promptJson : JSON.stringify(params.promptJson);
1704
1664
  if (!promptJsonValue.trim()) {
@@ -1706,33 +1666,8 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1706
1666
  }
1707
1667
  args.push("--prompt-json", promptJsonValue);
1708
1668
  }
1709
- if (params.single) {
1710
- args.push("--single", params.single);
1711
- }
1712
- if (params.experimentalMemory) {
1713
- args.push("--experimental-memory");
1714
- }
1715
- if (params.noAltScreen) {
1716
- args.push("--no-alt-screen");
1717
- }
1718
- if (params.noMemory) {
1719
- args.push("--no-memory");
1720
- }
1721
- if (params.noPlan) {
1722
- args.push("--no-plan");
1723
- }
1724
- if (params.noSubagents) {
1725
- args.push("--no-subagents");
1726
- }
1727
- if (params.oauth) {
1728
- args.push("--oauth");
1729
- }
1730
- if (params.restoreCode) {
1731
- args.push("--restore-code");
1732
- }
1733
- if (params.leaderSocket) {
1734
- args.push("--leader-socket", params.leaderSocket);
1735
- }
1669
+ args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_SINGLE, genParams));
1670
+ args.push(...buildArgvFromGeneration(grokContract, GROK_GEN_TAIL, genParams));
1736
1671
  if (params.nativeWorktree === true) {
1737
1672
  args.push("--worktree");
1738
1673
  }
@@ -1889,6 +1824,7 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1889
1824
  const response = {
1890
1825
  content: [{ type: "text", text: finalStdout }],
1891
1826
  structuredContent: {
1827
+ response: finalStdout,
1892
1828
  model: prep.resolvedModel || "default",
1893
1829
  cli,
1894
1830
  correlationId: corrId,
@@ -2024,6 +1960,7 @@ function buildGrokApiToolResponse(args) {
2024
1960
  const response = {
2025
1961
  content: [{ type: "text", text }],
2026
1962
  structuredContent: {
1963
+ response: text,
2027
1964
  provider: "grok-api",
2028
1965
  cli: "grok-api",
2029
1966
  model: args.result.model || args.prep.resolvedModel,
@@ -3468,6 +3405,7 @@ export function createGatewayServer(deps = {}) {
3468
3405
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
3469
3406
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
3470
3407
  cacheControlBlocks: prep.cacheControlBlocks,
3408
+ cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
3471
3409
  }, runtime);
3472
3410
  logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
3473
3411
  try {
@@ -3967,23 +3905,23 @@ export function createGatewayServer(deps = {}) {
3967
3905
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3968
3906
  }
3969
3907
  });
3970
- server.tool("gemini_request", "Run a Google Gemini CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3908
+ server.tool("gemini_request", "Run a Google Antigravity CLI (`agy`) request through the Gemini-compatible gateway tool synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3971
3909
  prompt: z
3972
3910
  .string()
3973
3911
  .min(1, "Prompt cannot be empty")
3974
3912
  .max(100000, "Prompt too long (max 100k chars)")
3975
3913
  .optional()
3976
- .describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
3914
+ .describe("Prompt text for Antigravity CLI (mutually exclusive with promptParts)"),
3977
3915
  promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
3978
3916
  model: z
3979
3917
  .string()
3980
3918
  .optional()
3981
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3919
+ .describe("Model name or alias passed to agy --model (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3982
3920
  sessionId: z
3983
3921
  .string()
3984
3922
  .optional()
3985
- .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
3986
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
3923
+ .describe("Antigravity conversation ID to resume (emits --conversation <id>)"),
3924
+ resumeLatest: z.boolean().default(false).describe("Continue the most recent conversation"),
3987
3925
  createNewSession: z.boolean().default(false).describe("Force new session"),
3988
3926
  approvalMode: z
3989
3927
  .enum(GEMINI_APPROVAL_MODES)
@@ -3999,13 +3937,16 @@ export function createGatewayServer(deps = {}) {
3999
3937
  .describe("Approval policy override"),
4000
3938
  mcpServers: z
4001
3939
  .array(MCP_SERVER_ENUM)
4002
- .default(["sqry"])
4003
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
3940
+ .default([])
3941
+ .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
4004
3942
  allowedTools: z
4005
3943
  .array(z.string())
4006
3944
  .optional()
4007
- .describe("Allowed tools (['Write','Edit','Bash'])"),
4008
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
3945
+ .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
3946
+ includeDirs: z
3947
+ .array(z.string())
3948
+ .optional()
3949
+ .describe("Additional workspace directories passed as --add-dir"),
4009
3950
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
4010
3951
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
4011
3952
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
@@ -4023,19 +3964,19 @@ export function createGatewayServer(deps = {}) {
4023
3964
  outputFormat: z
4024
3965
  .enum(["text", "json", "stream-json"])
4025
3966
  .default("text")
4026
- .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
4027
- sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
4028
- policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
4029
- adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
4030
- attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
3967
+ .describe("Antigravity CLI currently supports text output only through the gateway; json and stream-json are rejected."),
3968
+ sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Antigravity in sandbox mode (--sandbox)"),
3969
+ policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
3970
+ adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
3971
+ attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
4031
3972
  skipTrust: z
4032
3973
  .boolean()
4033
3974
  .default(false)
4034
- .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
3975
+ .describe("Unsupported for Antigravity CLI; true is rejected."),
4035
3976
  yolo: z
4036
3977
  .boolean()
4037
3978
  .optional()
4038
- .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
3979
+ .describe("Emit `--dangerously-skip-permissions` to auto-approve all actions. Routed through the same approval gate. Under mcp_managed the gate still decides."),
4039
3980
  workspace: WORKSPACE_ALIAS_SCHEMA.optional(),
4040
3981
  worktree: WORKTREE_SCHEMA.optional(),
4041
3982
  }, {
@@ -4083,10 +4024,7 @@ export function createGatewayServer(deps = {}) {
4083
4024
  .describe("Prompt text for Grok (mutually exclusive with promptParts)"),
4084
4025
  promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
4085
4026
  model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
4086
- outputFormat: z
4087
- .enum(["plain", "json", "streaming-json"])
4088
- .optional()
4089
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
4027
+ ...GROK_GENERATED_SHAPE,
4090
4028
  sessionId: z
4091
4029
  .string()
4092
4030
  .optional()
@@ -4104,11 +4042,6 @@ export function createGatewayServer(deps = {}) {
4104
4042
  .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
4105
4043
  .optional()
4106
4044
  .describe("Grok permission mode"),
4107
- effort: z
4108
- .enum(["low", "medium", "high", "xhigh", "max"])
4109
- .optional()
4110
- .describe("Grok effort level"),
4111
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
4112
4045
  approvalStrategy: z
4113
4046
  .enum(["legacy", "mcp_managed"])
4114
4047
  .default("legacy")
@@ -4121,14 +4054,6 @@ export function createGatewayServer(deps = {}) {
4121
4054
  .array(MCP_SERVER_ENUM)
4122
4055
  .default(["sqry"])
4123
4056
  .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
4124
- allowedTools: z
4125
- .array(z.string())
4126
- .optional()
4127
- .describe("Allowed built-in tools (passed as --tools comma list)"),
4128
- disallowedTools: z
4129
- .array(z.string())
4130
- .optional()
4131
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
4132
4057
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
4133
4058
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
4134
4059
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
@@ -4143,107 +4068,14 @@ export function createGatewayServer(deps = {}) {
4143
4068
  .boolean()
4144
4069
  .default(false)
4145
4070
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4146
- maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
4147
- workingDir: z
4148
- .string()
4149
- .min(1)
4150
- .optional()
4151
- .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
4152
- sandbox: z
4153
- .string()
4154
- .min(1)
4155
- .optional()
4156
- .describe("Grok --sandbox <PROFILE>: sandbox profile for filesystem and network access. Freeform per `grok --help` (no enum constraint on Grok 0.1.210); also settable via GROK_SANDBOX env var. Caller responsibility to pass a valid profile name."),
4157
- rules: z
4158
- .string()
4159
- .min(1)
4160
- .optional()
4161
- .describe("Grok --rules <RULES>: extra rules to append to the system prompt. Supports `@file` prefix per `grok --help` to load from a file; gateway passes the value verbatim and lets Grok parse the prefix."),
4162
- systemPromptOverride: z
4163
- .string()
4164
- .min(1)
4165
- .optional()
4166
- .describe("Grok --system-prompt-override <PROMPT>: replace the agent's system prompt entirely. Distinct from Claude's --system-prompt / --append-system-prompt (Grok has only one override flag, not a pair)."),
4167
- allow: z
4168
- .array(z.string())
4169
- .optional()
4170
- .describe('Grok --allow <RULE>: permission allow rules. Each entry is emitted as its own --allow instance (per `grok --help`: "Repeat to add multiple rules").'),
4171
- deny: z
4172
- .array(z.string())
4173
- .optional()
4174
- .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
4175
- compactionMode: z
4176
- .enum(["summary", "transcript", "segments"])
4177
- .optional()
4178
- .describe("Grok --compaction-mode: summary (default; no pointer) | transcript (points at the raw transcript) | segments (persists per-segment markdown to grep). Sets GROK_COMPACTION_MODE."),
4179
- compactionDetail: z
4180
- .enum(["none", "minimal", "balanced", "verbose"])
4181
- .optional()
4182
- .describe("Grok --compaction-detail: verbatim segment detail (none|minimal|balanced|verbose, default verbose). Only affects `--compaction-mode segments`. Sets GROK_COMPACTION_DETAIL."),
4183
- agent: z
4184
- .string()
4185
- .min(1)
4186
- .optional()
4187
- .describe("Grok --agent <NAME>: agent name or definition file path."),
4188
- bestOfN: MAX_TURNS_SCHEMA.optional().describe("Grok --best-of-n <N>: run the task N ways in parallel and pick the best (headless only)."),
4189
- check: z
4190
- .boolean()
4191
- .optional()
4192
- .describe("Grok --check: append a self-verification loop to the prompt (headless only)."),
4193
- disableWebSearch: z
4194
- .boolean()
4195
- .optional()
4196
- .describe("Grok --disable-web-search: disable web search and remote retrieval tools."),
4197
- todoGate: z
4198
- .boolean()
4199
- .optional()
4200
- .describe("Grok --todo-gate: enable runtime turn-end TodoGate for this session (session-scoped, not persisted)."),
4201
- verbatim: z
4202
- .boolean()
4203
- .optional()
4204
- .describe("Grok --verbatim: send the prompt exactly as given. Also skips gateway optimizePrompt when true."),
4205
4071
  agents: z
4206
4072
  .union([z.string().min(1), z.record(z.string(), z.record(z.string(), z.unknown()))])
4207
4073
  .optional()
4208
4074
  .describe("Grok --agents <JSON>: inline subagent definitions (JSON string or name → { description, prompt, … } map)."),
4209
- promptFile: z
4210
- .string()
4211
- .min(1)
4212
- .optional()
4213
- .describe("Grok --prompt-file <PATH>: single-turn prompt loaded from a file."),
4214
4075
  promptJson: z
4215
4076
  .union([z.string(), z.array(z.unknown()), z.record(z.string(), z.unknown())])
4216
4077
  .optional()
4217
4078
  .describe("Grok --prompt-json <JSON>: single-turn prompt JSON blocks (string or serializable value)."),
4218
- single: z
4219
- .string()
4220
- .min(1)
4221
- .optional()
4222
- .describe("Grok --single <PROMPT>: single-turn prompt (in addition to gateway -p)."),
4223
- experimentalMemory: z
4224
- .boolean()
4225
- .optional()
4226
- .describe("Grok --experimental-memory: enable cross-session memory."),
4227
- noAltScreen: z
4228
- .boolean()
4229
- .optional()
4230
- .describe("Grok --no-alt-screen: run inline without alt screen."),
4231
- noMemory: z.boolean().optional().describe("Grok --no-memory: disable cross-session memory."),
4232
- noPlan: z.boolean().optional().describe("Grok --no-plan: disable plan mode."),
4233
- noSubagents: z
4234
- .boolean()
4235
- .optional()
4236
- .describe("Grok --no-subagents: disable subagent spawning."),
4237
- oauth: z.boolean().optional().describe("Grok --oauth: use OAuth during authentication."),
4238
- restoreCode: z
4239
- .boolean()
4240
- .optional()
4241
- .describe("Grok --restore-code: check out the original session commit when resuming."),
4242
- leaderSocket: z
4243
- .string()
4244
- .min(1)
4245
- .optional()
4246
- .describe("Grok 0.2.32+ --leader-socket <PATH>: custom leader socket path (default ~/.grok/leader.sock). Targets an isolated leader process, e.g. a local/branch Grok build; name it ~/.grok/leader-*.sock to keep `grok leader list/kill` discovery working."),
4247
4079
  nativeWorktree: z
4248
4080
  .union([z.boolean(), z.string().min(1)])
4249
4081
  .optional()
@@ -4835,23 +4667,23 @@ export function createGatewayServer(deps = {}) {
4835
4667
  worktree,
4836
4668
  });
4837
4669
  });
4838
- server.tool("gemini_request_async", "Start a Google Gemini CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4670
+ server.tool("gemini_request_async", "Start a Google Antigravity CLI (`agy`) request as a durable background job through the Gemini-compatible gateway tool. Poll with llm_job_status, collect with llm_job_result.", {
4839
4671
  prompt: z
4840
4672
  .string()
4841
4673
  .min(1, "Prompt cannot be empty")
4842
4674
  .max(100000, "Prompt too long (max 100k chars)")
4843
4675
  .optional()
4844
- .describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
4676
+ .describe("Prompt text for Antigravity CLI (mutually exclusive with promptParts)"),
4845
4677
  promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
4846
4678
  model: z
4847
4679
  .string()
4848
4680
  .optional()
4849
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
4681
+ .describe("Model name or alias passed to agy --model (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
4850
4682
  sessionId: z
4851
4683
  .string()
4852
4684
  .optional()
4853
- .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
4854
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
4685
+ .describe("Antigravity conversation ID to resume (emits --conversation <id>)"),
4686
+ resumeLatest: z.boolean().default(false).describe("Continue the most recent conversation"),
4855
4687
  createNewSession: z.boolean().default(false).describe("Force new session"),
4856
4688
  approvalMode: z
4857
4689
  .enum(GEMINI_APPROVAL_MODES)
@@ -4867,13 +4699,16 @@ export function createGatewayServer(deps = {}) {
4867
4699
  .describe("Approval policy override"),
4868
4700
  mcpServers: z
4869
4701
  .array(MCP_SERVER_ENUM)
4870
- .default(["sqry"])
4871
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
4702
+ .default([])
4703
+ .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
4872
4704
  allowedTools: z
4873
4705
  .array(z.string())
4874
4706
  .optional()
4875
- .describe("Allowed tools (['Write','Edit','Bash'])"),
4876
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
4707
+ .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
4708
+ includeDirs: z
4709
+ .array(z.string())
4710
+ .optional()
4711
+ .describe("Additional workspace directories passed as --add-dir"),
4877
4712
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
4878
4713
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
4879
4714
  idleTimeoutMs: z
@@ -4890,19 +4725,19 @@ export function createGatewayServer(deps = {}) {
4890
4725
  outputFormat: z
4891
4726
  .enum(["text", "json", "stream-json"])
4892
4727
  .default("text")
4893
- .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
4894
- sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
4895
- policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
4896
- adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
4897
- attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
4728
+ .describe("Antigravity CLI currently supports text output only through the gateway; json and stream-json are rejected."),
4729
+ sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Antigravity in sandbox mode (--sandbox)"),
4730
+ policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
4731
+ adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
4732
+ attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Unsupported for Antigravity CLI; non-empty values are rejected."),
4898
4733
  skipTrust: z
4899
4734
  .boolean()
4900
4735
  .default(false)
4901
- .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
4736
+ .describe("Unsupported for Antigravity CLI; true is rejected."),
4902
4737
  yolo: z
4903
4738
  .boolean()
4904
4739
  .optional()
4905
- .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
4740
+ .describe("Emit `--dangerously-skip-permissions` to auto-approve all actions. Routed through the same approval gate. Under mcp_managed the gate still decides."),
4906
4741
  workspace: WORKSPACE_ALIAS_SCHEMA.optional(),
4907
4742
  worktree: WORKTREE_SCHEMA.optional(),
4908
4743
  }, {