llm-cli-gateway 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -12,12 +12,13 @@ import { parseCodexJsonStream } from "./codex-json-parser.js";
12
12
  import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js";
13
13
  import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
14
14
  import { homedir } from "os";
15
- import { createSessionManager } from "./session-manager.js";
15
+ import { CLI_TYPES, PROVIDER_TYPES, createSessionManager, } from "./session-manager.js";
16
16
  import { createWorktree, createWorktreeSessionCleanupHook, } from "./worktree-manager.js";
17
17
  import { ResourceProvider } from "./resources.js";
18
18
  import { PerformanceMetrics } from "./metrics.js";
19
19
  import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
20
- import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, minStableTokensForModel, } from "./config.js";
20
+ import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, loadProvidersConfig, isXaiProviderEnabled, minStableTokensForModel, } from "./config.js";
21
+ import { createXaiResponse, XaiApiError, } from "./xai-api-provider.js";
21
22
  import { checkHealth } from "./health.js";
22
23
  import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
23
24
  import { AsyncJobManager, } from "./async-job-manager.js";
@@ -33,7 +34,7 @@ import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
33
34
  import { startHttpGateway } from "./http-transport.js";
34
35
  import { printDoctorJson } from "./doctor.js";
35
36
  import { registerValidationTools } from "./validation-tools.js";
36
- import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
37
+ import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildProviderSubcommandsCompactCatalog, buildUpstreamContractReport, getCliSubcommandContract, probeInstalledCliContract, serializeCliSubcommandContract, } from "./upstream-contracts.js";
37
38
  import { entrypointFileURL } from "./entrypoint-url.js";
38
39
  const logger = {
39
40
  info: (message, ...args) => {
@@ -141,31 +142,32 @@ function loadSkills() {
141
142
  return skills;
142
143
  }
143
144
  const loadedSkills = loadSkills();
144
- export function buildServerInstructions(asyncJobsEnabled) {
145
+ export function buildServerInstructions(asyncJobsEnabled, grokApiToolsEnabled = false) {
145
146
  const asyncToolsNote = asyncJobsEnabled ? " | *_request_async (async)" : "";
147
+ const apiToolsNote = grokApiToolsEnabled ? ", grok_api_request" : "";
146
148
  const jobsLine = asyncJobsEnabled ? "Jobs: llm_job_status, llm_job_result, llm_job_cancel\n" : "";
147
149
  const deferralLine = asyncJobsEnabled
148
150
  ? `- Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.`
149
151
  : '- Async jobs are DISABLED (persistence.backend = "none"): *_request_async and llm_job_* tools are not registered, and sync requests run to completion (no auto-deferral).';
150
152
  return `llm-cli-gateway: Multi-LLM orchestration via MCP.
151
153
 
152
- Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync)${asyncToolsNote} | codex_fork_session (fork a Codex session into a new branch)
154
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request${apiToolsNote} (sync)${asyncToolsNote} | codex_fork_session (fork a Codex session into a new branch)
153
155
  Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation, list_available_models | job_status/job_result (validation jobs)
154
156
  ${jobsLine}Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
155
- Other: list_models, cli_versions, upstream_contracts (use --probe-installed after CLI upgrades to detect drift), cli_upgrade, approval_list, llm_process_health, llm_request_result (read back any persisted request — sync or async — by correlationId)
157
+ Other: list_models, cli_versions, upstream_contracts, provider_subcommands_* (read-only subcommand contract/drift introspection), cli_upgrade, approval_list, llm_process_health, llm_request_result (read back any persisted request — sync or async — by correlationId)
156
158
 
157
159
  Key behaviors:
158
160
  ${deferralLine}
159
161
  - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
160
162
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
161
- - Upstream drift detection: After upgrading any provider CLI (especially grok), use the upstream_contracts tool with probeInstalled: true (or the CLI command "llm-cli-gateway contracts --json --probe-installed"). This is the primary reliable way to detect when an installed binary has gained or lost flags compared to the gateway's declared contract. The probe is safe and read-only.
163
+ - Upstream drift detection: After upgrading any provider CLI (especially grok), use upstream_contracts with probeInstalled:true and provider_subcommand_drift for declared subcommand help surfaces. Probes are safe, read-only --help checks.
162
164
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
163
165
 
164
166
  Skills (full docs via MCP resources):
165
167
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
166
168
  }
167
- function newGatewayMcpServer(asyncJobsEnabled = true) {
168
- return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: buildServerInstructions(asyncJobsEnabled) });
169
+ function newGatewayMcpServer(asyncJobsEnabled = true, grokApiToolsEnabled = false) {
170
+ return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: buildServerInstructions(asyncJobsEnabled, grokApiToolsEnabled) });
169
171
  }
170
172
  let sessionManager;
171
173
  let db = null;
@@ -174,6 +176,7 @@ let resourceProvider;
174
176
  let flightRecorder = null;
175
177
  let persistenceConfig = null;
176
178
  let cacheAwarenessConfig = null;
179
+ let providersConfig = null;
177
180
  let jobStore = null;
178
181
  let jobStoreInitialized = false;
179
182
  let asyncJobManager = null;
@@ -190,6 +193,10 @@ function getCacheAwarenessConfig(runtimeLogger = logger) {
190
193
  cacheAwarenessConfig ??= loadCacheAwarenessConfig(runtimeLogger);
191
194
  return cacheAwarenessConfig;
192
195
  }
196
+ function getProvidersConfig(runtimeLogger = logger) {
197
+ providersConfig ??= loadProvidersConfig(runtimeLogger);
198
+ return providersConfig;
199
+ }
193
200
  function getJobStore(runtimeLogger = logger) {
194
201
  if (jobStoreInitialized)
195
202
  return jobStore;
@@ -217,6 +224,7 @@ function getApprovalManager(runtimeLogger = logger) {
217
224
  return approvalManager;
218
225
  }
219
226
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
227
+ const CLI_TYPE_ENUM = z.enum(CLI_TYPES);
220
228
  export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
221
229
  export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
222
230
  export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
@@ -244,7 +252,7 @@ export const WORKTREE_SCHEMA = z
244
252
  "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
245
253
  "directory in their repo (the gateway does NOT auto-gitignore — " +
246
254
  "see slice λ spec Q4).");
247
- export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
255
+ export const SESSION_PROVIDER_VALUES = PROVIDER_TYPES;
248
256
  export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
249
257
  let activeServer = null;
250
258
  let activeHttpGateway = null;
@@ -277,8 +285,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
277
285
  logger: runtimeLogger,
278
286
  persistence: deps.persistence ?? getPersistenceConfig(runtimeLogger),
279
287
  cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
288
+ providers: deps.providers ?? getProvidersConfig(runtimeLogger),
280
289
  };
281
290
  }
291
+ export function shouldRegisterGrokApiTools(providers) {
292
+ return isXaiProviderEnabled(providers);
293
+ }
282
294
  const CLI_IDLE_TIMEOUTS = {
283
295
  claude: 600_000,
284
296
  codex: 600_000,
@@ -741,12 +753,12 @@ function registerBaseResources(server, runtime) {
741
753
  const contents = await runtime.resourceProvider.readResource(uri.href);
742
754
  return { contents: contents ? [contents] : [] };
743
755
  });
744
- server.registerResource("cache-state-global", "cache_state://global", {
756
+ server.registerResource("cache-state-global", "cache-state://global", {
745
757
  title: "💾 Cache State (Global)",
746
758
  description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
747
759
  mimeType: "application/json",
748
760
  }, async (uri) => {
749
- runtime.logger.debug("Reading cache_state://global resource");
761
+ runtime.logger.debug("Reading cache-state://global resource");
750
762
  const stats = runtime.resourceProvider.readCacheStateGlobal({
751
763
  lastNHours: 24,
752
764
  });
@@ -760,7 +772,7 @@ function registerBaseResources(server, runtime) {
760
772
  ],
761
773
  };
762
774
  });
763
- server.registerResource("cache-state-session", new ResourceTemplate("cache_state://session/{sessionId}", { list: undefined }), {
775
+ server.registerResource("cache-state-session", new ResourceTemplate("cache-state://session/{sessionId}", { list: undefined }), {
764
776
  title: "💾 Cache State (Session)",
765
777
  description: "Per-session cache hit/miss/savings. Tokens/hashes only — no prompt text.",
766
778
  mimeType: "application/json",
@@ -768,7 +780,7 @@ function registerBaseResources(server, runtime) {
768
780
  const sessionId = Array.isArray(variables.sessionId)
769
781
  ? variables.sessionId[0]
770
782
  : variables.sessionId;
771
- runtime.logger.debug(`Reading cache_state://session/${sessionId}`);
783
+ runtime.logger.debug(`Reading cache-state://session/${sessionId}`);
772
784
  const stats = runtime.resourceProvider.readCacheStateSession(String(sessionId));
773
785
  return {
774
786
  contents: [
@@ -780,13 +792,13 @@ function registerBaseResources(server, runtime) {
780
792
  ],
781
793
  };
782
794
  });
783
- server.registerResource("cache-state-prefix", new ResourceTemplate("cache_state://prefix/{hash}", { list: undefined }), {
795
+ server.registerResource("cache-state-prefix", new ResourceTemplate("cache-state://prefix/{hash}", { list: undefined }), {
784
796
  title: "💾 Cache State (Prefix)",
785
797
  description: "Per-stable-prefix-hash cache hit/miss/savings, with CLI breakdown. Tokens/hashes only — no prompt text.",
786
798
  mimeType: "application/json",
787
799
  }, async (uri, variables) => {
788
800
  const hash = Array.isArray(variables.hash) ? variables.hash[0] : variables.hash;
789
- runtime.logger.debug(`Reading cache_state://prefix/${hash}`);
801
+ runtime.logger.debug(`Reading cache-state://prefix/${hash}`);
790
802
  const stats = runtime.resourceProvider.readCacheStateForPrefix(String(hash));
791
803
  return {
792
804
  contents: [
@@ -798,6 +810,30 @@ function registerBaseResources(server, runtime) {
798
810
  ],
799
811
  };
800
812
  });
813
+ server.registerResource("provider-subcommands-catalog", "provider-subcommands://catalog", {
814
+ title: "Provider Subcommands Catalog",
815
+ description: "Compact read-only catalog of declared provider CLI subcommands",
816
+ mimeType: "application/json",
817
+ }, async (uri) => {
818
+ runtime.logger.debug("Reading provider-subcommands://catalog resource");
819
+ const contents = await runtime.resourceProvider.readResource(uri.href);
820
+ return { contents: contents ? [contents] : [] };
821
+ });
822
+ server.registerResource("provider-subcommand-contract", new ResourceTemplate("provider-subcommands://{provider}/{+commandPath}", { list: undefined }), {
823
+ title: "Provider Subcommand Contract",
824
+ description: "Detailed read-only contract for one declared provider CLI subcommand",
825
+ mimeType: "application/json",
826
+ }, async (uri, variables) => {
827
+ const provider = Array.isArray(variables.provider)
828
+ ? variables.provider[0]
829
+ : variables.provider;
830
+ const commandPath = Array.isArray(variables.commandPath)
831
+ ? variables.commandPath[0]
832
+ : variables.commandPath;
833
+ runtime.logger.debug(`Reading provider-subcommands://${provider}/${commandPath}`);
834
+ const contents = await runtime.resourceProvider.readResource(uri.href);
835
+ return { contents: contents ? [contents] : [] };
836
+ });
801
837
  }
802
838
  function resolvePromptOrPartsForPrep(args) {
803
839
  const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
@@ -1676,6 +1712,271 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1676
1712
  }
1677
1713
  return response;
1678
1714
  }
1715
+ function buildXaiPromptPartsUserContent(promptParts) {
1716
+ const userSections = [];
1717
+ if (promptParts.tools && promptParts.tools.length > 0) {
1718
+ userSections.push(`<tools>\n${promptParts.tools}\n</tools>`);
1719
+ }
1720
+ if (promptParts.context && promptParts.context.length > 0) {
1721
+ userSections.push(`<context>\n${promptParts.context}\n</context>`);
1722
+ }
1723
+ if (promptParts.task && promptParts.task.length > 0) {
1724
+ userSections.push(promptParts.task);
1725
+ }
1726
+ return userSections.join("\n\n");
1727
+ }
1728
+ function buildXaiPromptPartsEffectivePrompt(instructions, userContent) {
1729
+ return instructions && instructions.length > 0
1730
+ ? `${instructions}\n\n${userContent}`
1731
+ : userContent;
1732
+ }
1733
+ function prepareGrokApiRequest(params, providers) {
1734
+ const corrId = params.correlationId || randomUUID();
1735
+ if (!providers.xai) {
1736
+ return createErrorResponse("grok_api_request", 1, "", corrId, new Error("[providers.xai] is not configured"));
1737
+ }
1738
+ const inputResolution = resolvePromptOrPartsForPrep({
1739
+ prompt: params.prompt,
1740
+ promptParts: params.promptParts,
1741
+ operation: "grok_api_request",
1742
+ correlationId: corrId,
1743
+ });
1744
+ if (!inputResolution.ok)
1745
+ return inputResolution.error;
1746
+ const instructions = params.promptParts?.system && params.promptParts.system.length > 0
1747
+ ? params.promptParts.system
1748
+ : undefined;
1749
+ let effectivePrompt = inputResolution.assembledPrompt;
1750
+ let input;
1751
+ if (params.promptParts) {
1752
+ let userContent = buildXaiPromptPartsUserContent(params.promptParts);
1753
+ if (params.optimizePrompt) {
1754
+ const optimized = optimizePromptText(userContent);
1755
+ logOptimizationTokens("prompt", corrId, userContent, optimized);
1756
+ userContent = optimized;
1757
+ }
1758
+ effectivePrompt = buildXaiPromptPartsEffectivePrompt(instructions, userContent);
1759
+ input = [{ role: "user", content: userContent }];
1760
+ }
1761
+ else {
1762
+ if (params.optimizePrompt) {
1763
+ const optimized = optimizePromptText(effectivePrompt);
1764
+ logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1765
+ effectivePrompt = optimized;
1766
+ }
1767
+ input = effectivePrompt;
1768
+ }
1769
+ const resolvedModel = params.model ?? providers.xai.defaultModel;
1770
+ if (params.reasoningEffort && !/^grok-4\.3(?:$|[-.])/.test(resolvedModel)) {
1771
+ return createErrorResponse("grok_api_request", 1, "", corrId, new Error("reasoningEffort is currently supported only for xAI model grok-4.3"));
1772
+ }
1773
+ return {
1774
+ corrId,
1775
+ effectivePrompt,
1776
+ resolvedModel,
1777
+ instructions,
1778
+ input,
1779
+ stablePrefixHash: inputResolution.stablePrefixHash,
1780
+ stablePrefixTokens: inputResolution.stablePrefixTokens,
1781
+ };
1782
+ }
1783
+ function usageFromXaiResult(result) {
1784
+ return {
1785
+ inputTokens: result.usage.inputTokens,
1786
+ outputTokens: result.usage.outputTokens,
1787
+ cacheReadTokens: result.usage.cacheReadTokens,
1788
+ costUsd: result.usage.costUsd,
1789
+ };
1790
+ }
1791
+ async function getExistingSessionForProvider(sessionManager, sessionId, provider) {
1792
+ if (!sessionId)
1793
+ return null;
1794
+ const existing = await sessionManager.getSession(sessionId);
1795
+ if (existing && existing.cli !== provider) {
1796
+ throw new Error(`Session ${sessionId} belongs to provider '${existing.cli}', not '${provider}'`);
1797
+ }
1798
+ return existing;
1799
+ }
1800
+ function asXaiApiError(error) {
1801
+ if (error instanceof XaiApiError)
1802
+ return error;
1803
+ const cause = error?.cause;
1804
+ return cause instanceof XaiApiError ? cause : null;
1805
+ }
1806
+ function buildGrokApiToolResponse(args) {
1807
+ let text = args.result.text;
1808
+ if (args.optimizeResponse) {
1809
+ const optimized = optimizeResponseText(text);
1810
+ logOptimizationTokens("response", args.corrId, text, optimized);
1811
+ text = optimized;
1812
+ }
1813
+ const response = {
1814
+ content: [{ type: "text", text }],
1815
+ structuredContent: {
1816
+ provider: "grok-api",
1817
+ cli: "grok-api",
1818
+ model: args.result.model || args.prep.resolvedModel,
1819
+ correlationId: args.corrId,
1820
+ sessionId: args.sessionId || null,
1821
+ responseId: args.result.responseId,
1822
+ previousResponseId: args.previousResponseId || null,
1823
+ stalePreviousResponseCleared: args.stalePreviousResponseCleared,
1824
+ status: args.result.status,
1825
+ httpStatus: args.result.httpStatus,
1826
+ durationMs: args.durationMs,
1827
+ ...usageFromXaiResult(args.result),
1828
+ exitCode: 0,
1829
+ retryCount: 0,
1830
+ },
1831
+ };
1832
+ if (args.sessionId)
1833
+ response.sessionId = args.sessionId;
1834
+ return response;
1835
+ }
1836
+ async function resolveGrokApiSession(params, runtime) {
1837
+ if (params.sessionId) {
1838
+ const existing = await getExistingSessionForProvider(runtime.sessionManager, params.sessionId, "grok-api");
1839
+ const session = existing ??
1840
+ (await runtime.sessionManager.createSession("grok-api", "Grok API Session", params.sessionId));
1841
+ const previous = !params.createNewSession && typeof session.metadata?.xaiPreviousResponseId === "string"
1842
+ ? session.metadata.xaiPreviousResponseId
1843
+ : undefined;
1844
+ return { sessionId: session.id, previousResponseId: previous };
1845
+ }
1846
+ if (!params.createNewSession) {
1847
+ const active = await runtime.sessionManager.getActiveSession("grok-api");
1848
+ if (active) {
1849
+ const previous = typeof active.metadata?.xaiPreviousResponseId === "string"
1850
+ ? active.metadata.xaiPreviousResponseId
1851
+ : undefined;
1852
+ return { sessionId: active.id, previousResponseId: previous };
1853
+ }
1854
+ }
1855
+ const session = await runtime.sessionManager.createSession("grok-api", "Grok API Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1856
+ return { sessionId: session.id };
1857
+ }
1858
+ export async function handleGrokApiRequest(deps, params) {
1859
+ const runtime = resolveHandlerRuntime(deps);
1860
+ const startTime = Date.now();
1861
+ const prep = prepareGrokApiRequest(params, runtime.providers);
1862
+ if ("content" in prep)
1863
+ return prep;
1864
+ const { corrId } = prep;
1865
+ const xaiConfig = runtime.providers.xai;
1866
+ let durationMs = 0;
1867
+ let wasSuccessful = false;
1868
+ try {
1869
+ await getExistingSessionForProvider(runtime.sessionManager, params.sessionId, "grok-api");
1870
+ }
1871
+ catch (err) {
1872
+ return createErrorResponse("grok_api_request", 1, "", corrId, err);
1873
+ }
1874
+ if (!xaiConfig) {
1875
+ return createErrorResponse("grok_api_request", 1, "", corrId, new Error("[providers.xai] is not configured"));
1876
+ }
1877
+ const apiKey = process.env[xaiConfig.apiKeyEnv]?.trim();
1878
+ if (!apiKey) {
1879
+ return createErrorResponse("grok_api_request", 1, "", corrId, new Error(`xAI API key env var ${xaiConfig.apiKeyEnv} is not set`));
1880
+ }
1881
+ safeFlightStart({
1882
+ correlationId: corrId,
1883
+ cli: "grok-api",
1884
+ model: prep.resolvedModel,
1885
+ prompt: prep.effectivePrompt,
1886
+ sessionId: params.sessionId,
1887
+ stablePrefixHash: prep.stablePrefixHash ?? undefined,
1888
+ stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
1889
+ }, runtime);
1890
+ let sessionId;
1891
+ let previousResponseId;
1892
+ let stalePreviousResponseCleared = false;
1893
+ try {
1894
+ const session = await resolveGrokApiSession(params, runtime);
1895
+ sessionId = session.sessionId;
1896
+ previousResponseId = session.previousResponseId;
1897
+ const call = (prev) => createXaiResponse({
1898
+ baseUrl: xaiConfig.baseUrl,
1899
+ apiKey,
1900
+ model: prep.resolvedModel,
1901
+ input: prep.input,
1902
+ instructions: prep.instructions,
1903
+ previousResponseId: prev,
1904
+ maxOutputTokens: params.maxOutputTokens,
1905
+ temperature: params.temperature,
1906
+ topP: params.topP,
1907
+ reasoningEffort: params.reasoningEffort,
1908
+ timeoutMs: params.timeoutMs,
1909
+ }, runtime.logger);
1910
+ let result;
1911
+ try {
1912
+ result = await call(previousResponseId);
1913
+ }
1914
+ catch (error) {
1915
+ const xaiError = asXaiApiError(error);
1916
+ if (xaiError?.status === 404 && previousResponseId) {
1917
+ runtime.logger.warn(`[${corrId}] xAI previous_response_id was rejected; clearing stale session metadata and retrying fresh`);
1918
+ await runtime.sessionManager.updateSessionMetadata(sessionId, {
1919
+ xaiPreviousResponseId: null,
1920
+ xaiResponseCreatedAt: null,
1921
+ });
1922
+ stalePreviousResponseCleared = true;
1923
+ previousResponseId = undefined;
1924
+ result = await call(undefined);
1925
+ }
1926
+ else {
1927
+ throw error;
1928
+ }
1929
+ }
1930
+ durationMs = Math.max(0, Date.now() - startTime);
1931
+ wasSuccessful = true;
1932
+ await runtime.sessionManager.updateSessionMetadata(sessionId, {
1933
+ xaiPreviousResponseId: result.responseId,
1934
+ xaiResponseCreatedAt: new Date().toISOString(),
1935
+ xaiModel: result.model || prep.resolvedModel,
1936
+ });
1937
+ await runtime.sessionManager.updateSessionUsage(sessionId);
1938
+ safeFlightComplete(corrId, {
1939
+ response: result.text,
1940
+ durationMs,
1941
+ retryCount: 0,
1942
+ circuitBreakerState: "closed",
1943
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1944
+ exitCode: 0,
1945
+ status: "completed",
1946
+ ...usageFromXaiResult(result),
1947
+ }, runtime);
1948
+ return buildGrokApiToolResponse({
1949
+ result,
1950
+ prep,
1951
+ corrId,
1952
+ durationMs,
1953
+ sessionId,
1954
+ previousResponseId,
1955
+ stalePreviousResponseCleared,
1956
+ optimizeResponse: params.optimizeResponse ?? false,
1957
+ });
1958
+ }
1959
+ catch (error) {
1960
+ durationMs = Math.max(0, Date.now() - startTime);
1961
+ const err = error;
1962
+ const xaiError = asXaiApiError(error);
1963
+ runtime.logger.error(`[${corrId}] grok_api_request failed`, err.message);
1964
+ safeFlightComplete(corrId, {
1965
+ response: xaiError?.responseText ?? "",
1966
+ durationMs,
1967
+ retryCount: 0,
1968
+ circuitBreakerState: "closed",
1969
+ optimizationApplied: false,
1970
+ exitCode: 1,
1971
+ errorMessage: err.message,
1972
+ status: "failed",
1973
+ }, runtime);
1974
+ return createErrorResponse("grok_api_request", 1, "", corrId, err);
1975
+ }
1976
+ finally {
1977
+ runtime.performanceMetrics.recordRequest("grok-api", durationMs || Math.max(0, Date.now() - startTime), wasSuccessful);
1978
+ }
1979
+ }
1679
1980
  function maybeBuildCacheTtlWarning(args) {
1680
1981
  if (args.cli !== "claude")
1681
1982
  return null;
@@ -1762,9 +2063,12 @@ export async function handleGeminiRequest(deps, params) {
1762
2063
  resumeLatest: params.resumeLatest,
1763
2064
  createNewSession: params.createNewSession,
1764
2065
  });
1765
- args.push(...sessionPlan.args);
1766
2066
  const userProvidedSession = sessionPlan.resumed;
1767
2067
  const effectiveSessionIdHint = sessionPlan.resumed ? params.sessionId : undefined;
2068
+ if (effectiveSessionIdHint) {
2069
+ await getExistingSessionForProvider(deps.sessionManager, effectiveSessionIdHint, "gemini");
2070
+ }
2071
+ args.push(...sessionPlan.args);
1768
2072
  let worktreeResolution = {};
1769
2073
  try {
1770
2074
  worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionIdHint, runtime);
@@ -1887,11 +2191,11 @@ export async function handleGeminiRequestAsync(deps, params) {
1887
2191
  resumeLatest: params.resumeLatest,
1888
2192
  createNewSession: params.createNewSession,
1889
2193
  });
1890
- args.push(...sessionPlan.args);
1891
2194
  let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
2195
+ const existingSession = await getExistingSessionForProvider(deps.sessionManager, effectiveSessionId, "gemini");
2196
+ args.push(...sessionPlan.args);
1892
2197
  if (effectiveSessionId) {
1893
- const existing = await deps.sessionManager.getSession(effectiveSessionId);
1894
- if (!existing) {
2198
+ if (!existingSession) {
1895
2199
  try {
1896
2200
  await deps.sessionManager.createSession("gemini", "Gemini Session", effectiveSessionId);
1897
2201
  }
@@ -2012,6 +2316,9 @@ export async function handleGrokRequest(deps, params) {
2012
2316
  resumeLatest: params.resumeLatest,
2013
2317
  createNewSession: params.createNewSession,
2014
2318
  });
2319
+ if (sessionResult.userProvidedSession) {
2320
+ await getExistingSessionForProvider(deps.sessionManager, sessionResult.effectiveSessionId, "grok");
2321
+ }
2015
2322
  args.push(...sessionResult.resumeArgs);
2016
2323
  let worktreeResolution = {};
2017
2324
  try {
@@ -2158,6 +2465,9 @@ export async function handleGrokRequestAsync(deps, params) {
2158
2465
  resumeLatest: params.resumeLatest,
2159
2466
  createNewSession: params.createNewSession,
2160
2467
  });
2468
+ if (sessionResult.userProvidedSession) {
2469
+ await getExistingSessionForProvider(deps.sessionManager, sessionResult.effectiveSessionId, "grok");
2470
+ }
2161
2471
  args.push(...sessionResult.resumeArgs);
2162
2472
  let effectiveSessionId = sessionResult.effectiveSessionId;
2163
2473
  if (sessionResult.userProvidedSession && effectiveSessionId) {
@@ -2262,6 +2572,9 @@ export async function handleMistralRequest(deps, params) {
2262
2572
  resumeLatest: params.resumeLatest,
2263
2573
  createNewSession: params.createNewSession,
2264
2574
  });
2575
+ if (sessionResult.userProvidedSession) {
2576
+ await getExistingSessionForProvider(deps.sessionManager, sessionResult.effectiveSessionId, "mistral");
2577
+ }
2265
2578
  args.push(...sessionResult.resumeArgs);
2266
2579
  let worktreeResolution = {};
2267
2580
  try {
@@ -2397,11 +2710,11 @@ export async function handleMistralRequestAsync(deps, params) {
2397
2710
  resumeLatest: params.resumeLatest,
2398
2711
  createNewSession: params.createNewSession,
2399
2712
  });
2400
- args.push(...sessionResult.resumeArgs);
2401
2713
  let effectiveSessionId = sessionResult.effectiveSessionId;
2714
+ const existingSession = await getExistingSessionForProvider(deps.sessionManager, sessionResult.userProvidedSession ? effectiveSessionId : undefined, "mistral");
2715
+ args.push(...sessionResult.resumeArgs);
2402
2716
  if (sessionResult.userProvidedSession && effectiveSessionId) {
2403
- const existing = await deps.sessionManager.getSession(effectiveSessionId);
2404
- if (!existing) {
2717
+ if (!existingSession) {
2405
2718
  try {
2406
2719
  await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
2407
2720
  }
@@ -2458,6 +2771,12 @@ export async function handleMistralRequestAsync(deps, params) {
2458
2771
  }
2459
2772
  export async function handleCodexRequestAsync(deps, params) {
2460
2773
  const runtime = resolveHandlerRuntime(deps);
2774
+ try {
2775
+ await getExistingSessionForProvider(deps.sessionManager, params.sessionId, "codex");
2776
+ }
2777
+ catch (err) {
2778
+ return createErrorResponse("codex_request_async", 1, "", params.correlationId, err);
2779
+ }
2461
2780
  const prep = prepareCodexRequest({
2462
2781
  prompt: params.prompt,
2463
2782
  promptParts: params.promptParts,
@@ -2572,13 +2891,89 @@ export async function handleCodexRequestAsync(deps, params) {
2572
2891
  }
2573
2892
  export function createGatewayServer(deps = {}) {
2574
2893
  const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
2575
- const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
2894
+ const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, providers, } = runtime;
2576
2895
  void flightRecorder;
2577
2896
  void cacheAwareness;
2897
+ const grokApiToolsEnabled = shouldRegisterGrokApiTools(providers);
2578
2898
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
2579
- const server = newGatewayMcpServer(asyncJobsEnabled);
2899
+ const server = newGatewayMcpServer(asyncJobsEnabled, grokApiToolsEnabled);
2580
2900
  registerBaseResources(server, runtime);
2581
2901
  registerValidationTools(server, { asyncJobManager });
2902
+ if (grokApiToolsEnabled) {
2903
+ server.tool("grok_api_request", "Run an xAI Grok API request synchronously through the Responses API. Requires exactly one of prompt or promptParts. Registered only when [providers.xai] is configured and its API-key env var is present.", {
2904
+ prompt: z
2905
+ .string()
2906
+ .min(1, "Prompt cannot be empty")
2907
+ .max(100000, "Prompt too long (max 100k chars)")
2908
+ .optional()
2909
+ .describe("Prompt text for xAI Grok API (mutually exclusive with promptParts)"),
2910
+ promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. The stable prefix hash is logged for cache_state aggregates; xAI does not receive cache_control hints."),
2911
+ model: z
2912
+ .string()
2913
+ .min(1)
2914
+ .optional()
2915
+ .describe("xAI model id; defaults to [providers.xai].default_model"),
2916
+ sessionId: z
2917
+ .string()
2918
+ .optional()
2919
+ .describe("Gateway grok-api session to continue. The gateway stores xAI previous_response_id in session metadata."),
2920
+ createNewSession: z
2921
+ .boolean()
2922
+ .default(false)
2923
+ .describe("Start a fresh xAI response chain. With sessionId, ignores any stored previous_response_id for this request."),
2924
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2925
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2926
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
2927
+ maxOutputTokens: MAX_TOKENS_SCHEMA.optional().describe("xAI Responses API max_output_tokens. Bounded to safe integers <= 100000000."),
2928
+ temperature: z
2929
+ .number()
2930
+ .finite()
2931
+ .min(0)
2932
+ .max(2)
2933
+ .optional()
2934
+ .describe("Sampling temperature passed to xAI Responses API"),
2935
+ topP: z
2936
+ .number()
2937
+ .finite()
2938
+ .min(0)
2939
+ .max(1)
2940
+ .optional()
2941
+ .describe("Nucleus sampling top_p passed to xAI Responses API"),
2942
+ reasoningEffort: z
2943
+ .enum(["none", "low", "medium", "high"])
2944
+ .optional()
2945
+ .describe("xAI Responses API reasoning.effort"),
2946
+ timeoutMs: z
2947
+ .number()
2948
+ .int()
2949
+ .min(30_000)
2950
+ .max(3_600_000)
2951
+ .optional()
2952
+ .describe("HTTP request timeout in ms (min 30s, max 1h, default 10m)"),
2953
+ }, {
2954
+ title: "Grok API request",
2955
+ readOnlyHint: false,
2956
+ destructiveHint: false,
2957
+ idempotentHint: false,
2958
+ openWorldHint: true,
2959
+ }, async ({ prompt, promptParts, model, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, maxOutputTokens, temperature, topP, reasoningEffort, timeoutMs, }) => {
2960
+ return handleGrokApiRequest({ sessionManager, logger, runtime }, {
2961
+ prompt,
2962
+ promptParts,
2963
+ model,
2964
+ sessionId,
2965
+ createNewSession,
2966
+ correlationId,
2967
+ optimizePrompt,
2968
+ optimizeResponse,
2969
+ maxOutputTokens,
2970
+ temperature,
2971
+ topP,
2972
+ reasoningEffort,
2973
+ timeoutMs,
2974
+ });
2975
+ });
2976
+ }
2582
2977
  server.tool("claude_request", "Run a Claude Code CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
2583
2978
  prompt: z
2584
2979
  .string()
@@ -2718,6 +3113,12 @@ export function createGatewayServer(deps = {}) {
2718
3113
  .boolean()
2719
3114
  .default(false)
2720
3115
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3116
+ }, {
3117
+ title: "Claude Code request",
3118
+ readOnlyHint: false,
3119
+ destructiveHint: true,
3120
+ idempotentHint: false,
3121
+ openWorldHint: true,
2721
3122
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2722
3123
  const startTime = Date.now();
2723
3124
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
@@ -2777,6 +3178,12 @@ export function createGatewayServer(deps = {}) {
2777
3178
  if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2778
3179
  useContinue = true;
2779
3180
  }
3181
+ try {
3182
+ await getExistingSessionForProvider(sessionManager, effectiveSessionId, "claude");
3183
+ }
3184
+ catch (err) {
3185
+ return createErrorResponse("claude_request", 1, "", corrId, err);
3186
+ }
2780
3187
  const ttlWarning = maybeBuildCacheTtlWarning({
2781
3188
  runtime,
2782
3189
  sessionId: effectiveSessionId,
@@ -3019,6 +3426,12 @@ export function createGatewayServer(deps = {}) {
3019
3426
  .optional()
3020
3427
  .describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
3021
3428
  worktree: WORKTREE_SCHEMA.optional(),
3429
+ }, {
3430
+ title: "Codex request",
3431
+ readOnlyHint: false,
3432
+ destructiveHint: true,
3433
+ idempotentHint: false,
3434
+ openWorldHint: true,
3022
3435
  }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
3023
3436
  const startTime = Date.now();
3024
3437
  const prep = prepareCodexRequest({
@@ -3056,6 +3469,12 @@ export function createGatewayServer(deps = {}) {
3056
3469
  const { corrId, args } = prep;
3057
3470
  let durationMs = 0;
3058
3471
  let wasSuccessful = false;
3472
+ try {
3473
+ await getExistingSessionForProvider(sessionManager, sessionId, "codex");
3474
+ }
3475
+ catch (err) {
3476
+ return createErrorResponse("codex_request", 1, "", corrId, err);
3477
+ }
3059
3478
  safeFlightStart({
3060
3479
  correlationId: corrId,
3061
3480
  cli: "codex",
@@ -3191,6 +3610,12 @@ export function createGatewayServer(deps = {}) {
3191
3610
  .max(3_600_000)
3192
3611
  .optional()
3193
3612
  .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3613
+ }, {
3614
+ title: "Fork Codex session",
3615
+ readOnlyHint: false,
3616
+ destructiveHint: true,
3617
+ idempotentHint: false,
3618
+ openWorldHint: true,
3194
3619
  }, async ({ prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs, }) => {
3195
3620
  const corrId = correlationId || randomUUID();
3196
3621
  const startTime = Date.now();
@@ -3202,6 +3627,12 @@ export function createGatewayServer(deps = {}) {
3202
3627
  if (!sessionId && !forkLast) {
3203
3628
  return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("one of sessionId or forkLast is required"));
3204
3629
  }
3630
+ try {
3631
+ await getExistingSessionForProvider(sessionManager, sessionId, "codex");
3632
+ }
3633
+ catch (err) {
3634
+ return createErrorResponse("codex_fork_session", 1, "", corrId, err);
3635
+ }
3205
3636
  let forkArgs;
3206
3637
  try {
3207
3638
  forkArgs = prepareCodexForkRequest({ prompt, sessionId, forkLast }).args;
@@ -3317,6 +3748,12 @@ export function createGatewayServer(deps = {}) {
3317
3748
  .optional()
3318
3749
  .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
3319
3750
  worktree: WORKTREE_SCHEMA.optional(),
3751
+ }, {
3752
+ title: "Gemini request",
3753
+ readOnlyHint: false,
3754
+ destructiveHint: true,
3755
+ idempotentHint: false,
3756
+ openWorldHint: true,
3320
3757
  }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
3321
3758
  return handleGeminiRequest({ sessionManager, logger, runtime }, {
3322
3759
  prompt,
@@ -3521,6 +3958,12 @@ export function createGatewayServer(deps = {}) {
3521
3958
  .optional()
3522
3959
  .describe("Grok -w/--worktree: native CLI worktree flag (`true` → bare `--worktree`, string → named). NOT gateway slice λ `worktree`."),
3523
3960
  worktree: WORKTREE_SCHEMA.optional(),
3961
+ }, {
3962
+ title: "Grok request",
3963
+ readOnlyHint: false,
3964
+ destructiveHint: true,
3965
+ idempotentHint: false,
3966
+ openWorldHint: true,
3524
3967
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, worktree, }) => {
3525
3968
  return handleGrokRequest({ sessionManager, logger, runtime }, {
3526
3969
  prompt,
@@ -3655,6 +4098,12 @@ export function createGatewayServer(deps = {}) {
3655
4098
  .optional()
3656
4099
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
3657
4100
  worktree: WORKTREE_SCHEMA.optional(),
4101
+ }, {
4102
+ title: "Mistral Vibe request",
4103
+ readOnlyHint: false,
4104
+ destructiveHint: true,
4105
+ idempotentHint: false,
4106
+ openWorldHint: true,
3658
4107
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
3659
4108
  return handleMistralRequest({ sessionManager, logger, runtime }, {
3660
4109
  prompt,
@@ -3823,6 +4272,12 @@ export function createGatewayServer(deps = {}) {
3823
4272
  .boolean()
3824
4273
  .default(false)
3825
4274
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4275
+ }, {
4276
+ title: "Claude Code request (async job)",
4277
+ readOnlyHint: false,
4278
+ destructiveHint: true,
4279
+ idempotentHint: false,
4280
+ openWorldHint: true,
3826
4281
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3827
4282
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
3828
4283
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -3874,6 +4329,7 @@ export function createGatewayServer(deps = {}) {
3874
4329
  if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
3875
4330
  useContinue = true;
3876
4331
  }
4332
+ const existingSession = await getExistingSessionForProvider(sessionManager, effectiveSessionId, "claude");
3877
4333
  if (useContinue) {
3878
4334
  args.push("--continue");
3879
4335
  }
@@ -3882,7 +4338,6 @@ export function createGatewayServer(deps = {}) {
3882
4338
  await sessionManager.updateSessionUsage(effectiveSessionId);
3883
4339
  }
3884
4340
  if (effectiveSessionId) {
3885
- const existingSession = await sessionManager.getSession(effectiveSessionId);
3886
4341
  if (!existingSession) {
3887
4342
  await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
3888
4343
  }
@@ -4035,6 +4490,12 @@ export function createGatewayServer(deps = {}) {
4035
4490
  .optional()
4036
4491
  .describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
4037
4492
  worktree: WORKTREE_SCHEMA.optional(),
4493
+ }, {
4494
+ title: "Codex request (async job)",
4495
+ readOnlyHint: false,
4496
+ destructiveHint: true,
4497
+ idempotentHint: false,
4498
+ openWorldHint: true,
4038
4499
  }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
4039
4500
  return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4040
4501
  prompt,
@@ -4138,6 +4599,12 @@ export function createGatewayServer(deps = {}) {
4138
4599
  .optional()
4139
4600
  .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
4140
4601
  worktree: WORKTREE_SCHEMA.optional(),
4602
+ }, {
4603
+ title: "Gemini request (async job)",
4604
+ readOnlyHint: false,
4605
+ destructiveHint: true,
4606
+ idempotentHint: false,
4607
+ openWorldHint: true,
4141
4608
  }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
4142
4609
  return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4143
4610
  prompt,
@@ -4343,6 +4810,12 @@ export function createGatewayServer(deps = {}) {
4343
4810
  .optional()
4344
4811
  .describe("Grok -w/--worktree: native CLI worktree flag (`true` → bare `--worktree`, string → named). NOT gateway slice λ `worktree`."),
4345
4812
  worktree: WORKTREE_SCHEMA.optional(),
4813
+ }, {
4814
+ title: "Grok request (async job)",
4815
+ readOnlyHint: false,
4816
+ destructiveHint: true,
4817
+ idempotentHint: false,
4818
+ openWorldHint: true,
4346
4819
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, worktree, }) => {
4347
4820
  return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4348
4821
  prompt,
@@ -4475,6 +4948,12 @@ export function createGatewayServer(deps = {}) {
4475
4948
  .optional()
4476
4949
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
4477
4950
  worktree: WORKTREE_SCHEMA.optional(),
4951
+ }, {
4952
+ title: "Mistral Vibe request (async job)",
4953
+ readOnlyHint: false,
4954
+ destructiveHint: true,
4955
+ idempotentHint: false,
4956
+ openWorldHint: true,
4478
4957
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
4479
4958
  return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4480
4959
  prompt,
@@ -4505,6 +4984,12 @@ export function createGatewayServer(deps = {}) {
4505
4984
  });
4506
4985
  server.tool("llm_job_status", "Check lifecycle status (running|completed|failed|canceled|orphaned) of a gateway async or deferred-sync job by jobId.", {
4507
4986
  jobId: z.string().describe("Async job ID from *_request_async"),
4987
+ }, {
4988
+ title: "Async job status",
4989
+ readOnlyHint: true,
4990
+ destructiveHint: false,
4991
+ idempotentHint: true,
4992
+ openWorldHint: false,
4508
4993
  }, async ({ jobId }) => {
4509
4994
  const job = asyncJobManager.getJobSnapshot(jobId);
4510
4995
  if (!job) {
@@ -4543,6 +5028,12 @@ export function createGatewayServer(deps = {}) {
4543
5028
  .max(2000000)
4544
5029
  .default(200000)
4545
5030
  .describe("Max chars returned per stream"),
5031
+ }, {
5032
+ title: "Async job result",
5033
+ readOnlyHint: true,
5034
+ destructiveHint: false,
5035
+ idempotentHint: true,
5036
+ openWorldHint: false,
4546
5037
  }, async ({ jobId, maxChars }) => {
4547
5038
  const result = asyncJobManager.getJobResult(jobId, maxChars);
4548
5039
  if (!result) {
@@ -4590,6 +5081,12 @@ export function createGatewayServer(deps = {}) {
4590
5081
  });
4591
5082
  server.tool("llm_job_cancel", "Cancel a running gateway async or deferred-sync job by jobId.", {
4592
5083
  jobId: z.string().describe("Async job ID from *_request_async"),
5084
+ }, {
5085
+ title: "Cancel async job",
5086
+ readOnlyHint: false,
5087
+ destructiveHint: true,
5088
+ idempotentHint: true,
5089
+ openWorldHint: false,
4593
5090
  }, async ({ jobId }) => {
4594
5091
  const cancel = asyncJobManager.cancelJob(jobId);
4595
5092
  if (!cancel.canceled) {
@@ -4636,6 +5133,12 @@ export function createGatewayServer(deps = {}) {
4636
5133
  .boolean()
4637
5134
  .default(false)
4638
5135
  .describe("Include the full persisted prompt text in the result"),
5136
+ }, {
5137
+ title: "Persisted request lookup",
5138
+ readOnlyHint: true,
5139
+ destructiveHint: false,
5140
+ idempotentHint: true,
5141
+ openWorldHint: false,
4639
5142
  }, async ({ correlationId, maxChars, includePrompt }) => {
4640
5143
  const record = readPersistedRequest(flightRecorder, correlationId, {
4641
5144
  maxChars,
@@ -4666,7 +5169,13 @@ export function createGatewayServer(deps = {}) {
4666
5169
  ],
4667
5170
  };
4668
5171
  });
4669
- server.tool("llm_process_health", "Report gateway process health: async-job manager state plus the resolved persistence configuration and paths.", {}, async () => {
5172
+ server.tool("llm_process_health", "Report gateway process health: async-job manager state plus the resolved persistence configuration and paths.", {}, {
5173
+ title: "Gateway process health",
5174
+ readOnlyHint: true,
5175
+ destructiveHint: false,
5176
+ idempotentHint: true,
5177
+ openWorldHint: false,
5178
+ }, async () => {
4670
5179
  const health = asyncJobManager.getJobHealth();
4671
5180
  const persistenceBlock = {
4672
5181
  backend: persistence.backend,
@@ -4681,11 +5190,33 @@ export function createGatewayServer(deps = {}) {
4681
5190
  ? null
4682
5191
  : "Async job persistence is disabled (backend = 'none'). *_request_async tools are NOT registered on this gateway. Set [persistence].backend = 'sqlite' (or 'memory' + acknowledgeEphemeral = true) to enable them.",
4683
5192
  };
5193
+ const outboundProviders = {
5194
+ xai: providers.xai
5195
+ ? {
5196
+ configured: true,
5197
+ enabled: isXaiProviderEnabled(providers),
5198
+ apiKeyEnv: providers.xai.apiKeyEnv,
5199
+ apiKeyPresent: isXaiProviderEnabled(providers),
5200
+ baseUrl: providers.xai.baseUrl,
5201
+ defaultModel: providers.xai.defaultModel,
5202
+ mode: isXaiProviderEnabled(providers) ? "sync" : "configured-missing-key",
5203
+ }
5204
+ : {
5205
+ configured: false,
5206
+ enabled: false,
5207
+ apiKeyEnv: null,
5208
+ apiKeyPresent: false,
5209
+ baseUrl: null,
5210
+ defaultModel: null,
5211
+ mode: "disabled",
5212
+ },
5213
+ sources: providers.sources,
5214
+ };
4684
5215
  return {
4685
5216
  content: [
4686
5217
  {
4687
5218
  type: "text",
4688
- text: JSON.stringify({ success: true, ...health, persistence: persistenceBlock }, null, 2),
5219
+ text: JSON.stringify({ success: true, ...health, persistence: persistenceBlock, outboundProviders }, null, 2),
4689
5220
  },
4690
5221
  ],
4691
5222
  };
@@ -4702,6 +5233,12 @@ export function createGatewayServer(deps = {}) {
4702
5233
  .enum(["claude", "codex", "gemini", "grok", "mistral"])
4703
5234
  .optional()
4704
5235
  .describe("Optional CLI filter"),
5236
+ }, {
5237
+ title: "Approval decisions",
5238
+ readOnlyHint: true,
5239
+ destructiveHint: false,
5240
+ idempotentHint: true,
5241
+ openWorldHint: false,
4705
5242
  }, async ({ limit, cli }) => {
4706
5243
  const approvals = approvalManager.list(limit, cli);
4707
5244
  return {
@@ -4721,6 +5258,12 @@ export function createGatewayServer(deps = {}) {
4721
5258
  cli: z
4722
5259
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4723
5260
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
5261
+ }, {
5262
+ title: "Provider models",
5263
+ readOnlyHint: true,
5264
+ destructiveHint: false,
5265
+ idempotentHint: true,
5266
+ openWorldHint: false,
4724
5267
  }, async ({ cli }) => {
4725
5268
  const cliInfo = getAvailableCliInfo();
4726
5269
  const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
@@ -4730,22 +5273,161 @@ export function createGatewayServer(deps = {}) {
4730
5273
  cli: z
4731
5274
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4732
5275
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
5276
+ }, {
5277
+ title: "Provider CLI versions",
5278
+ readOnlyHint: true,
5279
+ destructiveHint: false,
5280
+ idempotentHint: true,
5281
+ openWorldHint: false,
4733
5282
  }, async ({ cli }) => {
4734
5283
  const versions = await getCliVersions(cli);
4735
5284
  return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
4736
5285
  });
4737
5286
  server.tool("upstream_contracts", "Return the gateway's declared provider CLI contracts; with probeInstalled true, diff against installed --help surfaces to detect flag drift.", {
4738
5287
  cli: z
4739
- .preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional())
5288
+ .preprocess(value => (value === "" || value === null ? undefined : value), CLI_TYPE_ENUM.optional())
4740
5289
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4741
5290
  probeInstalled: z
4742
5291
  .boolean()
4743
5292
  .default(false)
4744
5293
  .describe("When true, run local --help probes and compare advertised flags against the declared contract. Strongly recommended after any provider CLI upgrade to detect drift."),
5294
+ }, {
5295
+ title: "Provider CLI contracts",
5296
+ readOnlyHint: true,
5297
+ destructiveHint: false,
5298
+ idempotentHint: true,
5299
+ openWorldHint: false,
4745
5300
  }, async ({ cli, probeInstalled }) => {
4746
5301
  const report = buildUpstreamContractReport({ cli, probeInstalled });
4747
5302
  return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
4748
5303
  });
5304
+ server.tool("provider_subcommands_list", "Return a compact, filterable read-only catalog of declared provider CLI subcommands without flags or raw help.", {
5305
+ provider: z
5306
+ .preprocess(value => (value === "" || value === null ? undefined : value), CLI_TYPE_ENUM.optional())
5307
+ .describe("Optional provider filter (claude|codex|gemini|grok|mistral)"),
5308
+ tier: z
5309
+ .enum(["catalog", "inspect", "execute_candidate", "diagnostic"])
5310
+ .optional()
5311
+ .describe("Optional subcommand tier filter"),
5312
+ risk: z
5313
+ .enum([
5314
+ "read_only",
5315
+ "writes_local_config",
5316
+ "auth",
5317
+ "network",
5318
+ "starts_server",
5319
+ "updates_binary",
5320
+ "destructive",
5321
+ "executes_agent",
5322
+ ])
5323
+ .optional()
5324
+ .describe("Optional risk classification filter"),
5325
+ exposure: z
5326
+ .enum(["tracked_only", "mcp_readonly", "mcp_requires_approval", "not_exposed"])
5327
+ .optional()
5328
+ .describe("Optional MCP exposure filter"),
5329
+ commandPathPrefix: z
5330
+ .array(z.string().min(1))
5331
+ .optional()
5332
+ .describe("Optional command path prefix filter, e.g. ['agent']"),
5333
+ }, {
5334
+ title: "Provider subcommands catalog",
5335
+ readOnlyHint: true,
5336
+ destructiveHint: false,
5337
+ idempotentHint: true,
5338
+ openWorldHint: false,
5339
+ }, async ({ provider, tier, risk, exposure, commandPathPrefix }) => {
5340
+ const catalog = buildProviderSubcommandsCompactCatalog({
5341
+ provider,
5342
+ tier,
5343
+ risk,
5344
+ exposure,
5345
+ commandPathPrefix,
5346
+ });
5347
+ return {
5348
+ content: [
5349
+ {
5350
+ type: "text",
5351
+ text: JSON.stringify({ ...catalog, total: catalog.rows.length }),
5352
+ },
5353
+ ],
5354
+ };
5355
+ });
5356
+ server.tool("provider_subcommand_contract", "Return the detailed read-only contract for exactly one declared provider CLI subcommand.", {
5357
+ provider: CLI_TYPE_ENUM.describe("Provider (claude|codex|gemini|grok|mistral)"),
5358
+ commandPath: z.array(z.string().min(1)).min(1).describe("Command path segments"),
5359
+ }, {
5360
+ title: "Provider subcommand contract",
5361
+ readOnlyHint: true,
5362
+ destructiveHint: false,
5363
+ idempotentHint: true,
5364
+ openWorldHint: false,
5365
+ }, async ({ provider, commandPath }) => {
5366
+ const contract = getCliSubcommandContract(provider, commandPath);
5367
+ const payload = contract
5368
+ ? {
5369
+ schemaVersion: "provider-subcommand-contract.v1",
5370
+ contract: serializeCliSubcommandContract(provider, contract),
5371
+ }
5372
+ : {
5373
+ schemaVersion: "provider-subcommand-contract.v1",
5374
+ error: `No declared ${provider} subcommand contract for ${commandPath.join(" ")}`,
5375
+ };
5376
+ return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
5377
+ });
5378
+ server.tool("provider_subcommand_drift", "Probe declared provider subcommand --help surfaces and return compact drift rows without raw help output.", {
5379
+ provider: z
5380
+ .preprocess(value => (value === "" || value === null ? undefined : value), CLI_TYPE_ENUM.optional())
5381
+ .describe("Optional provider filter (claude|codex|gemini|grok|mistral)"),
5382
+ includeClean: z
5383
+ .boolean()
5384
+ .default(false)
5385
+ .describe("When false, return only unavailable or drifted command paths"),
5386
+ }, {
5387
+ title: "Provider subcommand drift",
5388
+ readOnlyHint: true,
5389
+ destructiveHint: false,
5390
+ idempotentHint: true,
5391
+ openWorldHint: false,
5392
+ }, async ({ provider, includeClean }) => {
5393
+ const providers = provider ? [provider] : CLI_TYPES;
5394
+ const rows = providers.flatMap(cli => {
5395
+ const probe = probeInstalledCliContract(cli);
5396
+ return Object.values(probe.subcommands).flatMap(sub => {
5397
+ const drifted = !sub.available || sub.extraFlags.length > 0 || sub.missingFlags.length > 0;
5398
+ if (!includeClean && !drifted)
5399
+ return [];
5400
+ return [
5401
+ {
5402
+ provider: cli,
5403
+ commandPath: sub.commandPath,
5404
+ driftStatus: drifted ? "drift" : "clean",
5405
+ available: sub.available,
5406
+ extraVsContract: sub.extraFlags,
5407
+ missingFromBinary: sub.missingFlags,
5408
+ helpHash: sub.helpHash ?? null,
5409
+ risk: sub.risk,
5410
+ exposure: sub.exposure,
5411
+ tier: sub.tier,
5412
+ summary: sub.summary,
5413
+ warnings: sub.warnings,
5414
+ },
5415
+ ];
5416
+ });
5417
+ });
5418
+ return {
5419
+ content: [
5420
+ {
5421
+ type: "text",
5422
+ text: JSON.stringify({
5423
+ schemaVersion: "provider-subcommand-drift.v1",
5424
+ total: rows.length,
5425
+ rows,
5426
+ }),
5427
+ },
5428
+ ],
5429
+ };
5430
+ });
4749
5431
  server.tool("cli_upgrade", "Plan (dryRun, default true) or execute an upgrade for one provider CLI using its native update mechanism.", {
4750
5432
  cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
4751
5433
  target: z
@@ -4764,6 +5446,12 @@ export function createGatewayServer(deps = {}) {
4764
5446
  .max(3_600_000)
4765
5447
  .optional()
4766
5448
  .describe("Upgrade timeout in ms when dryRun=false"),
5449
+ }, {
5450
+ title: "Upgrade provider CLI",
5451
+ readOnlyHint: false,
5452
+ destructiveHint: true,
5453
+ idempotentHint: false,
5454
+ openWorldHint: true,
4767
5455
  }, async ({ cli, target, dryRun, timeoutMs }) => {
4768
5456
  try {
4769
5457
  const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
@@ -4795,10 +5483,16 @@ export function createGatewayServer(deps = {}) {
4795
5483
  };
4796
5484
  }
4797
5485
  });
4798
- server.tool("session_create", "Create a gateway session record for a provider CLI. NOTE: this is gateway bookkeeping (gw-* ID), not a provider-native session — Codex resume needs a real Codex UUID.", {
4799
- cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
5486
+ server.tool("session_create", "Create a gateway session record for a provider. NOTE: this is gateway bookkeeping (gw-* ID), not a provider-native session — Codex resume needs a real Codex UUID.", {
5487
+ cli: SESSION_PROVIDER_ENUM.describe("Provider type (claude|codex|gemini|grok|mistral|grok-api)"),
4800
5488
  description: z.string().optional().describe("Session description"),
4801
5489
  setAsActive: z.boolean().default(true).describe("Set as active session"),
5490
+ }, {
5491
+ title: "Create session record",
5492
+ readOnlyHint: false,
5493
+ destructiveHint: false,
5494
+ idempotentHint: false,
5495
+ openWorldHint: false,
4802
5496
  }, async ({ cli, description, setAsActive }) => {
4803
5497
  try {
4804
5498
  const session = await sessionManager.createSession(cli, description);
@@ -4828,18 +5522,21 @@ export function createGatewayServer(deps = {}) {
4828
5522
  return createErrorResponse("session_create", 1, "", undefined, error);
4829
5523
  }
4830
5524
  });
4831
- server.tool("session_list", "List gateway session records and the active session per CLI, optionally filtered by CLI.", {
4832
- cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
5525
+ server.tool("session_list", "List gateway session records and the active session per provider, optionally filtered by provider.", {
5526
+ cli: SESSION_PROVIDER_ENUM.optional().describe("Provider filter (claude|codex|gemini|grok|mistral|grok-api)"),
5527
+ }, {
5528
+ title: "List sessions",
5529
+ readOnlyHint: true,
5530
+ destructiveHint: false,
5531
+ idempotentHint: true,
5532
+ openWorldHint: false,
4833
5533
  }, async ({ cli }) => {
4834
5534
  try {
4835
5535
  const sessions = await sessionManager.listSessions(cli);
4836
- const activeSessions = {
4837
- claude: await sessionManager.getActiveSession("claude"),
4838
- codex: await sessionManager.getActiveSession("codex"),
4839
- gemini: await sessionManager.getActiveSession("gemini"),
4840
- grok: await sessionManager.getActiveSession("grok"),
4841
- mistral: await sessionManager.getActiveSession("mistral"),
4842
- };
5536
+ const activeSessions = Object.fromEntries(await Promise.all(SESSION_PROVIDER_VALUES.map(async (provider) => [
5537
+ provider,
5538
+ await sessionManager.getActiveSession(provider),
5539
+ ])));
4843
5540
  const sessionList = sessions.map(s => ({
4844
5541
  id: s.id,
4845
5542
  cli: s.cli,
@@ -4855,13 +5552,10 @@ export function createGatewayServer(deps = {}) {
4855
5552
  text: JSON.stringify({
4856
5553
  total: sessionList.length,
4857
5554
  sessions: sessionList,
4858
- activeSessions: {
4859
- claude: activeSessions.claude?.id || null,
4860
- codex: activeSessions.codex?.id || null,
4861
- gemini: activeSessions.gemini?.id || null,
4862
- grok: activeSessions.grok?.id || null,
4863
- mistral: activeSessions.mistral?.id || null,
4864
- },
5555
+ activeSessions: Object.fromEntries(SESSION_PROVIDER_VALUES.map(provider => [
5556
+ provider,
5557
+ activeSessions[provider]?.id || null,
5558
+ ])),
4865
5559
  }, null, 2),
4866
5560
  },
4867
5561
  ],
@@ -4871,9 +5565,15 @@ export function createGatewayServer(deps = {}) {
4871
5565
  return createErrorResponse("session_list", 1, "", undefined, error);
4872
5566
  }
4873
5567
  });
4874
- server.tool("session_set_active", "Set or clear the active session for a CLI; the active session is used when a request omits sessionId.", {
4875
- cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
5568
+ server.tool("session_set_active", "Set or clear the active session for a provider; the active session is used when a request omits sessionId.", {
5569
+ cli: SESSION_PROVIDER_ENUM.describe("Provider type (claude|codex|gemini|grok|mistral|grok-api)"),
4876
5570
  sessionId: z.string().nullable().describe("Session ID (null to clear)"),
5571
+ }, {
5572
+ title: "Set active session",
5573
+ readOnlyHint: false,
5574
+ destructiveHint: false,
5575
+ idempotentHint: true,
5576
+ openWorldHint: false,
4877
5577
  }, async ({ cli, sessionId }) => {
4878
5578
  try {
4879
5579
  const success = await sessionManager.setActiveSession(cli, sessionId || null);
@@ -4884,7 +5584,7 @@ export function createGatewayServer(deps = {}) {
4884
5584
  type: "text",
4885
5585
  text: JSON.stringify({
4886
5586
  success: false,
4887
- error: "Session not found or does not belong to the specified CLI",
5587
+ error: "Session not found or does not belong to the specified provider",
4888
5588
  }, null, 2),
4889
5589
  },
4890
5590
  ],
@@ -4911,6 +5611,12 @@ export function createGatewayServer(deps = {}) {
4911
5611
  });
4912
5612
  server.tool("session_delete", "Delete a gateway session record by ID (also removes any gateway-owned worktree attached to it).", {
4913
5613
  sessionId: z.string().describe("Session ID"),
5614
+ }, {
5615
+ title: "Delete session",
5616
+ readOnlyHint: false,
5617
+ destructiveHint: true,
5618
+ idempotentHint: true,
5619
+ openWorldHint: false,
4914
5620
  }, async ({ sessionId }) => {
4915
5621
  try {
4916
5622
  const session = await sessionManager.getSession(sessionId);
@@ -4952,6 +5658,12 @@ export function createGatewayServer(deps = {}) {
4952
5658
  });
4953
5659
  server.tool("session_get", "Get one gateway session record by session ID, including recent request history when available.", {
4954
5660
  sessionId: z.string().describe("Session ID"),
5661
+ }, {
5662
+ title: "Get session",
5663
+ readOnlyHint: true,
5664
+ destructiveHint: false,
5665
+ idempotentHint: true,
5666
+ openWorldHint: false,
4955
5667
  }, async ({ sessionId }) => {
4956
5668
  try {
4957
5669
  const session = await sessionManager.getSession(sessionId);
@@ -5013,8 +5725,14 @@ export function createGatewayServer(deps = {}) {
5013
5725
  return createErrorResponse("session_get", 1, "", undefined, error);
5014
5726
  }
5015
5727
  });
5016
- server.tool("session_clear_all", "Delete all gateway session records, optionally scoped to one CLI.", {
5017
- cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
5728
+ server.tool("session_clear_all", "Delete all gateway session records, optionally scoped to one provider.", {
5729
+ cli: SESSION_PROVIDER_ENUM.optional().describe("Provider filter (claude|codex|gemini|grok|mistral|grok-api)"),
5730
+ }, {
5731
+ title: "Clear sessions",
5732
+ readOnlyHint: false,
5733
+ destructiveHint: true,
5734
+ idempotentHint: true,
5735
+ openWorldHint: false,
5018
5736
  }, async ({ cli }) => {
5019
5737
  try {
5020
5738
  const count = await sessionManager.clearAllSessions(cli);
@@ -5164,9 +5882,7 @@ async function main() {
5164
5882
  if (args[0] === "contracts") {
5165
5883
  if (args.includes("--json")) {
5166
5884
  const cliArg = args.find(arg => arg.startsWith("--cli="))?.split("=")[1];
5167
- const cli = SESSION_PROVIDER_VALUES.includes(cliArg)
5168
- ? cliArg
5169
- : undefined;
5885
+ const cli = CLI_TYPES.includes(cliArg) ? cliArg : undefined;
5170
5886
  if (cliArg && !cli) {
5171
5887
  process.stderr.write(`Unsupported --cli value: ${cliArg}\n`);
5172
5888
  process.exit(2);