llm-cli-gateway 2.1.0 → 2.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,47 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.3.0] - 2026-06-08: MCP tool annotations and client safety hints
8
+
9
+ ### Added
10
+
11
+ - MCP tool annotations for all 37 tools (per MCP spec + tool-design best
12
+ practice): display `title` plus `readOnlyHint`/`destructiveHint`/
13
+ `idempotentHint`/`openWorldHint` on every registration. 14 pure-read tools
14
+ marked read-only/closed-world; `cli_upgrade`, `session_delete`,
15
+ `session_clear_all`, `llm_job_cancel` marked destructive; every
16
+ provider-spawning tool (requests, fork, validation) marked open-world with
17
+ destructive potential (spawned agentic CLIs can modify the environment).
18
+ Clients can use the hints for confirmation UX and safe auto-approval. New
19
+ invariant test pins titles, the exact destructive/read-only/open-world
20
+ sets, and the readOnly+destructive contradiction ban.
21
+
22
+ ## [2.2.0] - 2026-06-07: MCP tool-surface usability — self-describing tools
23
+
24
+ ### Added
25
+
26
+ - MCP tool-surface usability (4-seat cross-LLM review): all 37 tools now carry
27
+ action descriptions (previously none had tool-level descriptions — clients
28
+ that rank, search, or defer tools by description saw bare names); sync
29
+ `*_request` descriptions state the prompt/promptParts exactly-one rule and
30
+ conditional deferral; `job_status`/`job_result` vs `llm_job_*` and the
31
+ local-only `compare_answers` are disambiguated; session/`sessionId`
32
+ descriptions gain per-provider resume-semantics parity.
33
+
34
+ ### Fixed
35
+
36
+ - Codex gateway-bookkeeping sessions are now created with the reserved `gw-`
37
+ prefix (4 sites), so resuming a gateway ID fails fast with an actionable
38
+ error instead of reaching `codex exec resume` and dying with "no rollout
39
+ found" (root cause of real-world resume failures).
40
+ - Server instructions are now built per-server from the same derived gate as
41
+ tool registration (backend, asyncJobsEnabled, hasStore()), so a
42
+ `backend = "none"` gateway no longer advertises unregistered
43
+ `*_request_async`/`llm_job_*` tools.
44
+ - Sync auto-deferral is disabled when async jobs are unavailable — previously
45
+ a request could defer into an in-memory job whose polling tools were not
46
+ registered (dead-end jobId).
47
+
7
48
  ## [2.1.0] - 2026-06-07: Grok Build 0.2.32, probe drift acknowledgement, docs currency
8
49
 
9
50
  ### Added
@@ -221,7 +262,7 @@ to end with a verdaccio reproduction.
221
262
  - Consumer `npm ls` exits ELSPROBLEMS: the pinned `tar-stream@3.1.7` sits
222
263
  outside `tar-fs`'s `^2.1.4` range. Inherent to the out-of-range pin; disappears
223
264
  in 2.0.0 (Phase B / node:sqlite) when the `better-sqlite3 → prebuild-install
224
- → tar-fs` chain leaves the prod graph entirely.
265
+ → tar-fs` chain leaves the prod graph entirely.
225
266
  - Local-tarball installs still resolve `tar-stream@2.2.0` (shrinkwrap ignored on
226
267
  that path); the audit's advisory carve-out stays until Phase B.
227
268
 
package/dist/index.d.ts CHANGED
@@ -44,6 +44,7 @@ declare const logger: {
44
44
  debug: (message: string, ...args: any[]) => void;
45
45
  };
46
46
  type GatewayLogger = typeof logger;
47
+ export declare function buildServerInstructions(asyncJobsEnabled: boolean): string;
47
48
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
48
49
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
49
50
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
package/dist/index.js CHANGED
@@ -141,16 +141,21 @@ function loadSkills() {
141
141
  return skills;
142
142
  }
143
143
  const loadedSkills = loadSkills();
144
- const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
144
+ export function buildServerInstructions(asyncJobsEnabled) {
145
+ const asyncToolsNote = asyncJobsEnabled ? " | *_request_async (async)" : "";
146
+ const jobsLine = asyncJobsEnabled ? "Jobs: llm_job_status, llm_job_result, llm_job_cancel\n" : "";
147
+ const deferralLine = asyncJobsEnabled
148
+ ? `- Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.`
149
+ : '- Async jobs are DISABLED (persistence.backend = "none"): *_request_async and llm_job_* tools are not registered, and sync requests run to completion (no auto-deferral).';
150
+ return `llm-cli-gateway: Multi-LLM orchestration via MCP.
145
151
 
146
- Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async) | codex_fork_session (fork a Codex session into a new branch)
152
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync)${asyncToolsNote} | codex_fork_session (fork a Codex session into a new branch)
147
153
  Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation, list_available_models | job_status/job_result (validation jobs)
148
- Jobs: llm_job_status, llm_job_result, llm_job_cancel
149
- Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
154
+ ${jobsLine}Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
150
155
  Other: list_models, cli_versions, upstream_contracts (use --probe-installed after CLI upgrades to detect drift), cli_upgrade, approval_list, llm_process_health, llm_request_result (read back any persisted request — sync or async — by correlationId)
151
156
 
152
157
  Key behaviors:
153
- - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
158
+ ${deferralLine}
154
159
  - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
155
160
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
156
161
  - Upstream drift detection: After upgrading any provider CLI (especially grok), use the upstream_contracts tool with probeInstalled: true (or the CLI command "llm-cli-gateway contracts --json --probe-installed"). This is the primary reliable way to detect when an installed binary has gained or lost flags compared to the gateway's declared contract. The probe is safe and read-only.
@@ -158,8 +163,9 @@ Key behaviors:
158
163
 
159
164
  Skills (full docs via MCP resources):
160
165
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
161
- function newGatewayMcpServer() {
162
- return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: SERVER_INSTRUCTIONS });
166
+ }
167
+ function newGatewayMcpServer(asyncJobsEnabled = true) {
168
+ return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: buildServerInstructions(asyncJobsEnabled) });
163
169
  }
164
170
  let sessionManager;
165
171
  let db = null;
@@ -307,7 +313,10 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
307
313
  consumeOnComplete();
308
314
  throw err;
309
315
  }
310
- if (SYNC_DEADLINE_MS === 0) {
316
+ const deferralAvailable = runtime.persistence.backend !== "none" &&
317
+ runtime.persistence.asyncJobsEnabled &&
318
+ runtime.asyncJobManager.hasStore();
319
+ if (SYNC_DEADLINE_MS === 0 || !deferralAvailable) {
311
320
  const command = cli === "mistral" ? "vibe" : cli;
312
321
  try {
313
322
  return await executeCli(command, args, {
@@ -2503,7 +2512,7 @@ export async function handleCodexRequestAsync(deps, params) {
2503
2512
  effectiveSessionId = activeSession.id;
2504
2513
  }
2505
2514
  else {
2506
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2515
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2507
2516
  effectiveSessionId = newSession.id;
2508
2517
  }
2509
2518
  }
@@ -2511,7 +2520,7 @@ export async function handleCodexRequestAsync(deps, params) {
2511
2520
  await deps.sessionManager.updateSessionUsage(params.sessionId);
2512
2521
  }
2513
2522
  else if (params.createNewSession) {
2514
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2523
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2515
2524
  effectiveSessionId = newSession.id;
2516
2525
  }
2517
2526
  let worktreeResolution = {};
@@ -2567,10 +2576,10 @@ export function createGatewayServer(deps = {}) {
2567
2576
  void flightRecorder;
2568
2577
  void cacheAwareness;
2569
2578
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
2570
- const server = newGatewayMcpServer();
2579
+ const server = newGatewayMcpServer(asyncJobsEnabled);
2571
2580
  registerBaseResources(server, runtime);
2572
2581
  registerValidationTools(server, { asyncJobManager });
2573
- server.tool("claude_request", {
2582
+ server.tool("claude_request", "Run a Claude Code CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
2574
2583
  prompt: z
2575
2584
  .string()
2576
2585
  .min(1, "Prompt cannot be empty")
@@ -2586,8 +2595,14 @@ export function createGatewayServer(deps = {}) {
2586
2595
  .enum(["text", "json", "stream-json"])
2587
2596
  .default("stream-json")
2588
2597
  .describe("Output format (text|json|stream-json). DEFAULT: stream-json — the gateway parses NDJSON usage events to extract input/output/cache_read/cache_creation tokens + cost + model, persists them to the flight recorder for cache_state aggregates, and still returns the assistant text. Override to 'text' only when you truly want unparsed stdout (loses observability)."),
2589
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2590
- continueSession: z.boolean().default(false).describe("Continue active session"),
2598
+ sessionId: z
2599
+ .string()
2600
+ .optional()
2601
+ .describe("Gateway session record to associate (uses the active session if omitted). Claude continuity itself is via continueSession (--continue); this ID is gateway bookkeeping, not a Claude-native session."),
2602
+ continueSession: z
2603
+ .boolean()
2604
+ .default(false)
2605
+ .describe("Continue the most recent Claude conversation in this cwd (emits --continue; real CLI continuity)."),
2591
2606
  createNewSession: z.boolean().default(false).describe("Force new session"),
2592
2607
  allowedTools: z
2593
2608
  .array(z.string())
@@ -2703,6 +2718,12 @@ export function createGatewayServer(deps = {}) {
2703
2718
  .boolean()
2704
2719
  .default(false)
2705
2720
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2721
+ }, {
2722
+ title: "Claude Code request",
2723
+ readOnlyHint: false,
2724
+ destructiveHint: true,
2725
+ idempotentHint: false,
2726
+ openWorldHint: true,
2706
2727
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2707
2728
  const startTime = Date.now();
2708
2729
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
@@ -2897,7 +2918,7 @@ export function createGatewayServer(deps = {}) {
2897
2918
  performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
2898
2919
  }
2899
2920
  });
2900
- server.tool("codex_request", {
2921
+ server.tool("codex_request", "Run an OpenAI Codex CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
2901
2922
  prompt: z
2902
2923
  .string()
2903
2924
  .min(1, "Prompt cannot be empty")
@@ -3004,6 +3025,12 @@ export function createGatewayServer(deps = {}) {
3004
3025
  .optional()
3005
3026
  .describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
3006
3027
  worktree: WORKTREE_SCHEMA.optional(),
3028
+ }, {
3029
+ title: "Codex request",
3030
+ readOnlyHint: false,
3031
+ destructiveHint: true,
3032
+ idempotentHint: false,
3033
+ openWorldHint: true,
3007
3034
  }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
3008
3035
  const startTime = Date.now();
3009
3036
  const prep = prepareCodexRequest({
@@ -3089,7 +3116,7 @@ export function createGatewayServer(deps = {}) {
3089
3116
  effectiveSessionId = activeSession.id;
3090
3117
  }
3091
3118
  else {
3092
- const newSession = await sessionManager.createSession("codex", "Codex Session");
3119
+ const newSession = await sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3093
3120
  effectiveSessionId = newSession.id;
3094
3121
  }
3095
3122
  }
@@ -3097,7 +3124,7 @@ export function createGatewayServer(deps = {}) {
3097
3124
  await sessionManager.updateSessionUsage(sessionId);
3098
3125
  }
3099
3126
  else if (createNewSession) {
3100
- const newSession = await sessionManager.createSession("codex", "Codex Session");
3127
+ const newSession = await sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3101
3128
  effectiveSessionId = newSession.id;
3102
3129
  }
3103
3130
  logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
@@ -3145,7 +3172,7 @@ export function createGatewayServer(deps = {}) {
3145
3172
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3146
3173
  }
3147
3174
  });
3148
- server.tool("codex_fork_session", {
3175
+ server.tool("codex_fork_session", "Fork an existing Codex session into a new branch (codex fork <ID|--last>) and run a prompt against the fork without mutating the original.", {
3149
3176
  prompt: z
3150
3177
  .string()
3151
3178
  .min(1, "Prompt cannot be empty")
@@ -3176,6 +3203,12 @@ export function createGatewayServer(deps = {}) {
3176
3203
  .max(3_600_000)
3177
3204
  .optional()
3178
3205
  .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3206
+ }, {
3207
+ title: "Fork Codex session",
3208
+ readOnlyHint: false,
3209
+ destructiveHint: true,
3210
+ idempotentHint: false,
3211
+ openWorldHint: true,
3179
3212
  }, async ({ prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs, }) => {
3180
3213
  const corrId = correlationId || randomUUID();
3181
3214
  const startTime = Date.now();
@@ -3232,7 +3265,7 @@ export function createGatewayServer(deps = {}) {
3232
3265
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3233
3266
  }
3234
3267
  });
3235
- server.tool("gemini_request", {
3268
+ server.tool("gemini_request", "Run a Google Gemini CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3236
3269
  prompt: z
3237
3270
  .string()
3238
3271
  .min(1, "Prompt cannot be empty")
@@ -3244,7 +3277,10 @@ export function createGatewayServer(deps = {}) {
3244
3277
  .string()
3245
3278
  .optional()
3246
3279
  .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3247
- sessionId: z.string().optional().describe("Session ID or 'latest'"),
3280
+ sessionId: z
3281
+ .string()
3282
+ .optional()
3283
+ .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
3248
3284
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
3249
3285
  createNewSession: z.boolean().default(false).describe("Force new session"),
3250
3286
  approvalMode: z
@@ -3299,6 +3335,12 @@ export function createGatewayServer(deps = {}) {
3299
3335
  .optional()
3300
3336
  .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
3301
3337
  worktree: WORKTREE_SCHEMA.optional(),
3338
+ }, {
3339
+ title: "Gemini request",
3340
+ readOnlyHint: false,
3341
+ destructiveHint: true,
3342
+ idempotentHint: false,
3343
+ openWorldHint: true,
3302
3344
  }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
3303
3345
  return handleGeminiRequest({ sessionManager, logger, runtime }, {
3304
3346
  prompt,
@@ -3328,7 +3370,7 @@ export function createGatewayServer(deps = {}) {
3328
3370
  worktree,
3329
3371
  });
3330
3372
  });
3331
- server.tool("grok_request", {
3373
+ server.tool("grok_request", "Run an xAI Grok CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3332
3374
  prompt: z
3333
3375
  .string()
3334
3376
  .min(1, "Prompt cannot be empty")
@@ -3344,7 +3386,7 @@ export function createGatewayServer(deps = {}) {
3344
3386
  sessionId: z
3345
3387
  .string()
3346
3388
  .optional()
3347
- .describe("Session ID (user-provided CLI handle for --resume)"),
3389
+ .describe("Provider-native session ID to resume (emits --resume <id>; use resumeLatest for --continue)"),
3348
3390
  resumeLatest: z
3349
3391
  .boolean()
3350
3392
  .default(false)
@@ -3503,6 +3545,12 @@ export function createGatewayServer(deps = {}) {
3503
3545
  .optional()
3504
3546
  .describe("Grok -w/--worktree: native CLI worktree flag (`true` → bare `--worktree`, string → named). NOT gateway slice λ `worktree`."),
3505
3547
  worktree: WORKTREE_SCHEMA.optional(),
3548
+ }, {
3549
+ title: "Grok request",
3550
+ readOnlyHint: false,
3551
+ destructiveHint: true,
3552
+ idempotentHint: false,
3553
+ openWorldHint: true,
3506
3554
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, worktree, }) => {
3507
3555
  return handleGrokRequest({ sessionManager, logger, runtime }, {
3508
3556
  prompt,
@@ -3557,7 +3605,7 @@ export function createGatewayServer(deps = {}) {
3557
3605
  worktree,
3558
3606
  });
3559
3607
  });
3560
- server.tool("mistral_request", {
3608
+ server.tool("mistral_request", "Run a Mistral Vibe CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3561
3609
  prompt: z
3562
3610
  .string()
3563
3611
  .min(1, "Prompt cannot be empty")
@@ -3637,6 +3685,12 @@ export function createGatewayServer(deps = {}) {
3637
3685
  .optional()
3638
3686
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
3639
3687
  worktree: WORKTREE_SCHEMA.optional(),
3688
+ }, {
3689
+ title: "Mistral Vibe request",
3690
+ readOnlyHint: false,
3691
+ destructiveHint: true,
3692
+ idempotentHint: false,
3693
+ openWorldHint: true,
3640
3694
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
3641
3695
  return handleMistralRequest({ sessionManager, logger, runtime }, {
3642
3696
  prompt,
@@ -3667,7 +3721,7 @@ export function createGatewayServer(deps = {}) {
3667
3721
  });
3668
3722
  });
3669
3723
  if (asyncJobsEnabled) {
3670
- server.tool("claude_request_async", {
3724
+ server.tool("claude_request_async", "Start a Claude Code CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
3671
3725
  prompt: z
3672
3726
  .string()
3673
3727
  .min(1, "Prompt cannot be empty")
@@ -3683,8 +3737,14 @@ export function createGatewayServer(deps = {}) {
3683
3737
  .enum(["text", "json", "stream-json"])
3684
3738
  .default("stream-json")
3685
3739
  .describe("Output format (text|json|stream-json). DEFAULT: stream-json — same rationale as claude_request: keeps usage/cache/cost observable for cache_state aggregates. Override to 'text' only when raw stdout is required (loses observability)."),
3686
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
3687
- continueSession: z.boolean().default(false).describe("Continue active session"),
3740
+ sessionId: z
3741
+ .string()
3742
+ .optional()
3743
+ .describe("Gateway session record to associate (uses the active session if omitted). Claude continuity itself is via continueSession (--continue); this ID is gateway bookkeeping, not a Claude-native session."),
3744
+ continueSession: z
3745
+ .boolean()
3746
+ .default(false)
3747
+ .describe("Continue the most recent Claude conversation in this cwd (emits --continue; real CLI continuity)."),
3688
3748
  createNewSession: z.boolean().default(false).describe("Force new session"),
3689
3749
  allowedTools: z
3690
3750
  .array(z.string())
@@ -3799,6 +3859,12 @@ export function createGatewayServer(deps = {}) {
3799
3859
  .boolean()
3800
3860
  .default(false)
3801
3861
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3862
+ }, {
3863
+ title: "Claude Code request (async job)",
3864
+ readOnlyHint: false,
3865
+ destructiveHint: true,
3866
+ idempotentHint: false,
3867
+ openWorldHint: true,
3802
3868
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3803
3869
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
3804
3870
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -3920,7 +3986,7 @@ export function createGatewayServer(deps = {}) {
3920
3986
  return createErrorResponse("claude_request_async", 1, "", corrId, error);
3921
3987
  }
3922
3988
  });
3923
- server.tool("codex_request_async", {
3989
+ server.tool("codex_request_async", "Start an OpenAI Codex CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
3924
3990
  prompt: z
3925
3991
  .string()
3926
3992
  .min(1, "Prompt cannot be empty")
@@ -4011,6 +4077,12 @@ export function createGatewayServer(deps = {}) {
4011
4077
  .optional()
4012
4078
  .describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
4013
4079
  worktree: WORKTREE_SCHEMA.optional(),
4080
+ }, {
4081
+ title: "Codex request (async job)",
4082
+ readOnlyHint: false,
4083
+ destructiveHint: true,
4084
+ idempotentHint: false,
4085
+ openWorldHint: true,
4014
4086
  }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
4015
4087
  return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4016
4088
  prompt,
@@ -4045,7 +4117,7 @@ export function createGatewayServer(deps = {}) {
4045
4117
  worktree,
4046
4118
  });
4047
4119
  });
4048
- server.tool("gemini_request_async", {
4120
+ server.tool("gemini_request_async", "Start a Google Gemini CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4049
4121
  prompt: z
4050
4122
  .string()
4051
4123
  .min(1, "Prompt cannot be empty")
@@ -4060,7 +4132,7 @@ export function createGatewayServer(deps = {}) {
4060
4132
  sessionId: z
4061
4133
  .string()
4062
4134
  .optional()
4063
- .describe("Session ID (user-provided CLI handle for --resume)"),
4135
+ .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
4064
4136
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
4065
4137
  createNewSession: z.boolean().default(false).describe("Force new session"),
4066
4138
  approvalMode: z
@@ -4114,6 +4186,12 @@ export function createGatewayServer(deps = {}) {
4114
4186
  .optional()
4115
4187
  .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
4116
4188
  worktree: WORKTREE_SCHEMA.optional(),
4189
+ }, {
4190
+ title: "Gemini request (async job)",
4191
+ readOnlyHint: false,
4192
+ destructiveHint: true,
4193
+ idempotentHint: false,
4194
+ openWorldHint: true,
4117
4195
  }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
4118
4196
  return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4119
4197
  prompt,
@@ -4142,7 +4220,7 @@ export function createGatewayServer(deps = {}) {
4142
4220
  worktree,
4143
4221
  });
4144
4222
  });
4145
- server.tool("grok_request_async", {
4223
+ server.tool("grok_request_async", "Start an xAI Grok CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4146
4224
  prompt: z
4147
4225
  .string()
4148
4226
  .min(1, "Prompt cannot be empty")
@@ -4158,7 +4236,7 @@ export function createGatewayServer(deps = {}) {
4158
4236
  sessionId: z
4159
4237
  .string()
4160
4238
  .optional()
4161
- .describe("Session ID (user-provided CLI handle for --resume)"),
4239
+ .describe("Provider-native session ID to resume (emits --resume <id>; use resumeLatest for --continue)"),
4162
4240
  resumeLatest: z
4163
4241
  .boolean()
4164
4242
  .default(false)
@@ -4319,6 +4397,12 @@ export function createGatewayServer(deps = {}) {
4319
4397
  .optional()
4320
4398
  .describe("Grok -w/--worktree: native CLI worktree flag (`true` → bare `--worktree`, string → named). NOT gateway slice λ `worktree`."),
4321
4399
  worktree: WORKTREE_SCHEMA.optional(),
4400
+ }, {
4401
+ title: "Grok request (async job)",
4402
+ readOnlyHint: false,
4403
+ destructiveHint: true,
4404
+ idempotentHint: false,
4405
+ openWorldHint: true,
4322
4406
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, worktree, }) => {
4323
4407
  return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4324
4408
  prompt,
@@ -4372,7 +4456,7 @@ export function createGatewayServer(deps = {}) {
4372
4456
  worktree,
4373
4457
  });
4374
4458
  });
4375
- server.tool("mistral_request_async", {
4459
+ server.tool("mistral_request_async", "Start a Mistral Vibe CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4376
4460
  prompt: z
4377
4461
  .string()
4378
4462
  .min(1, "Prompt cannot be empty")
@@ -4451,6 +4535,12 @@ export function createGatewayServer(deps = {}) {
4451
4535
  .optional()
4452
4536
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
4453
4537
  worktree: WORKTREE_SCHEMA.optional(),
4538
+ }, {
4539
+ title: "Mistral Vibe request (async job)",
4540
+ readOnlyHint: false,
4541
+ destructiveHint: true,
4542
+ idempotentHint: false,
4543
+ openWorldHint: true,
4454
4544
  }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
4455
4545
  return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4456
4546
  prompt,
@@ -4479,8 +4569,14 @@ export function createGatewayServer(deps = {}) {
4479
4569
  worktree,
4480
4570
  });
4481
4571
  });
4482
- server.tool("llm_job_status", {
4572
+ server.tool("llm_job_status", "Check lifecycle status (running|completed|failed|canceled|orphaned) of a gateway async or deferred-sync job by jobId.", {
4483
4573
  jobId: z.string().describe("Async job ID from *_request_async"),
4574
+ }, {
4575
+ title: "Async job status",
4576
+ readOnlyHint: true,
4577
+ destructiveHint: false,
4578
+ idempotentHint: true,
4579
+ openWorldHint: false,
4484
4580
  }, async ({ jobId }) => {
4485
4581
  const job = asyncJobManager.getJobSnapshot(jobId);
4486
4582
  if (!job) {
@@ -4510,7 +4606,7 @@ export function createGatewayServer(deps = {}) {
4510
4606
  ],
4511
4607
  };
4512
4608
  });
4513
- server.tool("llm_job_result", {
4609
+ server.tool("llm_job_result", "Retrieve captured stdout/stderr for a gateway async or deferred-sync job by jobId.", {
4514
4610
  jobId: z.string().describe("Async job ID from *_request_async"),
4515
4611
  maxChars: z
4516
4612
  .number()
@@ -4519,6 +4615,12 @@ export function createGatewayServer(deps = {}) {
4519
4615
  .max(2000000)
4520
4616
  .default(200000)
4521
4617
  .describe("Max chars returned per stream"),
4618
+ }, {
4619
+ title: "Async job result",
4620
+ readOnlyHint: true,
4621
+ destructiveHint: false,
4622
+ idempotentHint: true,
4623
+ openWorldHint: false,
4522
4624
  }, async ({ jobId, maxChars }) => {
4523
4625
  const result = asyncJobManager.getJobResult(jobId, maxChars);
4524
4626
  if (!result) {
@@ -4564,8 +4666,14 @@ export function createGatewayServer(deps = {}) {
4564
4666
  ],
4565
4667
  };
4566
4668
  });
4567
- server.tool("llm_job_cancel", {
4669
+ server.tool("llm_job_cancel", "Cancel a running gateway async or deferred-sync job by jobId.", {
4568
4670
  jobId: z.string().describe("Async job ID from *_request_async"),
4671
+ }, {
4672
+ title: "Cancel async job",
4673
+ readOnlyHint: false,
4674
+ destructiveHint: true,
4675
+ idempotentHint: true,
4676
+ openWorldHint: false,
4569
4677
  }, async ({ jobId }) => {
4570
4678
  const cancel = asyncJobManager.cancelJob(jobId);
4571
4679
  if (!cancel.canceled) {
@@ -4596,7 +4704,7 @@ export function createGatewayServer(deps = {}) {
4596
4704
  };
4597
4705
  });
4598
4706
  }
4599
- server.tool("llm_request_result", {
4707
+ server.tool("llm_request_result", "Read back any persisted request (sync or async) from the flight recorder by correlationId, including prompt and response.", {
4600
4708
  correlationId: z
4601
4709
  .string()
4602
4710
  .min(1)
@@ -4612,6 +4720,12 @@ export function createGatewayServer(deps = {}) {
4612
4720
  .boolean()
4613
4721
  .default(false)
4614
4722
  .describe("Include the full persisted prompt text in the result"),
4723
+ }, {
4724
+ title: "Persisted request lookup",
4725
+ readOnlyHint: true,
4726
+ destructiveHint: false,
4727
+ idempotentHint: true,
4728
+ openWorldHint: false,
4615
4729
  }, async ({ correlationId, maxChars, includePrompt }) => {
4616
4730
  const record = readPersistedRequest(flightRecorder, correlationId, {
4617
4731
  maxChars,
@@ -4642,7 +4756,13 @@ export function createGatewayServer(deps = {}) {
4642
4756
  ],
4643
4757
  };
4644
4758
  });
4645
- server.tool("llm_process_health", {}, async () => {
4759
+ server.tool("llm_process_health", "Report gateway process health: async-job manager state plus the resolved persistence configuration and paths.", {}, {
4760
+ title: "Gateway process health",
4761
+ readOnlyHint: true,
4762
+ destructiveHint: false,
4763
+ idempotentHint: true,
4764
+ openWorldHint: false,
4765
+ }, async () => {
4646
4766
  const health = asyncJobManager.getJobHealth();
4647
4767
  const persistenceBlock = {
4648
4768
  backend: persistence.backend,
@@ -4666,7 +4786,7 @@ export function createGatewayServer(deps = {}) {
4666
4786
  ],
4667
4787
  };
4668
4788
  });
4669
- server.tool("approval_list", {
4789
+ server.tool("approval_list", "List recent MCP-managed approval decisions recorded by the gateway (approvalStrategy: mcp_managed).", {
4670
4790
  limit: z
4671
4791
  .number()
4672
4792
  .int()
@@ -4678,6 +4798,12 @@ export function createGatewayServer(deps = {}) {
4678
4798
  .enum(["claude", "codex", "gemini", "grok", "mistral"])
4679
4799
  .optional()
4680
4800
  .describe("Optional CLI filter"),
4801
+ }, {
4802
+ title: "Approval decisions",
4803
+ readOnlyHint: true,
4804
+ destructiveHint: false,
4805
+ idempotentHint: true,
4806
+ openWorldHint: false,
4681
4807
  }, async ({ limit, cli }) => {
4682
4808
  const approvals = approvalManager.list(limit, cli);
4683
4809
  return {
@@ -4693,24 +4819,36 @@ export function createGatewayServer(deps = {}) {
4693
4819
  ],
4694
4820
  };
4695
4821
  });
4696
- server.tool("list_models", {
4822
+ server.tool("list_models", "List models, aliases, and defaults for one provider CLI (claude|codex|gemini|grok|mistral).", {
4697
4823
  cli: z
4698
4824
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4699
4825
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4826
+ }, {
4827
+ title: "Provider models",
4828
+ readOnlyHint: true,
4829
+ destructiveHint: false,
4830
+ idempotentHint: true,
4831
+ openWorldHint: false,
4700
4832
  }, async ({ cli }) => {
4701
4833
  const cliInfo = getAvailableCliInfo();
4702
4834
  const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
4703
4835
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
4704
4836
  });
4705
- server.tool("cli_versions", {
4837
+ server.tool("cli_versions", "Report installed provider CLI versions, availability, and login status for all five providers or one.", {
4706
4838
  cli: z
4707
4839
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4708
4840
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4841
+ }, {
4842
+ title: "Provider CLI versions",
4843
+ readOnlyHint: true,
4844
+ destructiveHint: false,
4845
+ idempotentHint: true,
4846
+ openWorldHint: false,
4709
4847
  }, async ({ cli }) => {
4710
4848
  const versions = await getCliVersions(cli);
4711
4849
  return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
4712
4850
  });
4713
- server.tool("upstream_contracts", {
4851
+ server.tool("upstream_contracts", "Return the gateway's declared provider CLI contracts; with probeInstalled true, diff against installed --help surfaces to detect flag drift.", {
4714
4852
  cli: z
4715
4853
  .preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional())
4716
4854
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
@@ -4718,11 +4856,17 @@ export function createGatewayServer(deps = {}) {
4718
4856
  .boolean()
4719
4857
  .default(false)
4720
4858
  .describe("When true, run local --help probes and compare advertised flags against the declared contract. Strongly recommended after any provider CLI upgrade to detect drift."),
4859
+ }, {
4860
+ title: "Provider CLI contracts",
4861
+ readOnlyHint: true,
4862
+ destructiveHint: false,
4863
+ idempotentHint: true,
4864
+ openWorldHint: false,
4721
4865
  }, async ({ cli, probeInstalled }) => {
4722
4866
  const report = buildUpstreamContractReport({ cli, probeInstalled });
4723
4867
  return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
4724
4868
  });
4725
- server.tool("cli_upgrade", {
4869
+ server.tool("cli_upgrade", "Plan (dryRun, default true) or execute an upgrade for one provider CLI using its native update mechanism.", {
4726
4870
  cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
4727
4871
  target: z
4728
4872
  .string()
@@ -4740,6 +4884,12 @@ export function createGatewayServer(deps = {}) {
4740
4884
  .max(3_600_000)
4741
4885
  .optional()
4742
4886
  .describe("Upgrade timeout in ms when dryRun=false"),
4887
+ }, {
4888
+ title: "Upgrade provider CLI",
4889
+ readOnlyHint: false,
4890
+ destructiveHint: true,
4891
+ idempotentHint: false,
4892
+ openWorldHint: true,
4743
4893
  }, async ({ cli, target, dryRun, timeoutMs }) => {
4744
4894
  try {
4745
4895
  const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
@@ -4771,10 +4921,16 @@ export function createGatewayServer(deps = {}) {
4771
4921
  };
4772
4922
  }
4773
4923
  });
4774
- server.tool("session_create", {
4924
+ server.tool("session_create", "Create a gateway session record for a provider CLI. NOTE: this is gateway bookkeeping (gw-* ID), not a provider-native session — Codex resume needs a real Codex UUID.", {
4775
4925
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
4776
4926
  description: z.string().optional().describe("Session description"),
4777
4927
  setAsActive: z.boolean().default(true).describe("Set as active session"),
4928
+ }, {
4929
+ title: "Create session record",
4930
+ readOnlyHint: false,
4931
+ destructiveHint: false,
4932
+ idempotentHint: false,
4933
+ openWorldHint: false,
4778
4934
  }, async ({ cli, description, setAsActive }) => {
4779
4935
  try {
4780
4936
  const session = await sessionManager.createSession(cli, description);
@@ -4804,8 +4960,14 @@ export function createGatewayServer(deps = {}) {
4804
4960
  return createErrorResponse("session_create", 1, "", undefined, error);
4805
4961
  }
4806
4962
  });
4807
- server.tool("session_list", {
4963
+ server.tool("session_list", "List gateway session records and the active session per CLI, optionally filtered by CLI.", {
4808
4964
  cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4965
+ }, {
4966
+ title: "List sessions",
4967
+ readOnlyHint: true,
4968
+ destructiveHint: false,
4969
+ idempotentHint: true,
4970
+ openWorldHint: false,
4809
4971
  }, async ({ cli }) => {
4810
4972
  try {
4811
4973
  const sessions = await sessionManager.listSessions(cli);
@@ -4847,9 +5009,15 @@ export function createGatewayServer(deps = {}) {
4847
5009
  return createErrorResponse("session_list", 1, "", undefined, error);
4848
5010
  }
4849
5011
  });
4850
- server.tool("session_set_active", {
5012
+ server.tool("session_set_active", "Set or clear the active session for a CLI; the active session is used when a request omits sessionId.", {
4851
5013
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
4852
5014
  sessionId: z.string().nullable().describe("Session ID (null to clear)"),
5015
+ }, {
5016
+ title: "Set active session",
5017
+ readOnlyHint: false,
5018
+ destructiveHint: false,
5019
+ idempotentHint: true,
5020
+ openWorldHint: false,
4853
5021
  }, async ({ cli, sessionId }) => {
4854
5022
  try {
4855
5023
  const success = await sessionManager.setActiveSession(cli, sessionId || null);
@@ -4885,8 +5053,14 @@ export function createGatewayServer(deps = {}) {
4885
5053
  return createErrorResponse("session_set_active", 1, "", undefined, error);
4886
5054
  }
4887
5055
  });
4888
- server.tool("session_delete", {
5056
+ server.tool("session_delete", "Delete a gateway session record by ID (also removes any gateway-owned worktree attached to it).", {
4889
5057
  sessionId: z.string().describe("Session ID"),
5058
+ }, {
5059
+ title: "Delete session",
5060
+ readOnlyHint: false,
5061
+ destructiveHint: true,
5062
+ idempotentHint: true,
5063
+ openWorldHint: false,
4890
5064
  }, async ({ sessionId }) => {
4891
5065
  try {
4892
5066
  const session = await sessionManager.getSession(sessionId);
@@ -4926,8 +5100,14 @@ export function createGatewayServer(deps = {}) {
4926
5100
  return createErrorResponse("session_delete", 1, "", undefined, error);
4927
5101
  }
4928
5102
  });
4929
- server.tool("session_get", {
5103
+ server.tool("session_get", "Get one gateway session record by session ID, including recent request history when available.", {
4930
5104
  sessionId: z.string().describe("Session ID"),
5105
+ }, {
5106
+ title: "Get session",
5107
+ readOnlyHint: true,
5108
+ destructiveHint: false,
5109
+ idempotentHint: true,
5110
+ openWorldHint: false,
4931
5111
  }, async ({ sessionId }) => {
4932
5112
  try {
4933
5113
  const session = await sessionManager.getSession(sessionId);
@@ -4989,8 +5169,14 @@ export function createGatewayServer(deps = {}) {
4989
5169
  return createErrorResponse("session_get", 1, "", undefined, error);
4990
5170
  }
4991
5171
  });
4992
- server.tool("session_clear_all", {
5172
+ server.tool("session_clear_all", "Delete all gateway session records, optionally scoped to one CLI.", {
4993
5173
  cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
5174
+ }, {
5175
+ title: "Clear sessions",
5176
+ readOnlyHint: false,
5177
+ destructiveHint: true,
5178
+ idempotentHint: true,
5179
+ openWorldHint: false,
4994
5180
  }, async ({ cli }) => {
4995
5181
  try {
4996
5182
  const count = await sessionManager.clearAllSessions(cli);
@@ -47,7 +47,7 @@ function findHumanReadableReport(value) {
47
47
  return null;
48
48
  }
49
49
  export function registerValidationTools(server, deps) {
50
- server.tool("validate_with_models", {
50
+ server.tool("validate_with_models", "Ask two or more provider CLIs to independently validate a question. Starts validation jobs — poll with job_status, collect with job_result (not llm_job_*).", {
51
51
  question: z.string().min(1).describe("Question or content to validate."),
52
52
  models: providerListSchema.describe("Providers to ask. Defaults to Claude and Codex."),
53
53
  focus: z
@@ -57,6 +57,12 @@ export function registerValidationTools(server, deps) {
57
57
  judgeModel: providerSchema
58
58
  .optional()
59
59
  .describe("Optional provider to run an explicit judge synthesis job."),
60
+ }, {
61
+ title: "Multi-model validation",
62
+ readOnlyHint: false,
63
+ destructiveHint: true,
64
+ idempotentHint: false,
65
+ openWorldHint: true,
60
66
  }, async ({ question, models, focus, judgeModel }) => textResponse({
61
67
  success: true,
62
68
  tool: "validate_with_models",
@@ -69,10 +75,16 @@ export function registerValidationTools(server, deps) {
69
75
  judgeProvider: judgeModel,
70
76
  }),
71
77
  }));
72
- server.tool("second_opinion", {
78
+ server.tool("second_opinion", "Ask one provider CLI to review an answer (starts a validation job; poll job_status, collect job_result).", {
73
79
  answer: z.string().min(1).describe("Answer to review."),
74
80
  question: z.string().optional().describe("Original question, if available."),
75
81
  model: providerSchema.default("codex").describe("Provider to ask for the second opinion."),
82
+ }, {
83
+ title: "Second opinion",
84
+ readOnlyHint: false,
85
+ destructiveHint: true,
86
+ idempotentHint: false,
87
+ openWorldHint: true,
76
88
  }, async ({ answer, question, model }) => textResponse({
77
89
  success: true,
78
90
  tool: "second_opinion",
@@ -84,9 +96,15 @@ export function registerValidationTools(server, deps) {
84
96
  providers: [model],
85
97
  }),
86
98
  }));
87
- server.tool("compare_answers", {
99
+ server.tool("compare_answers", "Summarize agreement/differences between caller-provided answers LOCALLY — does not call any provider.", {
88
100
  question: z.string().min(1).describe("Question the answers respond to."),
89
101
  answers: z.array(z.string().min(1)).min(2).describe("Two or more answers to compare."),
102
+ }, {
103
+ title: "Compare answers (local)",
104
+ readOnlyHint: true,
105
+ destructiveHint: false,
106
+ idempotentHint: true,
107
+ openWorldHint: false,
90
108
  }, async ({ question, answers }) => textResponse({
91
109
  success: true,
92
110
  tool: "compare_answers",
@@ -99,13 +117,19 @@ export function registerValidationTools(server, deps) {
99
117
  note: "Use validate_with_models when independent provider review is needed.",
100
118
  },
101
119
  }));
102
- server.tool("red_team_review", {
120
+ server.tool("red_team_review", "Challenge a plan, answer, or document for risks and failure modes via provider CLIs (starts validation jobs).", {
103
121
  content: z.string().min(1).describe("Plan, answer, or document to challenge."),
104
122
  riskLevel: z
105
123
  .enum(["normal", "high"])
106
124
  .default("normal")
107
125
  .describe("How aggressively to review."),
108
126
  models: providerListSchema.describe("Providers to ask for adversarial review."),
127
+ }, {
128
+ title: "Red-team review",
129
+ readOnlyHint: false,
130
+ destructiveHint: true,
131
+ idempotentHint: false,
132
+ openWorldHint: true,
109
133
  }, async ({ content, riskLevel, models }) => textResponse({
110
134
  success: true,
111
135
  tool: "red_team_review",
@@ -117,9 +141,15 @@ export function registerValidationTools(server, deps) {
117
141
  riskLevel,
118
142
  }),
119
143
  }));
120
- server.tool("consensus_check", {
144
+ server.tool("consensus_check", "Ask provider CLIs whether they agree or disagree with a claim (starts validation jobs).", {
121
145
  claim: z.string().min(1).describe("Claim to check across providers."),
122
146
  models: providerListSchema.describe("Providers to ask for agreement or disagreement."),
147
+ }, {
148
+ title: "Consensus check",
149
+ readOnlyHint: false,
150
+ destructiveHint: true,
151
+ idempotentHint: false,
152
+ openWorldHint: true,
123
153
  }, async ({ claim, models }) => textResponse({
124
154
  success: true,
125
155
  tool: "consensus_check",
@@ -130,9 +160,15 @@ export function registerValidationTools(server, deps) {
130
160
  providers: models,
131
161
  }),
132
162
  }));
133
- server.tool("ask_model", {
163
+ server.tool("ask_model", "Ask one provider CLI a question through the simplified validation surface (starts a validation job).", {
134
164
  question: z.string().min(1).describe("Question for one provider."),
135
165
  model: providerSchema.default("claude").describe("Provider to ask."),
166
+ }, {
167
+ title: "Ask one model",
168
+ readOnlyHint: false,
169
+ destructiveHint: true,
170
+ idempotentHint: false,
171
+ openWorldHint: true,
136
172
  }, async ({ question, model }) => textResponse({
137
173
  success: true,
138
174
  tool: "ask_model",
@@ -143,13 +179,19 @@ export function registerValidationTools(server, deps) {
143
179
  providers: [model],
144
180
  }),
145
181
  }));
146
- server.tool("synthesize_validation", {
182
+ server.tool("synthesize_validation", "Run an explicit judge model over already-collected validation results to produce a synthesis.", {
147
183
  question: z.string().min(1).describe("Original request that was validated."),
148
184
  providerResults: z
149
185
  .array(normalizedProviderResultSchema)
150
186
  .min(1)
151
187
  .describe("Terminal normalized provider results from job_result."),
152
188
  judgeModel: providerSchema.default("codex").describe("Provider to run the judge synthesis."),
189
+ }, {
190
+ title: "Synthesize validation",
191
+ readOnlyHint: false,
192
+ destructiveHint: true,
193
+ idempotentHint: false,
194
+ openWorldHint: true,
153
195
  }, async ({ question, providerResults, judgeModel }) => textResponse({
154
196
  success: true,
155
197
  tool: "synthesize_validation",
@@ -160,9 +202,21 @@ export function registerValidationTools(server, deps) {
160
202
  judgeProvider: judgeModel,
161
203
  }),
162
204
  }));
163
- server.tool("list_available_models", {}, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
164
- server.tool("job_status", {
205
+ server.tool("list_available_models", "List models and capabilities for every available provider CLI (takes no arguments; complements per-provider list_models).", {}, {
206
+ title: "All provider models",
207
+ readOnlyHint: true,
208
+ destructiveHint: false,
209
+ idempotentHint: true,
210
+ openWorldHint: false,
211
+ }, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
212
+ server.tool("job_status", "Check a VALIDATION job's status (jobs started by validate_with_models/ask_model/etc.) — distinct from llm_job_status, which tracks provider request jobs.", {
165
213
  jobId: z.string().min(1).describe("Validation job ID."),
214
+ }, {
215
+ title: "Validation job status",
216
+ readOnlyHint: true,
217
+ destructiveHint: false,
218
+ idempotentHint: true,
219
+ openWorldHint: false,
166
220
  }, async ({ jobId }) => {
167
221
  const job = deps.asyncJobManager.getJobSnapshot(jobId);
168
222
  if (!job) {
@@ -170,7 +224,7 @@ export function registerValidationTools(server, deps) {
170
224
  }
171
225
  return textResponse({ success: true, job });
172
226
  });
173
- server.tool("job_result", {
227
+ server.tool("job_result", "Collect a VALIDATION job's normalized provider output — distinct from llm_job_result, which returns raw provider request job output.", {
174
228
  jobId: z.string().min(1).describe("Validation job ID."),
175
229
  provider: providerSchema
176
230
  .optional()
@@ -182,6 +236,12 @@ export function registerValidationTools(server, deps) {
182
236
  .max(2000000)
183
237
  .default(200000)
184
238
  .describe("Maximum result size."),
239
+ }, {
240
+ title: "Validation job result",
241
+ readOnlyHint: true,
242
+ destructiveHint: false,
243
+ idempotentHint: true,
244
+ openWorldHint: false,
185
245
  }, async ({ jobId, provider, maxChars }) => {
186
246
  const result = deps.asyncJobManager.getJobResult(jobId, maxChars);
187
247
  if (!result) {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.0.0",
3
+ "version": "2.3.0",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "llm-cli-gateway",
9
- "version": "2.0.0",
9
+ "version": "2.3.0",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "@modelcontextprotocol/sdk": "^1.29.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.1.0",
3
+ "version": "2.3.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",