llm-cli-gateway 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,32 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.2.0] - 2026-06-07: MCP tool-surface usability — self-describing tools
8
+
9
+ ### Added
10
+
11
+ - MCP tool-surface usability (4-seat cross-LLM review): all 37 tools now carry
12
+ action descriptions (previously none had tool-level descriptions — clients
13
+ that rank, search, or defer tools by description saw bare names); sync
14
+ `*_request` descriptions state the prompt/promptParts exactly-one rule and
15
+ conditional deferral; `job_status`/`job_result` vs `llm_job_*` and the
16
+ local-only `compare_answers` are disambiguated; session/`sessionId`
17
+ descriptions gain per-provider resume-semantics parity.
18
+
19
+ ### Fixed
20
+
21
+ - Codex gateway-bookkeeping sessions are now created with the reserved `gw-`
22
+ prefix (4 sites), so resuming a gateway ID fails fast with an actionable
23
+ error instead of reaching `codex exec resume` and dying with "no rollout
24
+ found" (root cause of real-world resume failures).
25
+ - Server instructions are now built per-server from the same derived gate as
26
+ tool registration (backend, asyncJobsEnabled, hasStore()), so a
27
+ `backend = "none"` gateway no longer advertises unregistered
28
+ `*_request_async`/`llm_job_*` tools.
29
+ - Sync auto-deferral is disabled when async jobs are unavailable — previously
30
+ a request could defer into an in-memory job whose polling tools were not
31
+ registered (dead-end jobId).
32
+
7
33
  ## [2.1.0] - 2026-06-07: Grok Build 0.2.32, probe drift acknowledgement, docs currency
8
34
 
9
35
  ### Added
package/dist/index.d.ts CHANGED
@@ -44,6 +44,7 @@ declare const logger: {
44
44
  debug: (message: string, ...args: any[]) => void;
45
45
  };
46
46
  type GatewayLogger = typeof logger;
47
+ export declare function buildServerInstructions(asyncJobsEnabled: boolean): string;
47
48
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
48
49
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
49
50
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
package/dist/index.js CHANGED
@@ -141,16 +141,21 @@ function loadSkills() {
141
141
  return skills;
142
142
  }
143
143
  const loadedSkills = loadSkills();
144
- const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
144
+ export function buildServerInstructions(asyncJobsEnabled) {
145
+ const asyncToolsNote = asyncJobsEnabled ? " | *_request_async (async)" : "";
146
+ const jobsLine = asyncJobsEnabled ? "Jobs: llm_job_status, llm_job_result, llm_job_cancel\n" : "";
147
+ const deferralLine = asyncJobsEnabled
148
+ ? `- Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.`
149
+ : '- Async jobs are DISABLED (persistence.backend = "none"): *_request_async and llm_job_* tools are not registered, and sync requests run to completion (no auto-deferral).';
150
+ return `llm-cli-gateway: Multi-LLM orchestration via MCP.
145
151
 
146
- Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async) | codex_fork_session (fork a Codex session into a new branch)
152
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync)${asyncToolsNote} | codex_fork_session (fork a Codex session into a new branch)
147
153
  Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation, list_available_models | job_status/job_result (validation jobs)
148
- Jobs: llm_job_status, llm_job_result, llm_job_cancel
149
- Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
154
+ ${jobsLine}Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
150
155
  Other: list_models, cli_versions, upstream_contracts (use --probe-installed after CLI upgrades to detect drift), cli_upgrade, approval_list, llm_process_health, llm_request_result (read back any persisted request — sync or async — by correlationId)
151
156
 
152
157
  Key behaviors:
153
- - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
158
+ ${deferralLine}
154
159
  - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (current Vibe defaults session logging on; doctor flags explicit session_logging.enabled=false), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
155
160
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
156
161
  - Upstream drift detection: After upgrading any provider CLI (especially grok), use the upstream_contracts tool with probeInstalled: true (or the CLI command "llm-cli-gateway contracts --json --probe-installed"). This is the primary reliable way to detect when an installed binary has gained or lost flags compared to the gateway's declared contract. The probe is safe and read-only.
@@ -158,8 +163,9 @@ Key behaviors:
158
163
 
159
164
  Skills (full docs via MCP resources):
160
165
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
161
- function newGatewayMcpServer() {
162
- return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: SERVER_INSTRUCTIONS });
166
+ }
167
+ function newGatewayMcpServer(asyncJobsEnabled = true) {
168
+ return new McpServer({ name: "llm-cli-gateway", version: packageVersion() }, { instructions: buildServerInstructions(asyncJobsEnabled) });
163
169
  }
164
170
  let sessionManager;
165
171
  let db = null;
@@ -307,7 +313,10 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
307
313
  consumeOnComplete();
308
314
  throw err;
309
315
  }
310
- if (SYNC_DEADLINE_MS === 0) {
316
+ const deferralAvailable = runtime.persistence.backend !== "none" &&
317
+ runtime.persistence.asyncJobsEnabled &&
318
+ runtime.asyncJobManager.hasStore();
319
+ if (SYNC_DEADLINE_MS === 0 || !deferralAvailable) {
311
320
  const command = cli === "mistral" ? "vibe" : cli;
312
321
  try {
313
322
  return await executeCli(command, args, {
@@ -2503,7 +2512,7 @@ export async function handleCodexRequestAsync(deps, params) {
2503
2512
  effectiveSessionId = activeSession.id;
2504
2513
  }
2505
2514
  else {
2506
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2515
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2507
2516
  effectiveSessionId = newSession.id;
2508
2517
  }
2509
2518
  }
@@ -2511,7 +2520,7 @@ export async function handleCodexRequestAsync(deps, params) {
2511
2520
  await deps.sessionManager.updateSessionUsage(params.sessionId);
2512
2521
  }
2513
2522
  else if (params.createNewSession) {
2514
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2523
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2515
2524
  effectiveSessionId = newSession.id;
2516
2525
  }
2517
2526
  let worktreeResolution = {};
@@ -2567,10 +2576,10 @@ export function createGatewayServer(deps = {}) {
2567
2576
  void flightRecorder;
2568
2577
  void cacheAwareness;
2569
2578
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
2570
- const server = newGatewayMcpServer();
2579
+ const server = newGatewayMcpServer(asyncJobsEnabled);
2571
2580
  registerBaseResources(server, runtime);
2572
2581
  registerValidationTools(server, { asyncJobManager });
2573
- server.tool("claude_request", {
2582
+ server.tool("claude_request", "Run a Claude Code CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
2574
2583
  prompt: z
2575
2584
  .string()
2576
2585
  .min(1, "Prompt cannot be empty")
@@ -2586,8 +2595,14 @@ export function createGatewayServer(deps = {}) {
2586
2595
  .enum(["text", "json", "stream-json"])
2587
2596
  .default("stream-json")
2588
2597
  .describe("Output format (text|json|stream-json). DEFAULT: stream-json — the gateway parses NDJSON usage events to extract input/output/cache_read/cache_creation tokens + cost + model, persists them to the flight recorder for cache_state aggregates, and still returns the assistant text. Override to 'text' only when you truly want unparsed stdout (loses observability)."),
2589
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2590
- continueSession: z.boolean().default(false).describe("Continue active session"),
2598
+ sessionId: z
2599
+ .string()
2600
+ .optional()
2601
+ .describe("Gateway session record to associate (uses the active session if omitted). Claude continuity itself is via continueSession (--continue); this ID is gateway bookkeeping, not a Claude-native session."),
2602
+ continueSession: z
2603
+ .boolean()
2604
+ .default(false)
2605
+ .describe("Continue the most recent Claude conversation in this cwd (emits --continue; real CLI continuity)."),
2591
2606
  createNewSession: z.boolean().default(false).describe("Force new session"),
2592
2607
  allowedTools: z
2593
2608
  .array(z.string())
@@ -2897,7 +2912,7 @@ export function createGatewayServer(deps = {}) {
2897
2912
  performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
2898
2913
  }
2899
2914
  });
2900
- server.tool("codex_request", {
2915
+ server.tool("codex_request", "Run an OpenAI Codex CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
2901
2916
  prompt: z
2902
2917
  .string()
2903
2918
  .min(1, "Prompt cannot be empty")
@@ -3089,7 +3104,7 @@ export function createGatewayServer(deps = {}) {
3089
3104
  effectiveSessionId = activeSession.id;
3090
3105
  }
3091
3106
  else {
3092
- const newSession = await sessionManager.createSession("codex", "Codex Session");
3107
+ const newSession = await sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3093
3108
  effectiveSessionId = newSession.id;
3094
3109
  }
3095
3110
  }
@@ -3097,7 +3112,7 @@ export function createGatewayServer(deps = {}) {
3097
3112
  await sessionManager.updateSessionUsage(sessionId);
3098
3113
  }
3099
3114
  else if (createNewSession) {
3100
- const newSession = await sessionManager.createSession("codex", "Codex Session");
3115
+ const newSession = await sessionManager.createSession("codex", "Codex Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3101
3116
  effectiveSessionId = newSession.id;
3102
3117
  }
3103
3118
  logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
@@ -3145,7 +3160,7 @@ export function createGatewayServer(deps = {}) {
3145
3160
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3146
3161
  }
3147
3162
  });
3148
- server.tool("codex_fork_session", {
3163
+ server.tool("codex_fork_session", "Fork an existing Codex session into a new branch (codex fork <ID|--last>) and run a prompt against the fork without mutating the original.", {
3149
3164
  prompt: z
3150
3165
  .string()
3151
3166
  .min(1, "Prompt cannot be empty")
@@ -3232,7 +3247,7 @@ export function createGatewayServer(deps = {}) {
3232
3247
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3233
3248
  }
3234
3249
  });
3235
- server.tool("gemini_request", {
3250
+ server.tool("gemini_request", "Run a Google Gemini CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3236
3251
  prompt: z
3237
3252
  .string()
3238
3253
  .min(1, "Prompt cannot be empty")
@@ -3244,7 +3259,10 @@ export function createGatewayServer(deps = {}) {
3244
3259
  .string()
3245
3260
  .optional()
3246
3261
  .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3247
- sessionId: z.string().optional().describe("Session ID or 'latest'"),
3262
+ sessionId: z
3263
+ .string()
3264
+ .optional()
3265
+ .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
3248
3266
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
3249
3267
  createNewSession: z.boolean().default(false).describe("Force new session"),
3250
3268
  approvalMode: z
@@ -3328,7 +3346,7 @@ export function createGatewayServer(deps = {}) {
3328
3346
  worktree,
3329
3347
  });
3330
3348
  });
3331
- server.tool("grok_request", {
3349
+ server.tool("grok_request", "Run an xAI Grok CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3332
3350
  prompt: z
3333
3351
  .string()
3334
3352
  .min(1, "Prompt cannot be empty")
@@ -3344,7 +3362,7 @@ export function createGatewayServer(deps = {}) {
3344
3362
  sessionId: z
3345
3363
  .string()
3346
3364
  .optional()
3347
- .describe("Session ID (user-provided CLI handle for --resume)"),
3365
+ .describe("Provider-native session ID to resume (emits --resume <id>; use resumeLatest for --continue)"),
3348
3366
  resumeLatest: z
3349
3367
  .boolean()
3350
3368
  .default(false)
@@ -3557,7 +3575,7 @@ export function createGatewayServer(deps = {}) {
3557
3575
  worktree,
3558
3576
  });
3559
3577
  });
3560
- server.tool("mistral_request", {
3578
+ server.tool("mistral_request", "Run a Mistral Vibe CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
3561
3579
  prompt: z
3562
3580
  .string()
3563
3581
  .min(1, "Prompt cannot be empty")
@@ -3667,7 +3685,7 @@ export function createGatewayServer(deps = {}) {
3667
3685
  });
3668
3686
  });
3669
3687
  if (asyncJobsEnabled) {
3670
- server.tool("claude_request_async", {
3688
+ server.tool("claude_request_async", "Start a Claude Code CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
3671
3689
  prompt: z
3672
3690
  .string()
3673
3691
  .min(1, "Prompt cannot be empty")
@@ -3683,8 +3701,14 @@ export function createGatewayServer(deps = {}) {
3683
3701
  .enum(["text", "json", "stream-json"])
3684
3702
  .default("stream-json")
3685
3703
  .describe("Output format (text|json|stream-json). DEFAULT: stream-json — same rationale as claude_request: keeps usage/cache/cost observable for cache_state aggregates. Override to 'text' only when raw stdout is required (loses observability)."),
3686
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
3687
- continueSession: z.boolean().default(false).describe("Continue active session"),
3704
+ sessionId: z
3705
+ .string()
3706
+ .optional()
3707
+ .describe("Gateway session record to associate (uses the active session if omitted). Claude continuity itself is via continueSession (--continue); this ID is gateway bookkeeping, not a Claude-native session."),
3708
+ continueSession: z
3709
+ .boolean()
3710
+ .default(false)
3711
+ .describe("Continue the most recent Claude conversation in this cwd (emits --continue; real CLI continuity)."),
3688
3712
  createNewSession: z.boolean().default(false).describe("Force new session"),
3689
3713
  allowedTools: z
3690
3714
  .array(z.string())
@@ -3920,7 +3944,7 @@ export function createGatewayServer(deps = {}) {
3920
3944
  return createErrorResponse("claude_request_async", 1, "", corrId, error);
3921
3945
  }
3922
3946
  });
3923
- server.tool("codex_request_async", {
3947
+ server.tool("codex_request_async", "Start an OpenAI Codex CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
3924
3948
  prompt: z
3925
3949
  .string()
3926
3950
  .min(1, "Prompt cannot be empty")
@@ -4045,7 +4069,7 @@ export function createGatewayServer(deps = {}) {
4045
4069
  worktree,
4046
4070
  });
4047
4071
  });
4048
- server.tool("gemini_request_async", {
4072
+ server.tool("gemini_request_async", "Start a Google Gemini CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4049
4073
  prompt: z
4050
4074
  .string()
4051
4075
  .min(1, "Prompt cannot be empty")
@@ -4060,7 +4084,7 @@ export function createGatewayServer(deps = {}) {
4060
4084
  sessionId: z
4061
4085
  .string()
4062
4086
  .optional()
4063
- .describe("Session ID (user-provided CLI handle for --resume)"),
4087
+ .describe("Gemini session ID to resume (emits --resume <id>), or 'latest' for the most recent session in this cwd"),
4064
4088
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
4065
4089
  createNewSession: z.boolean().default(false).describe("Force new session"),
4066
4090
  approvalMode: z
@@ -4142,7 +4166,7 @@ export function createGatewayServer(deps = {}) {
4142
4166
  worktree,
4143
4167
  });
4144
4168
  });
4145
- server.tool("grok_request_async", {
4169
+ server.tool("grok_request_async", "Start an xAI Grok CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4146
4170
  prompt: z
4147
4171
  .string()
4148
4172
  .min(1, "Prompt cannot be empty")
@@ -4158,7 +4182,7 @@ export function createGatewayServer(deps = {}) {
4158
4182
  sessionId: z
4159
4183
  .string()
4160
4184
  .optional()
4161
- .describe("Session ID (user-provided CLI handle for --resume)"),
4185
+ .describe("Provider-native session ID to resume (emits --resume <id>; use resumeLatest for --continue)"),
4162
4186
  resumeLatest: z
4163
4187
  .boolean()
4164
4188
  .default(false)
@@ -4372,7 +4396,7 @@ export function createGatewayServer(deps = {}) {
4372
4396
  worktree,
4373
4397
  });
4374
4398
  });
4375
- server.tool("mistral_request_async", {
4399
+ server.tool("mistral_request_async", "Start a Mistral Vibe CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
4376
4400
  prompt: z
4377
4401
  .string()
4378
4402
  .min(1, "Prompt cannot be empty")
@@ -4479,7 +4503,7 @@ export function createGatewayServer(deps = {}) {
4479
4503
  worktree,
4480
4504
  });
4481
4505
  });
4482
- server.tool("llm_job_status", {
4506
+ server.tool("llm_job_status", "Check lifecycle status (running|completed|failed|canceled|orphaned) of a gateway async or deferred-sync job by jobId.", {
4483
4507
  jobId: z.string().describe("Async job ID from *_request_async"),
4484
4508
  }, async ({ jobId }) => {
4485
4509
  const job = asyncJobManager.getJobSnapshot(jobId);
@@ -4510,7 +4534,7 @@ export function createGatewayServer(deps = {}) {
4510
4534
  ],
4511
4535
  };
4512
4536
  });
4513
- server.tool("llm_job_result", {
4537
+ server.tool("llm_job_result", "Retrieve captured stdout/stderr for a gateway async or deferred-sync job by jobId.", {
4514
4538
  jobId: z.string().describe("Async job ID from *_request_async"),
4515
4539
  maxChars: z
4516
4540
  .number()
@@ -4564,7 +4588,7 @@ export function createGatewayServer(deps = {}) {
4564
4588
  ],
4565
4589
  };
4566
4590
  });
4567
- server.tool("llm_job_cancel", {
4591
+ server.tool("llm_job_cancel", "Cancel a running gateway async or deferred-sync job by jobId.", {
4568
4592
  jobId: z.string().describe("Async job ID from *_request_async"),
4569
4593
  }, async ({ jobId }) => {
4570
4594
  const cancel = asyncJobManager.cancelJob(jobId);
@@ -4596,7 +4620,7 @@ export function createGatewayServer(deps = {}) {
4596
4620
  };
4597
4621
  });
4598
4622
  }
4599
- server.tool("llm_request_result", {
4623
+ server.tool("llm_request_result", "Read back any persisted request (sync or async) from the flight recorder by correlationId, including prompt and response.", {
4600
4624
  correlationId: z
4601
4625
  .string()
4602
4626
  .min(1)
@@ -4642,7 +4666,7 @@ export function createGatewayServer(deps = {}) {
4642
4666
  ],
4643
4667
  };
4644
4668
  });
4645
- server.tool("llm_process_health", {}, async () => {
4669
+ server.tool("llm_process_health", "Report gateway process health: async-job manager state plus the resolved persistence configuration and paths.", {}, async () => {
4646
4670
  const health = asyncJobManager.getJobHealth();
4647
4671
  const persistenceBlock = {
4648
4672
  backend: persistence.backend,
@@ -4666,7 +4690,7 @@ export function createGatewayServer(deps = {}) {
4666
4690
  ],
4667
4691
  };
4668
4692
  });
4669
- server.tool("approval_list", {
4693
+ server.tool("approval_list", "List recent MCP-managed approval decisions recorded by the gateway (approvalStrategy: mcp_managed).", {
4670
4694
  limit: z
4671
4695
  .number()
4672
4696
  .int()
@@ -4693,7 +4717,7 @@ export function createGatewayServer(deps = {}) {
4693
4717
  ],
4694
4718
  };
4695
4719
  });
4696
- server.tool("list_models", {
4720
+ server.tool("list_models", "List models, aliases, and defaults for one provider CLI (claude|codex|gemini|grok|mistral).", {
4697
4721
  cli: z
4698
4722
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4699
4723
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
@@ -4702,7 +4726,7 @@ export function createGatewayServer(deps = {}) {
4702
4726
  const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
4703
4727
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
4704
4728
  });
4705
- server.tool("cli_versions", {
4729
+ server.tool("cli_versions", "Report installed provider CLI versions, availability, and login status for all five providers or one.", {
4706
4730
  cli: z
4707
4731
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
4708
4732
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
@@ -4710,7 +4734,7 @@ export function createGatewayServer(deps = {}) {
4710
4734
  const versions = await getCliVersions(cli);
4711
4735
  return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
4712
4736
  });
4713
- server.tool("upstream_contracts", {
4737
+ server.tool("upstream_contracts", "Return the gateway's declared provider CLI contracts; with probeInstalled true, diff against installed --help surfaces to detect flag drift.", {
4714
4738
  cli: z
4715
4739
  .preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional())
4716
4740
  .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
@@ -4722,7 +4746,7 @@ export function createGatewayServer(deps = {}) {
4722
4746
  const report = buildUpstreamContractReport({ cli, probeInstalled });
4723
4747
  return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
4724
4748
  });
4725
- server.tool("cli_upgrade", {
4749
+ server.tool("cli_upgrade", "Plan (dryRun, default true) or execute an upgrade for one provider CLI using its native update mechanism.", {
4726
4750
  cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
4727
4751
  target: z
4728
4752
  .string()
@@ -4771,7 +4795,7 @@ export function createGatewayServer(deps = {}) {
4771
4795
  };
4772
4796
  }
4773
4797
  });
4774
- server.tool("session_create", {
4798
+ server.tool("session_create", "Create a gateway session record for a provider CLI. NOTE: this is gateway bookkeeping (gw-* ID), not a provider-native session — Codex resume needs a real Codex UUID.", {
4775
4799
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
4776
4800
  description: z.string().optional().describe("Session description"),
4777
4801
  setAsActive: z.boolean().default(true).describe("Set as active session"),
@@ -4804,7 +4828,7 @@ export function createGatewayServer(deps = {}) {
4804
4828
  return createErrorResponse("session_create", 1, "", undefined, error);
4805
4829
  }
4806
4830
  });
4807
- server.tool("session_list", {
4831
+ server.tool("session_list", "List gateway session records and the active session per CLI, optionally filtered by CLI.", {
4808
4832
  cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4809
4833
  }, async ({ cli }) => {
4810
4834
  try {
@@ -4847,7 +4871,7 @@ export function createGatewayServer(deps = {}) {
4847
4871
  return createErrorResponse("session_list", 1, "", undefined, error);
4848
4872
  }
4849
4873
  });
4850
- server.tool("session_set_active", {
4874
+ server.tool("session_set_active", "Set or clear the active session for a CLI; the active session is used when a request omits sessionId.", {
4851
4875
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
4852
4876
  sessionId: z.string().nullable().describe("Session ID (null to clear)"),
4853
4877
  }, async ({ cli, sessionId }) => {
@@ -4885,7 +4909,7 @@ export function createGatewayServer(deps = {}) {
4885
4909
  return createErrorResponse("session_set_active", 1, "", undefined, error);
4886
4910
  }
4887
4911
  });
4888
- server.tool("session_delete", {
4912
+ server.tool("session_delete", "Delete a gateway session record by ID (also removes any gateway-owned worktree attached to it).", {
4889
4913
  sessionId: z.string().describe("Session ID"),
4890
4914
  }, async ({ sessionId }) => {
4891
4915
  try {
@@ -4926,7 +4950,7 @@ export function createGatewayServer(deps = {}) {
4926
4950
  return createErrorResponse("session_delete", 1, "", undefined, error);
4927
4951
  }
4928
4952
  });
4929
- server.tool("session_get", {
4953
+ server.tool("session_get", "Get one gateway session record by session ID, including recent request history when available.", {
4930
4954
  sessionId: z.string().describe("Session ID"),
4931
4955
  }, async ({ sessionId }) => {
4932
4956
  try {
@@ -4989,7 +5013,7 @@ export function createGatewayServer(deps = {}) {
4989
5013
  return createErrorResponse("session_get", 1, "", undefined, error);
4990
5014
  }
4991
5015
  });
4992
- server.tool("session_clear_all", {
5016
+ server.tool("session_clear_all", "Delete all gateway session records, optionally scoped to one CLI.", {
4993
5017
  cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
4994
5018
  }, async ({ cli }) => {
4995
5019
  try {
@@ -47,7 +47,7 @@ function findHumanReadableReport(value) {
47
47
  return null;
48
48
  }
49
49
  export function registerValidationTools(server, deps) {
50
- server.tool("validate_with_models", {
50
+ server.tool("validate_with_models", "Ask two or more provider CLIs to independently validate a question. Starts validation jobs — poll with job_status, collect with job_result (not llm_job_*).", {
51
51
  question: z.string().min(1).describe("Question or content to validate."),
52
52
  models: providerListSchema.describe("Providers to ask. Defaults to Claude and Codex."),
53
53
  focus: z
@@ -69,7 +69,7 @@ export function registerValidationTools(server, deps) {
69
69
  judgeProvider: judgeModel,
70
70
  }),
71
71
  }));
72
- server.tool("second_opinion", {
72
+ server.tool("second_opinion", "Ask one provider CLI to review an answer (starts a validation job; poll job_status, collect job_result).", {
73
73
  answer: z.string().min(1).describe("Answer to review."),
74
74
  question: z.string().optional().describe("Original question, if available."),
75
75
  model: providerSchema.default("codex").describe("Provider to ask for the second opinion."),
@@ -84,7 +84,7 @@ export function registerValidationTools(server, deps) {
84
84
  providers: [model],
85
85
  }),
86
86
  }));
87
- server.tool("compare_answers", {
87
+ server.tool("compare_answers", "Summarize agreement/differences between caller-provided answers LOCALLY — does not call any provider.", {
88
88
  question: z.string().min(1).describe("Question the answers respond to."),
89
89
  answers: z.array(z.string().min(1)).min(2).describe("Two or more answers to compare."),
90
90
  }, async ({ question, answers }) => textResponse({
@@ -99,7 +99,7 @@ export function registerValidationTools(server, deps) {
99
99
  note: "Use validate_with_models when independent provider review is needed.",
100
100
  },
101
101
  }));
102
- server.tool("red_team_review", {
102
+ server.tool("red_team_review", "Challenge a plan, answer, or document for risks and failure modes via provider CLIs (starts validation jobs).", {
103
103
  content: z.string().min(1).describe("Plan, answer, or document to challenge."),
104
104
  riskLevel: z
105
105
  .enum(["normal", "high"])
@@ -117,7 +117,7 @@ export function registerValidationTools(server, deps) {
117
117
  riskLevel,
118
118
  }),
119
119
  }));
120
- server.tool("consensus_check", {
120
+ server.tool("consensus_check", "Ask provider CLIs whether they agree or disagree with a claim (starts validation jobs).", {
121
121
  claim: z.string().min(1).describe("Claim to check across providers."),
122
122
  models: providerListSchema.describe("Providers to ask for agreement or disagreement."),
123
123
  }, async ({ claim, models }) => textResponse({
@@ -130,7 +130,7 @@ export function registerValidationTools(server, deps) {
130
130
  providers: models,
131
131
  }),
132
132
  }));
133
- server.tool("ask_model", {
133
+ server.tool("ask_model", "Ask one provider CLI a question through the simplified validation surface (starts a validation job).", {
134
134
  question: z.string().min(1).describe("Question for one provider."),
135
135
  model: providerSchema.default("claude").describe("Provider to ask."),
136
136
  }, async ({ question, model }) => textResponse({
@@ -143,7 +143,7 @@ export function registerValidationTools(server, deps) {
143
143
  providers: [model],
144
144
  }),
145
145
  }));
146
- server.tool("synthesize_validation", {
146
+ server.tool("synthesize_validation", "Run an explicit judge model over already-collected validation results to produce a synthesis.", {
147
147
  question: z.string().min(1).describe("Original request that was validated."),
148
148
  providerResults: z
149
149
  .array(normalizedProviderResultSchema)
@@ -160,8 +160,8 @@ export function registerValidationTools(server, deps) {
160
160
  judgeProvider: judgeModel,
161
161
  }),
162
162
  }));
163
- server.tool("list_available_models", {}, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
164
- server.tool("job_status", {
163
+ server.tool("list_available_models", "List models and capabilities for every available provider CLI (takes no arguments; complements per-provider list_models).", {}, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
164
+ server.tool("job_status", "Check a VALIDATION job's status (jobs started by validate_with_models/ask_model/etc.) — distinct from llm_job_status, which tracks provider request jobs.", {
165
165
  jobId: z.string().min(1).describe("Validation job ID."),
166
166
  }, async ({ jobId }) => {
167
167
  const job = deps.asyncJobManager.getJobSnapshot(jobId);
@@ -170,7 +170,7 @@ export function registerValidationTools(server, deps) {
170
170
  }
171
171
  return textResponse({ success: true, job });
172
172
  });
173
- server.tool("job_result", {
173
+ server.tool("job_result", "Collect a VALIDATION job's normalized provider output — distinct from llm_job_result, which returns raw provider request job output.", {
174
174
  jobId: z.string().min(1).describe("Validation job ID."),
175
175
  provider: providerSchema
176
176
  .optional()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",