llm-cli-gateway 1.4.0 → 1.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/CHANGELOG.md +135 -1
  2. package/README.md +358 -15
  3. package/dist/approval-manager.d.ts +1 -1
  4. package/dist/async-job-manager.d.ts +32 -2
  5. package/dist/async-job-manager.js +101 -16
  6. package/dist/auth.d.ts +15 -0
  7. package/dist/auth.js +46 -0
  8. package/dist/cli-updater.d.ts +19 -2
  9. package/dist/cli-updater.js +110 -7
  10. package/dist/codex-json-parser.d.ts +34 -0
  11. package/dist/codex-json-parser.js +105 -0
  12. package/dist/config.d.ts +30 -0
  13. package/dist/config.js +167 -0
  14. package/dist/doctor.d.ts +110 -0
  15. package/dist/doctor.js +280 -0
  16. package/dist/endpoint-exposure.d.ts +22 -0
  17. package/dist/endpoint-exposure.js +231 -0
  18. package/dist/entrypoint-url.d.ts +1 -0
  19. package/dist/entrypoint-url.js +5 -0
  20. package/dist/executor.d.ts +9 -1
  21. package/dist/executor.js +52 -17
  22. package/dist/flight-recorder.d.ts +3 -1
  23. package/dist/flight-recorder.js +31 -2
  24. package/dist/gateway-server.d.ts +2 -0
  25. package/dist/gateway-server.js +1 -0
  26. package/dist/gemini-json-parser.d.ts +21 -0
  27. package/dist/gemini-json-parser.js +47 -0
  28. package/dist/health.d.ts +7 -0
  29. package/dist/health.js +22 -0
  30. package/dist/http-transport.d.ts +22 -0
  31. package/dist/http-transport.js +164 -0
  32. package/dist/index.d.ts +186 -2
  33. package/dist/index.js +2761 -1454
  34. package/dist/job-store.d.ts +118 -2
  35. package/dist/job-store.js +176 -5
  36. package/dist/logger.d.ts +9 -0
  37. package/dist/logger.js +14 -0
  38. package/dist/model-registry.js +40 -6
  39. package/dist/provider-login-guidance.d.ts +21 -0
  40. package/dist/provider-login-guidance.js +98 -0
  41. package/dist/provider-status.d.ts +41 -0
  42. package/dist/provider-status.js +203 -0
  43. package/dist/request-helpers.d.ts +484 -4
  44. package/dist/request-helpers.js +613 -0
  45. package/dist/resources.js +44 -0
  46. package/dist/session-manager-pg.js +1 -0
  47. package/dist/session-manager.d.ts +1 -1
  48. package/dist/session-manager.js +2 -1
  49. package/dist/upstream-contracts.d.ts +62 -0
  50. package/dist/upstream-contracts.js +620 -0
  51. package/dist/validation-normalizer.d.ts +23 -0
  52. package/dist/validation-normalizer.js +79 -0
  53. package/dist/validation-orchestrator.d.ts +47 -0
  54. package/dist/validation-orchestrator.js +145 -0
  55. package/dist/validation-prompts.d.ts +15 -0
  56. package/dist/validation-prompts.js +52 -0
  57. package/dist/validation-report.d.ts +57 -0
  58. package/dist/validation-report.js +129 -0
  59. package/dist/validation-tools.d.ts +7 -0
  60. package/dist/validation-tools.js +198 -0
  61. package/package.json +25 -10
  62. package/setup/status.schema.json +271 -0
package/dist/index.js CHANGED
@@ -2,32 +2,42 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { randomUUID } from "crypto";
5
- import { readFileSync, readdirSync, realpathSync } from "fs";
5
+ import { readFileSync, readdirSync } from "fs";
6
6
  import { dirname, join } from "path";
7
7
  import { fileURLToPath } from "url";
8
8
  import { z } from "zod";
9
9
  import { executeCli, killAllProcessGroups } from "./executor.js";
10
10
  import { parseStreamJson } from "./stream-json-parser.js";
11
+ import { parseCodexJsonStream } from "./codex-json-parser.js";
12
+ import { parseGeminiJson } from "./gemini-json-parser.js";
11
13
  import { createSessionManager } from "./session-manager.js";
12
14
  import { ResourceProvider } from "./resources.js";
13
15
  import { PerformanceMetrics } from "./metrics.js";
14
16
  import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
15
- import { loadConfig } from "./config.js";
17
+ import { loadConfig, loadPersistenceConfig } from "./config.js";
16
18
  import { checkHealth } from "./health.js";
17
19
  import { getCliInfo, resolveModelAlias } from "./model-registry.js";
18
20
  import { AsyncJobManager } from "./async-job-manager.js";
19
- import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
21
+ import { createJobStore } from "./job-store.js";
20
22
  import { ApprovalManager } from "./approval-manager.js";
21
23
  import { checkReviewIntegrity } from "./review-integrity.js";
22
24
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
23
- import { resolveSessionResumeArgs, resolveGrokSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
25
+ import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
24
26
  import { createFlightRecorder } from "./flight-recorder.js";
25
27
  import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
28
+ import { startHttpGateway } from "./http-transport.js";
29
+ import { printDoctorJson } from "./doctor.js";
30
+ import { registerValidationTools } from "./validation-tools.js";
31
+ import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
32
+ import { entrypointFileURL } from "./entrypoint-url.js";
26
33
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
27
34
  const logger = {
28
35
  info: (message, ...args) => {
29
36
  console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
30
37
  },
38
+ warn: (message, ...args) => {
39
+ console.error(`[WARN] ${new Date().toISOString()} - ${message}`, ...args);
40
+ },
31
41
  error: (message, ...args) => {
32
42
  console.error(`[ERROR] ${new Date().toISOString()} - ${message}`, ...args);
33
43
  },
@@ -90,48 +100,87 @@ const loadedSkills = loadSkills();
90
100
  // system prompt at connection time. Covers key patterns + pointers to L2 resources.
91
101
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
92
102
 
93
- Tools: claude_request, codex_request, gemini_request, grok_request (sync) | *_request_async (async)
103
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
104
+ Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
94
105
  Jobs: llm_job_status, llm_job_result, llm_job_cancel
95
106
  Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
96
- Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
107
+ Other: list_models, cli_versions, upstream_contracts, cli_upgrade, approval_list, llm_process_health
97
108
 
98
109
  Key behaviors:
99
110
  - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
100
- - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
111
+ - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (requires session_logging.enabled=true in ~/.vibe/config.toml), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
101
112
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
102
113
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
103
114
 
104
115
  Skills (full docs via MCP resources):
105
116
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
106
- const server = new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
117
+ function newGatewayMcpServer() {
118
+ return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
119
+ }
107
120
  // Global state (initialized asynchronously)
108
121
  let sessionManager;
109
122
  let db = null;
110
123
  const performanceMetrics = new PerformanceMetrics();
111
124
  let resourceProvider;
112
125
  const flightRecorder = createFlightRecorder(logger);
113
- // Durable job store: persists every async job to ~/.llm-cli-gateway/logs.db so callers
114
- // can collect results across long polling gaps and gateway restarts, and so repeated
115
- // identical requests dedup onto the running/completed job instead of starting over.
126
+ // Resolved persistence config: the single source of truth for the async-job backend.
127
+ // Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
128
+ // When backend = "none", the JobStore is null AND *_request_async tools are not
129
+ // registered (see createGatewayServer), making silent in-memory loss
130
+ // structurally impossible.
131
+ const persistenceConfig = loadPersistenceConfig(logger);
116
132
  const jobStore = (() => {
117
- const dbPath = resolveJobStoreDbPath();
118
- if (!dbPath) {
119
- logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
120
- return null;
121
- }
122
133
  try {
123
- return new JobStore(dbPath, logger);
134
+ return createJobStore(persistenceConfig, logger);
124
135
  }
125
136
  catch (err) {
126
- logger.error("Failed to open durable job store; continuing in-memory only", err);
137
+ logger.error("Failed to open durable job store; async tools will be unavailable", err);
127
138
  return null;
128
139
  }
129
140
  })();
130
- const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
131
- performanceMetrics.recordRequest(cli, durationMs, success);
132
- }, jobStore);
141
+ function newAsyncJobManager(metrics, runtimeLogger, store = jobStore) {
142
+ return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
143
+ metrics.recordRequest(cli, durationMs, success);
144
+ }, store);
145
+ }
146
+ const asyncJobManager = newAsyncJobManager(performanceMetrics, logger);
133
147
  const approvalManager = new ApprovalManager(undefined, logger);
134
148
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
149
+ // U22: Session-provider enum extended to five providers. The storage layer's
150
+ // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
151
+ // session_create / session_list / session_clear_all accept the fifth provider.
152
+ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
153
+ export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
154
+ let activeServer = null;
155
+ let activeHttpGateway = null;
156
+ function resolveGatewayServerRuntime(deps = {}, options = {}) {
157
+ const runtimeLogger = deps.logger ?? logger;
158
+ const runtimeSessionManager = deps.sessionManager ?? sessionManager;
159
+ const runtimePerformanceMetrics = deps.performanceMetrics ??
160
+ (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
161
+ const runtimeAsyncJobManager = deps.asyncJobManager ??
162
+ (options.isolateState
163
+ ? // Factory-created test/HTTP session servers must not mark another instance's
164
+ // durable jobs orphaned. Stdio startup injects the process-global manager.
165
+ newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
166
+ : asyncJobManager);
167
+ const runtimeApprovalManager = deps.approvalManager ??
168
+ (options.isolateState ? new ApprovalManager(undefined, runtimeLogger) : approvalManager);
169
+ return {
170
+ sessionManager: runtimeSessionManager,
171
+ resourceProvider: deps.resourceProvider ??
172
+ (options.isolateState
173
+ ? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
174
+ : resourceProvider),
175
+ db: "db" in deps ? (deps.db ?? null) : db,
176
+ performanceMetrics: runtimePerformanceMetrics,
177
+ asyncJobManager: runtimeAsyncJobManager,
178
+ approvalManager: runtimeApprovalManager,
179
+ flightRecorder: deps.flightRecorder ?? flightRecorder,
180
+ logger: runtimeLogger,
181
+ persistence: deps.persistence ?? persistenceConfig,
182
+ };
183
+ }
135
184
  // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
136
185
  // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
137
186
  // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
@@ -140,6 +189,7 @@ const CLI_IDLE_TIMEOUTS = {
140
189
  codex: 600_000, // 10 minutes — Codex streams stderr progress
141
190
  gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
142
191
  grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
192
+ mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
143
193
  };
144
194
  function resolveIdleTimeout(cli, override) {
145
195
  if (override !== undefined)
@@ -151,40 +201,91 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
151
201
  * Start an async job and poll until completion or deadline.
152
202
  * Returns the job result if it finishes in time, or a deferral marker.
153
203
  */
154
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh) {
204
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete) {
205
+ // U26 fix: ownership of onComplete is a contract. Once this function returns
206
+ // OR throws, the caller MUST consider onComplete consumed — i.e. it has
207
+ // either been run, or the AsyncJobManager has taken ownership of it. The
208
+ // caller never needs to reclaim.
209
+ let onCompleteOwnedByCaller = onComplete !== undefined;
210
+ const consumeOnComplete = () => {
211
+ if (!onCompleteOwnedByCaller || !onComplete)
212
+ return;
213
+ onCompleteOwnedByCaller = false;
214
+ try {
215
+ onComplete();
216
+ }
217
+ catch (err) {
218
+ runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
219
+ }
220
+ };
221
+ try {
222
+ assertUpstreamCliArgs(cli, args);
223
+ assertUpstreamCliEnv(cli, env);
224
+ }
225
+ catch (err) {
226
+ consumeOnComplete();
227
+ throw err;
228
+ }
155
229
  if (SYNC_DEADLINE_MS === 0) {
156
230
  // Disabled — fall through to direct execution.
157
231
  // Note: direct execution bypasses dedup. forceRefresh is implied.
158
- return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
232
+ const command = cli === "mistral" ? "vibe" : cli;
233
+ try {
234
+ return await executeCli(command, args, {
235
+ idleTimeout: idleTimeoutMs,
236
+ logger: runtime.logger,
237
+ env: env ? { ...process.env, ...env } : undefined,
238
+ });
239
+ }
240
+ finally {
241
+ // Direct-execution path completes inline; release per-request resources
242
+ // (e.g. outputSchema temp files) here.
243
+ consumeOnComplete();
244
+ }
245
+ }
246
+ let outcome;
247
+ try {
248
+ outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
249
+ idleTimeoutMs,
250
+ outputFormat,
251
+ forceRefresh,
252
+ env,
253
+ onComplete,
254
+ });
255
+ // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
256
+ // fireOnComplete on terminal status, or run inline immediately for dedup).
257
+ onCompleteOwnedByCaller = false;
258
+ }
259
+ catch (err) {
260
+ // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
261
+ // registered, so onComplete will never be called by the manager. Reclaim
262
+ // here so the temp file is not leaked.
263
+ consumeOnComplete();
264
+ throw err;
159
265
  }
160
- const outcome = asyncJobManager.startJobWithDedup(cli, args, corrId, {
161
- idleTimeoutMs,
162
- outputFormat,
163
- forceRefresh,
164
- });
165
266
  const job = outcome.snapshot;
166
267
  if (outcome.deduped) {
167
- logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
268
+ runtime.logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
168
269
  }
169
270
  const deadline = Date.now() + SYNC_DEADLINE_MS;
170
271
  while (Date.now() < deadline) {
171
- const snapshot = asyncJobManager.getJobSnapshot(job.id);
272
+ const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
172
273
  if (snapshot && snapshot.status !== "running") {
173
274
  // Job finished within deadline — extract result
174
- const result = asyncJobManager.getJobResult(job.id);
275
+ const result = runtime.asyncJobManager.getJobResult(job.id);
175
276
  if (!result) {
176
277
  return { stdout: "", stderr: "Job result unavailable", code: 1 };
177
278
  }
178
279
  return {
179
280
  stdout: result.stdout,
180
- stderr: result.stderr,
281
+ stderr: result.stderr || result.error || "",
181
282
  code: result.exitCode ?? 1,
182
283
  };
183
284
  }
184
285
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
185
286
  }
186
287
  // Deadline exceeded — return deferral
187
- logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
288
+ runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
188
289
  return {
189
290
  deferred: true,
190
291
  jobId: job.id,
@@ -262,28 +363,60 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
262
363
  function extractUsageAndCost(cli, output, outputFormat) {
263
364
  if (cli === "claude" && outputFormat === "stream-json") {
264
365
  const parsed = parseStreamJson(output);
366
+ if (!parsed.usage) {
367
+ return { costUsd: parsed.costUsd ?? undefined };
368
+ }
265
369
  return {
266
- inputTokens: parsed.usage?.inputTokens,
267
- outputTokens: parsed.usage?.outputTokens,
370
+ inputTokens: parsed.usage.inputTokens,
371
+ outputTokens: parsed.usage.outputTokens,
372
+ cacheReadTokens: parsed.usage.cacheReadInputTokens || undefined,
373
+ cacheCreationTokens: parsed.usage.cacheCreationInputTokens || undefined,
268
374
  costUsd: parsed.costUsd ?? undefined,
269
375
  };
270
376
  }
377
+ if (cli === "codex" && outputFormat === "json") {
378
+ const parsed = parseCodexJsonStream(output);
379
+ if (!parsed.usage) {
380
+ return {};
381
+ }
382
+ return {
383
+ inputTokens: parsed.usage.input_tokens,
384
+ outputTokens: parsed.usage.output_tokens,
385
+ cacheReadTokens: parsed.usage.cache_read_tokens,
386
+ cacheCreationTokens: parsed.usage.cache_creation_tokens,
387
+ costUsd: parsed.usage.cost_usd,
388
+ };
389
+ }
390
+ if (cli === "gemini" && outputFormat === "json") {
391
+ const parsed = parseGeminiJson(output);
392
+ if (!parsed || !parsed.usage) {
393
+ return {};
394
+ }
395
+ return {
396
+ inputTokens: parsed.usage.input_tokens,
397
+ outputTokens: parsed.usage.output_tokens,
398
+ cacheReadTokens: parsed.usage.cache_read_tokens,
399
+ };
400
+ }
401
+ // Mistral/Vibe: does not surface usage in its stdout/stream-json output. A
402
+ // future unit can read it from `~/.vibe/logs/session/<id>/metadata.json`
403
+ // once we resolve the session id post-run.
271
404
  return {};
272
405
  }
273
- function safeFlightStart(entry) {
406
+ function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
274
407
  try {
275
- flightRecorder.logStart(entry);
408
+ runtime.flightRecorder.logStart(entry);
276
409
  }
277
410
  catch (error) {
278
- logger.error("Flight recorder logStart failed", error);
411
+ runtime.logger.error("Flight recorder logStart failed", error);
279
412
  }
280
413
  }
281
- function safeFlightComplete(correlationId, result) {
414
+ function safeFlightComplete(correlationId, result, runtime = resolveGatewayServerRuntime()) {
282
415
  try {
283
- flightRecorder.logComplete(correlationId, result);
416
+ runtime.flightRecorder.logComplete(correlationId, result);
284
417
  }
285
418
  catch (error) {
286
- logger.error("Flight recorder logComplete failed", error);
419
+ runtime.logger.error("Flight recorder logComplete failed", error);
287
420
  }
288
421
  }
289
422
  function createApprovalDeniedResponse(operation, decision) {
@@ -350,124 +483,146 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
350
483
  //──────────────────────────────────────────────────────────────────────────────
351
484
  // MCP Resources
352
485
  //──────────────────────────────────────────────────────────────────────────────
353
- // Register skill resources (L2: full docs, read on demand)
354
- for (const skill of loadedSkills) {
355
- server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
356
- title: skill.name,
357
- description: skill.description,
358
- mimeType: "text/markdown",
359
- }, async () => ({
360
- contents: [
361
- {
362
- uri: `skills://${skill.name}`,
363
- mimeType: "text/markdown",
364
- text: skill.content,
365
- },
366
- ],
367
- }));
486
+ function registerBaseResources(server, runtime) {
487
+ // Register skill resources (L2: full docs, read on demand)
488
+ for (const skill of loadedSkills) {
489
+ server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
490
+ title: skill.name,
491
+ description: skill.description,
492
+ mimeType: "text/markdown",
493
+ }, async () => ({
494
+ contents: [
495
+ {
496
+ uri: `skills://${skill.name}`,
497
+ mimeType: "text/markdown",
498
+ text: skill.content,
499
+ },
500
+ ],
501
+ }));
502
+ }
503
+ runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
504
+ // Register all sessions resource
505
+ server.registerResource("all-sessions", "sessions://all", {
506
+ title: "📋 All Sessions",
507
+ description: "All conversation sessions across CLIs",
508
+ mimeType: "application/json",
509
+ }, async (uri) => {
510
+ runtime.logger.debug("Reading all sessions resource");
511
+ const contents = await runtime.resourceProvider.readResource(uri.href);
512
+ return { contents: contents ? [contents] : [] };
513
+ });
514
+ // Register Claude sessions resource
515
+ server.registerResource("claude-sessions", "sessions://claude", {
516
+ title: "🤖 Claude Sessions",
517
+ description: "Claude conversation sessions",
518
+ mimeType: "application/json",
519
+ }, async (uri) => {
520
+ runtime.logger.debug("Reading Claude sessions resource");
521
+ const contents = await runtime.resourceProvider.readResource(uri.href);
522
+ return { contents: contents ? [contents] : [] };
523
+ });
524
+ // Register Codex sessions resource
525
+ server.registerResource("codex-sessions", "sessions://codex", {
526
+ title: "💻 Codex Sessions",
527
+ description: "Codex conversation sessions",
528
+ mimeType: "application/json",
529
+ }, async (uri) => {
530
+ runtime.logger.debug("Reading Codex sessions resource");
531
+ const contents = await runtime.resourceProvider.readResource(uri.href);
532
+ return { contents: contents ? [contents] : [] };
533
+ });
534
+ // Register Gemini sessions resource
535
+ server.registerResource("gemini-sessions", "sessions://gemini", {
536
+ title: "✨ Gemini Sessions",
537
+ description: "Gemini conversation sessions",
538
+ mimeType: "application/json",
539
+ }, async (uri) => {
540
+ runtime.logger.debug("Reading Gemini sessions resource");
541
+ const contents = await runtime.resourceProvider.readResource(uri.href);
542
+ return { contents: contents ? [contents] : [] };
543
+ });
544
+ // Register Grok sessions resource
545
+ server.registerResource("grok-sessions", "sessions://grok", {
546
+ title: "⚡ Grok Sessions",
547
+ description: "Grok conversation sessions",
548
+ mimeType: "application/json",
549
+ }, async (uri) => {
550
+ runtime.logger.debug("Reading Grok sessions resource");
551
+ const contents = await runtime.resourceProvider.readResource(uri.href);
552
+ return { contents: contents ? [contents] : [] };
553
+ });
554
+ // Register Mistral sessions resource
555
+ server.registerResource("mistral-sessions", "sessions://mistral", {
556
+ title: "🌬 Mistral Sessions",
557
+ description: "Mistral Vibe conversation sessions",
558
+ mimeType: "application/json",
559
+ }, async (uri) => {
560
+ runtime.logger.debug("Reading Mistral sessions resource");
561
+ const contents = await runtime.resourceProvider.readResource(uri.href);
562
+ return { contents: contents ? [contents] : [] };
563
+ });
564
+ // Register Claude models resource
565
+ server.registerResource("claude-models", "models://claude", {
566
+ title: "🧠 Claude Models",
567
+ description: "Claude models and capabilities",
568
+ mimeType: "application/json",
569
+ }, async (uri) => {
570
+ runtime.logger.debug("Reading Claude models resource");
571
+ const contents = await runtime.resourceProvider.readResource(uri.href);
572
+ return { contents: contents ? [contents] : [] };
573
+ });
574
+ // Register Codex models resource
575
+ server.registerResource("codex-models", "models://codex", {
576
+ title: "🔧 Codex Models",
577
+ description: "Codex models and capabilities",
578
+ mimeType: "application/json",
579
+ }, async (uri) => {
580
+ runtime.logger.debug("Reading Codex models resource");
581
+ const contents = await runtime.resourceProvider.readResource(uri.href);
582
+ return { contents: contents ? [contents] : [] };
583
+ });
584
+ // Register Gemini models resource
585
+ server.registerResource("gemini-models", "models://gemini", {
586
+ title: "🌟 Gemini Models",
587
+ description: "Gemini models and capabilities",
588
+ mimeType: "application/json",
589
+ }, async (uri) => {
590
+ runtime.logger.debug("Reading Gemini models resource");
591
+ const contents = await runtime.resourceProvider.readResource(uri.href);
592
+ return { contents: contents ? [contents] : [] };
593
+ });
594
+ // Register Grok models resource
595
+ server.registerResource("grok-models", "models://grok", {
596
+ title: "⚡ Grok Models",
597
+ description: "Grok models and capabilities",
598
+ mimeType: "application/json",
599
+ }, async (uri) => {
600
+ runtime.logger.debug("Reading Grok models resource");
601
+ const contents = await runtime.resourceProvider.readResource(uri.href);
602
+ return { contents: contents ? [contents] : [] };
603
+ });
604
+ // Register Mistral models resource
605
+ server.registerResource("mistral-models", "models://mistral", {
606
+ title: "🌬 Mistral Models",
607
+ description: "Mistral Vibe models and capabilities",
608
+ mimeType: "application/json",
609
+ }, async (uri) => {
610
+ runtime.logger.debug("Reading Mistral models resource");
611
+ const contents = await runtime.resourceProvider.readResource(uri.href);
612
+ return { contents: contents ? [contents] : [] };
613
+ });
614
+ // Register performance metrics resource
615
+ server.registerResource("performance-metrics", "metrics://performance", {
616
+ title: "📈 Performance Metrics",
617
+ description: "Request counts, latency, success/failure rates",
618
+ mimeType: "application/json",
619
+ }, async (uri) => {
620
+ runtime.logger.debug("Reading performance metrics resource");
621
+ const contents = await runtime.resourceProvider.readResource(uri.href);
622
+ return { contents: contents ? [contents] : [] };
623
+ });
368
624
  }
369
- logger.info(`Registered ${loadedSkills.length} skill resources`);
370
- // Register all sessions resource
371
- server.registerResource("all-sessions", "sessions://all", {
372
- title: "📋 All Sessions",
373
- description: "All conversation sessions across CLIs",
374
- mimeType: "application/json",
375
- }, async (uri) => {
376
- logger.debug("Reading all sessions resource");
377
- const contents = await resourceProvider.readResource(uri.href);
378
- return { contents: contents ? [contents] : [] };
379
- });
380
- // Register Claude sessions resource
381
- server.registerResource("claude-sessions", "sessions://claude", {
382
- title: "🤖 Claude Sessions",
383
- description: "Claude conversation sessions",
384
- mimeType: "application/json",
385
- }, async (uri) => {
386
- logger.debug("Reading Claude sessions resource");
387
- const contents = await resourceProvider.readResource(uri.href);
388
- return { contents: contents ? [contents] : [] };
389
- });
390
- // Register Codex sessions resource
391
- server.registerResource("codex-sessions", "sessions://codex", {
392
- title: "💻 Codex Sessions",
393
- description: "Codex conversation sessions",
394
- mimeType: "application/json",
395
- }, async (uri) => {
396
- logger.debug("Reading Codex sessions resource");
397
- const contents = await resourceProvider.readResource(uri.href);
398
- return { contents: contents ? [contents] : [] };
399
- });
400
- // Register Gemini sessions resource
401
- server.registerResource("gemini-sessions", "sessions://gemini", {
402
- title: "✨ Gemini Sessions",
403
- description: "Gemini conversation sessions",
404
- mimeType: "application/json",
405
- }, async (uri) => {
406
- logger.debug("Reading Gemini sessions resource");
407
- const contents = await resourceProvider.readResource(uri.href);
408
- return { contents: contents ? [contents] : [] };
409
- });
410
- // Register Grok sessions resource
411
- server.registerResource("grok-sessions", "sessions://grok", {
412
- title: "⚡ Grok Sessions",
413
- description: "Grok conversation sessions",
414
- mimeType: "application/json",
415
- }, async (uri) => {
416
- logger.debug("Reading Grok sessions resource");
417
- const contents = await resourceProvider.readResource(uri.href);
418
- return { contents: contents ? [contents] : [] };
419
- });
420
- // Register Claude models resource
421
- server.registerResource("claude-models", "models://claude", {
422
- title: "🧠 Claude Models",
423
- description: "Claude models and capabilities",
424
- mimeType: "application/json",
425
- }, async (uri) => {
426
- logger.debug("Reading Claude models resource");
427
- const contents = await resourceProvider.readResource(uri.href);
428
- return { contents: contents ? [contents] : [] };
429
- });
430
- // Register Codex models resource
431
- server.registerResource("codex-models", "models://codex", {
432
- title: "🔧 Codex Models",
433
- description: "Codex models and capabilities",
434
- mimeType: "application/json",
435
- }, async (uri) => {
436
- logger.debug("Reading Codex models resource");
437
- const contents = await resourceProvider.readResource(uri.href);
438
- return { contents: contents ? [contents] : [] };
439
- });
440
- // Register Gemini models resource
441
- server.registerResource("gemini-models", "models://gemini", {
442
- title: "🌟 Gemini Models",
443
- description: "Gemini models and capabilities",
444
- mimeType: "application/json",
445
- }, async (uri) => {
446
- logger.debug("Reading Gemini models resource");
447
- const contents = await resourceProvider.readResource(uri.href);
448
- return { contents: contents ? [contents] : [] };
449
- });
450
- // Register Grok models resource
451
- server.registerResource("grok-models", "models://grok", {
452
- title: "⚡ Grok Models",
453
- description: "Grok models and capabilities",
454
- mimeType: "application/json",
455
- }, async (uri) => {
456
- logger.debug("Reading Grok models resource");
457
- const contents = await resourceProvider.readResource(uri.href);
458
- return { contents: contents ? [contents] : [] };
459
- });
460
- // Register performance metrics resource
461
- server.registerResource("performance-metrics", "metrics://performance", {
462
- title: "📈 Performance Metrics",
463
- description: "Request counts, latency, success/failure rates",
464
- mimeType: "application/json",
465
- }, async (uri) => {
466
- logger.debug("Reading performance metrics resource");
467
- const contents = await resourceProvider.readResource(uri.href);
468
- return { contents: contents ? [contents] : [] };
469
- });
470
- function prepareClaudeRequest(params) {
625
+ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
471
626
  const corrId = params.correlationId || randomUUID();
472
627
  const cliInfo = getCliInfo();
473
628
  const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
@@ -478,7 +633,7 @@ function prepareClaudeRequest(params) {
478
633
  disallowedTools: params.disallowedTools,
479
634
  });
480
635
  if (reviewIntegrity.violations.length > 0) {
481
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
636
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
482
637
  cli: "claude",
483
638
  operation: params.operation,
484
639
  score: reviewIntegrity.totalScore,
@@ -498,7 +653,7 @@ function prepareClaudeRequest(params) {
498
653
  const mcpConfig = mcpConfigResolution.config;
499
654
  let approvalDecision = null;
500
655
  if (params.approvalStrategy === "mcp_managed") {
501
- approvalDecision = approvalManager.decide({
656
+ approvalDecision = runtime.approvalManager.decide({
502
657
  cli: "claude",
503
658
  operation: params.operation,
504
659
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -535,8 +690,15 @@ function prepareClaudeRequest(params) {
535
690
  if (params.approvalStrategy === "mcp_managed") {
536
691
  args.push("--permission-mode", "bypassPermissions");
537
692
  }
538
- else if (params.dangerouslySkipPermissions) {
539
- args.push("--permission-mode", "bypassPermissions");
693
+ else {
694
+ const permFlags = resolveClaudePermissionFlags({
695
+ permissionMode: params.permissionMode,
696
+ dangerouslySkipPermissions: params.dangerouslySkipPermissions,
697
+ });
698
+ if (permFlags.warning) {
699
+ runtime.logger.warn(`[${corrId}] ${permFlags.warning}`);
700
+ }
701
+ args.push(...permFlags.args);
540
702
  }
541
703
  if (params.strictMcpConfig || mcpConfig.enabled.length > 0) {
542
704
  args.push("--mcp-config", mcpConfig.path);
@@ -544,6 +706,26 @@ function prepareClaudeRequest(params) {
544
706
  args.push("--strict-mcp-config");
545
707
  }
546
708
  }
709
+ // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
710
+ let validatedAgents;
711
+ if (params.agents && Object.keys(params.agents).length > 0) {
712
+ const result = validateClaudeAgentsMap(params.agents);
713
+ if (!result.ok) {
714
+ return createErrorResponse("claude", 1, "", corrId, new Error(result.message));
715
+ }
716
+ validatedAgents = result.value;
717
+ }
718
+ args.push(...prepareClaudeHighImpactFlags({
719
+ agent: params.agent,
720
+ agents: validatedAgents,
721
+ forkSession: params.forkSession,
722
+ systemPrompt: params.systemPrompt,
723
+ appendSystemPrompt: params.appendSystemPrompt,
724
+ maxBudgetUsd: params.maxBudgetUsd,
725
+ maxTurns: params.maxTurns,
726
+ effort: params.effort,
727
+ excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
728
+ }));
547
729
  return {
548
730
  corrId,
549
731
  effectivePrompt,
@@ -555,14 +737,14 @@ function prepareClaudeRequest(params) {
555
737
  args,
556
738
  };
557
739
  }
558
- function prepareCodexRequest(params) {
740
+ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
559
741
  const corrId = params.correlationId || randomUUID();
560
742
  const cliInfo = getCliInfo();
561
743
  const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
562
744
  // Review integrity check on raw prompt (before optimization)
563
745
  const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
564
746
  if (reviewIntegrity.violations.length > 0) {
565
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
747
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
566
748
  cli: "codex",
567
749
  operation: params.operation,
568
750
  score: reviewIntegrity.totalScore,
@@ -577,7 +759,7 @@ function prepareCodexRequest(params) {
577
759
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
578
760
  let approvalDecision = null;
579
761
  if (params.approvalStrategy === "mcp_managed") {
580
- approvalDecision = approvalManager.decide({
762
+ approvalDecision = runtime.approvalManager.decide({
581
763
  cli: "codex",
582
764
  operation: params.operation,
583
765
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -615,13 +797,73 @@ function prepareCodexRequest(params) {
615
797
  }
616
798
  if (resolvedModel)
617
799
  args.push("--model", resolvedModel);
618
- if (sessionPlan.mode === "new" && params.fullAuto) {
619
- args.push("--full-auto");
800
+ // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
801
+ // `codex exec resume` rejects all of these (the original session's policy is
802
+ // inherited), so we only emit them when starting a NEW session.
803
+ if (sessionPlan.mode === "new") {
804
+ const sandboxFlags = resolveCodexSandboxFlags({
805
+ sandboxMode: params.sandboxMode,
806
+ askForApproval: params.askForApproval,
807
+ fullAuto: params.fullAuto,
808
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
809
+ });
810
+ if (sandboxFlags.warning) {
811
+ runtime.logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
812
+ }
813
+ args.push(...sandboxFlags.args);
620
814
  }
621
815
  if (params.dangerouslyBypassApprovalsAndSandbox) {
622
816
  args.push("--dangerously-bypass-approvals-and-sandbox");
623
817
  }
818
+ // U23 fix: emit `--json` when the caller asked for JSON output so the
819
+ // codex-json-parser actually receives JSONL events. This is what makes
820
+ // extractUsageAndCost() reachable from the tool surface; without it, the
821
+ // U23 parser is dead code.
822
+ if (params.outputFormat === "json") {
823
+ args.push("--json");
824
+ }
624
825
  args.push("--skip-git-repo-check");
826
+ // U26: High-impact feature flags. Some of these (`--output-schema`,
827
+ // `--search`, `-C`, `--add-dir`) are rejected by `codex exec resume`, so we
828
+ // only emit them on a NEW session. Images / ephemeral / profile /
829
+ // ignore-rules / ignore-user-config are allowed on resume per the audited
830
+ // CLI help; we emit them in both branches.
831
+ let highImpactCleanup;
832
+ if (sessionPlan.mode === "new") {
833
+ const high = prepareCodexHighImpactFlags({
834
+ outputSchema: params.outputSchema,
835
+ search: params.search,
836
+ profile: params.profile,
837
+ configOverrides: params.configOverrides,
838
+ ephemeral: params.ephemeral,
839
+ images: params.images,
840
+ ignoreUserConfig: params.ignoreUserConfig,
841
+ ignoreRules: params.ignoreRules,
842
+ });
843
+ if (high.missingImagePath) {
844
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
845
+ }
846
+ args.push(...high.args);
847
+ highImpactCleanup = high.cleanup;
848
+ }
849
+ else {
850
+ // On resume, emit only the resume-safe subset (profile, ephemeral,
851
+ // images, ignoreUserConfig, ignoreRules). outputSchema, search, and
852
+ // configOverrides are dropped silently to mirror existing behavior for
853
+ // sandbox/ask-for-approval on resume.
854
+ const high = prepareCodexHighImpactFlags({
855
+ profile: params.profile,
856
+ ephemeral: params.ephemeral,
857
+ images: params.images,
858
+ ignoreUserConfig: params.ignoreUserConfig,
859
+ ignoreRules: params.ignoreRules,
860
+ });
861
+ if (high.missingImagePath) {
862
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
863
+ }
864
+ args.push(...high.args);
865
+ highImpactCleanup = high.cleanup;
866
+ }
625
867
  if (sessionPlan.mode === "resume-by-id" && sessionPlan.sessionId) {
626
868
  args.push(sessionPlan.sessionId);
627
869
  }
@@ -634,9 +876,10 @@ function prepareCodexRequest(params) {
634
876
  approvalDecision,
635
877
  reviewIntegrity,
636
878
  args,
879
+ cleanup: highImpactCleanup,
637
880
  };
638
881
  }
639
- function prepareGeminiRequest(params) {
882
+ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
640
883
  const corrId = params.correlationId || randomUUID();
641
884
  const cliInfo = getCliInfo();
642
885
  const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
@@ -646,7 +889,7 @@ function prepareGeminiRequest(params) {
646
889
  allowedTools: params.allowedTools,
647
890
  });
648
891
  if (reviewIntegrity.violations.length > 0) {
649
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
892
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
650
893
  cli: "gemini",
651
894
  operation: params.operation,
652
895
  score: reviewIntegrity.totalScore,
@@ -661,7 +904,7 @@ function prepareGeminiRequest(params) {
661
904
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
662
905
  let approvalDecision = null;
663
906
  if (params.approvalStrategy === "mcp_managed") {
664
- approvalDecision = approvalManager.decide({
907
+ approvalDecision = runtime.approvalManager.decide({
665
908
  cli: "gemini",
666
909
  operation: params.operation,
667
910
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -678,7 +921,29 @@ function prepareGeminiRequest(params) {
678
921
  }
679
922
  }
680
923
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
681
- const args = [effectivePrompt];
924
+ // U27: Validate high-impact policy paths and prepend attachment tokens
925
+ // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
926
+ const highImpact = prepareGeminiHighImpactFlags({
927
+ sandbox: params.sandbox,
928
+ policyFiles: params.policyFiles,
929
+ adminPolicyFiles: params.adminPolicyFiles,
930
+ });
931
+ if (highImpact.missingPolicyPath) {
932
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
933
+ }
934
+ if (params.attachments && params.attachments.length > 0) {
935
+ try {
936
+ effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
937
+ }
938
+ catch (err) {
939
+ return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
940
+ }
941
+ }
942
+ // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
943
+ // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
944
+ // the documented non-interactive flag and is robust against future CLI mode
945
+ // changes.
946
+ const args = ["-p", effectivePrompt];
682
947
  if (resolvedModel)
683
948
  args.push("--model", resolvedModel);
684
949
  if (effectiveApprovalMode)
@@ -695,6 +960,15 @@ function prepareGeminiRequest(params) {
695
960
  sanitizeCliArgValues(params.includeDirs, "includeDirs");
696
961
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
697
962
  }
963
+ // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
964
+ // existing flag set so positional ordering relative to `-p` is preserved.
965
+ args.push(...highImpact.args);
966
+ // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
967
+ // JSON parser is otherwise unreachable from the tool surface and the
968
+ // structured usageMetadata is silently dropped.
969
+ if (params.outputFormat === "json") {
970
+ args.push("-o", "json");
971
+ }
698
972
  return {
699
973
  corrId,
700
974
  effectivePrompt,
@@ -705,7 +979,7 @@ function prepareGeminiRequest(params) {
705
979
  args,
706
980
  };
707
981
  }
708
- function prepareGrokRequest(params) {
982
+ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
709
983
  const corrId = params.correlationId || randomUUID();
710
984
  const cliInfo = getCliInfo();
711
985
  const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
@@ -716,7 +990,7 @@ function prepareGrokRequest(params) {
716
990
  disallowedTools: params.disallowedTools,
717
991
  });
718
992
  if (reviewIntegrity.violations.length > 0) {
719
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
993
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
720
994
  cli: "grok",
721
995
  operation: params.operation,
722
996
  score: reviewIntegrity.totalScore,
@@ -731,7 +1005,7 @@ function prepareGrokRequest(params) {
731
1005
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
732
1006
  let approvalDecision = null;
733
1007
  if (params.approvalStrategy === "mcp_managed") {
734
- approvalDecision = approvalManager.decide({
1008
+ approvalDecision = runtime.approvalManager.decide({
735
1009
  cli: "grok",
736
1010
  operation: params.operation,
737
1011
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -780,6 +1054,78 @@ function prepareGrokRequest(params) {
780
1054
  args,
781
1055
  };
782
1056
  }
1057
+ function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
1058
+ const corrId = params.correlationId || randomUUID();
1059
+ const cliInfo = getCliInfo();
1060
+ const resolvedModel = resolveModelAlias("mistral", params.model, cliInfo) || "devstral-medium";
1061
+ const reviewIntegrity = checkReviewIntegrity({
1062
+ prompt: params.prompt,
1063
+ allowedTools: params.allowedTools,
1064
+ disallowedTools: params.disallowedTools,
1065
+ });
1066
+ if (reviewIntegrity.violations.length > 0) {
1067
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
1068
+ cli: "mistral",
1069
+ operation: params.operation,
1070
+ score: reviewIntegrity.totalScore,
1071
+ });
1072
+ }
1073
+ let effectivePrompt = params.prompt;
1074
+ if (params.optimizePrompt) {
1075
+ const optimized = optimizePromptText(effectivePrompt);
1076
+ logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1077
+ effectivePrompt = optimized;
1078
+ }
1079
+ const requestedMcpServers = normalizeMcpServers(params.mcpServers);
1080
+ let approvalDecision = null;
1081
+ if (params.approvalStrategy === "mcp_managed") {
1082
+ approvalDecision = runtime.approvalManager.decide({
1083
+ cli: "mistral",
1084
+ operation: params.operation,
1085
+ prompt: params.prompt,
1086
+ bypassRequested: params.permissionMode === "auto-approve",
1087
+ fullAuto: false,
1088
+ requestedMcpServers,
1089
+ allowedTools: params.allowedTools,
1090
+ disallowedTools: params.disallowedTools,
1091
+ policy: params.approvalPolicy,
1092
+ metadata: { model: resolvedModel, vibeActiveModelEnv: true },
1093
+ reviewIntegrity,
1094
+ });
1095
+ if (approvalDecision.status !== "approved") {
1096
+ return createApprovalDeniedResponse(params.operation, approvalDecision);
1097
+ }
1098
+ }
1099
+ // Under mcp_managed, force --agent auto-approve so the approval gate's
1100
+ // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
1101
+ // forcing under mcp_managed).
1102
+ const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
1103
+ ? "auto-approve"
1104
+ : (params.permissionMode ?? "auto-approve");
1105
+ const prep = buildMistralCliInvocation({
1106
+ prompt: effectivePrompt,
1107
+ resolvedModel,
1108
+ outputFormat: params.outputFormat,
1109
+ permissionMode: effectivePermissionMode,
1110
+ effort: params.effort,
1111
+ reasoningEffort: params.reasoningEffort,
1112
+ allowedTools: params.allowedTools,
1113
+ disallowedTools: params.disallowedTools,
1114
+ });
1115
+ if (prep.ignoredDisallowedTools) {
1116
+ runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
1117
+ }
1118
+ return {
1119
+ corrId,
1120
+ effectivePrompt,
1121
+ resolvedModel,
1122
+ requestedMcpServers,
1123
+ approvalDecision,
1124
+ reviewIntegrity,
1125
+ args: prep.args,
1126
+ mistralEnv: prep.env,
1127
+ };
1128
+ }
783
1129
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
784
1130
  let finalStdout = stdout;
785
1131
  // Skip response optimization for JSON output to prevent corrupting structured data
@@ -831,7 +1177,26 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
831
1177
  }
832
1178
  return response;
833
1179
  }
1180
+ function resolveHandlerRuntime(deps) {
1181
+ if (deps.runtime)
1182
+ return deps.runtime;
1183
+ const asyncDeps = deps;
1184
+ // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
1185
+ const depLogger = deps.logger;
1186
+ const normalizedLogger = {
1187
+ info: depLogger.info,
1188
+ warn: depLogger.warn ?? ((msg, ...rest) => depLogger.info(`[WARN] ${msg}`, ...rest)),
1189
+ error: depLogger.error,
1190
+ debug: depLogger.debug,
1191
+ };
1192
+ return resolveGatewayServerRuntime({
1193
+ sessionManager: deps.sessionManager,
1194
+ logger: normalizedLogger,
1195
+ asyncJobManager: asyncDeps.asyncJobManager,
1196
+ });
1197
+ }
834
1198
  export async function handleGeminiRequest(deps, params) {
1199
+ const runtime = resolveHandlerRuntime(deps);
835
1200
  const startTime = Date.now();
836
1201
  const prep = prepareGeminiRequest({
837
1202
  prompt: params.prompt,
@@ -845,7 +1210,12 @@ export async function handleGeminiRequest(deps, params) {
845
1210
  correlationId: params.correlationId,
846
1211
  optimizePrompt: params.optimizePrompt,
847
1212
  operation: "gemini_request",
848
- });
1213
+ outputFormat: params.outputFormat,
1214
+ sandbox: params.sandbox,
1215
+ policyFiles: params.policyFiles,
1216
+ adminPolicyFiles: params.adminPolicyFiles,
1217
+ attachments: params.attachments,
1218
+ }, runtime);
849
1219
  if (!("args" in prep))
850
1220
  return prep;
851
1221
  const { corrId, args } = prep;
@@ -857,20 +1227,24 @@ export async function handleGeminiRequest(deps, params) {
857
1227
  model: prep.resolvedModel || "default",
858
1228
  prompt: params.prompt,
859
1229
  sessionId: params.sessionId,
860
- });
1230
+ }, runtime);
861
1231
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
862
1232
  try {
863
- // Session arg planning (pure, no I/O)
864
- const sessionResult = resolveSessionResumeArgs({
1233
+ // U27: Session arg planning. For fresh sessions, emit `--session-id <uuid>`
1234
+ // so the gateway and Gemini agree on the session identifier from turn 1.
1235
+ // For resume flows, fall back to `--resume <id>` (existing behavior).
1236
+ const sessionPlan = resolveGeminiSessionPlan({
865
1237
  sessionId: params.sessionId,
866
1238
  resumeLatest: params.resumeLatest,
867
1239
  createNewSession: params.createNewSession,
868
1240
  });
869
- args.push(...sessionResult.resumeArgs);
870
- const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
1241
+ args.push(...sessionPlan.args);
1242
+ const userProvidedSession = sessionPlan.resumed;
1243
+ const effectiveSessionIdHint = sessionPlan.emittedSessionId ?? params.sessionId;
1244
+ const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
871
1245
  // Deferred — job still running, return async reference
872
1246
  if (isDeferredResponse(result)) {
873
- return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1247
+ return buildDeferredToolResponse(result, effectiveSessionIdHint);
874
1248
  }
875
1249
  const { stdout, stderr, code } = result;
876
1250
  durationMs = Math.max(0, Date.now() - startTime);
@@ -885,13 +1259,15 @@ export async function handleGeminiRequest(deps, params) {
885
1259
  exitCode: code,
886
1260
  errorMessage: stderr || `Exit code ${code}`,
887
1261
  status: "failed",
888
- });
1262
+ }, runtime);
889
1263
  return createErrorResponse("gemini", code, stderr, corrId);
890
1264
  }
891
1265
  wasSuccessful = true;
892
- // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
893
- let effectiveSessionId = sessionResult.effectiveSessionId;
894
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1266
+ // U27 Post-success session I/O. Mirror the gateway store 1:1 to whatever
1267
+ // session id Gemini is using (either the user-supplied resume id or the
1268
+ // deterministic --session-id we emitted).
1269
+ let effectiveSessionId = effectiveSessionIdHint;
1270
+ if (effectiveSessionId) {
895
1271
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
896
1272
  if (!existing) {
897
1273
  try {
@@ -905,12 +1281,9 @@ export async function handleGeminiRequest(deps, params) {
905
1281
  }
906
1282
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
907
1283
  }
908
- else if (!params.createNewSession && !effectiveSessionId) {
909
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
910
- effectiveSessionId = newSession.id;
911
- }
912
1284
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
913
- const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
1285
+ const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
1286
+ const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
914
1287
  safeFlightComplete(corrId, {
915
1288
  response: stdout,
916
1289
  durationMs,
@@ -920,7 +1293,12 @@ export async function handleGeminiRequest(deps, params) {
920
1293
  optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
921
1294
  exitCode: 0,
922
1295
  status: "completed",
923
- });
1296
+ inputTokens: geminiUsage.inputTokens,
1297
+ outputTokens: geminiUsage.outputTokens,
1298
+ cacheReadTokens: geminiUsage.cacheReadTokens,
1299
+ cacheCreationTokens: geminiUsage.cacheCreationTokens,
1300
+ costUsd: geminiUsage.costUsd,
1301
+ }, runtime);
924
1302
  return response;
925
1303
  }
926
1304
  catch (error) {
@@ -935,15 +1313,16 @@ export async function handleGeminiRequest(deps, params) {
935
1313
  exitCode: 1,
936
1314
  errorMessage: error.message,
937
1315
  status: "failed",
938
- });
1316
+ }, runtime);
939
1317
  return createErrorResponse("gemini", 1, "", corrId, error);
940
1318
  }
941
1319
  finally {
942
1320
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
943
- performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
1321
+ runtime.performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
944
1322
  }
945
1323
  }
946
1324
  export async function handleGeminiRequestAsync(deps, params) {
1325
+ const runtime = resolveHandlerRuntime(deps);
947
1326
  const prep = prepareGeminiRequest({
948
1327
  prompt: params.prompt,
949
1328
  model: params.model,
@@ -956,21 +1335,26 @@ export async function handleGeminiRequestAsync(deps, params) {
956
1335
  correlationId: params.correlationId,
957
1336
  optimizePrompt: params.optimizePrompt,
958
1337
  operation: "gemini_request_async",
959
- });
1338
+ outputFormat: params.outputFormat,
1339
+ sandbox: params.sandbox,
1340
+ policyFiles: params.policyFiles,
1341
+ adminPolicyFiles: params.adminPolicyFiles,
1342
+ attachments: params.attachments,
1343
+ }, runtime);
960
1344
  if (!("args" in prep))
961
1345
  return prep;
962
1346
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
963
1347
  try {
964
- // Session arg planning (pure, no I/O)
965
- const sessionResult = resolveSessionResumeArgs({
1348
+ // U27: Session arg planning with deterministic --session-id for fresh sessions.
1349
+ const sessionPlan = resolveGeminiSessionPlan({
966
1350
  sessionId: params.sessionId,
967
1351
  resumeLatest: params.resumeLatest,
968
1352
  createNewSession: params.createNewSession,
969
1353
  });
970
- args.push(...sessionResult.resumeArgs);
1354
+ args.push(...sessionPlan.args);
971
1355
  // Pre-start session I/O (async handlers: prevent orphaned jobs)
972
- let effectiveSessionId = sessionResult.effectiveSessionId;
973
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1356
+ let effectiveSessionId = sessionPlan.emittedSessionId ?? params.sessionId;
1357
+ if (effectiveSessionId) {
974
1358
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
975
1359
  if (!existing) {
976
1360
  try {
@@ -984,18 +1368,18 @@ export async function handleGeminiRequestAsync(deps, params) {
984
1368
  }
985
1369
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
986
1370
  }
987
- else if (!params.createNewSession && !effectiveSessionId) {
988
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
989
- effectiveSessionId = newSession.id;
990
- }
991
- // Start job only after all session I/O succeeds
992
- const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
1371
+ // Start job only after all session I/O succeeds. U23: forward outputFormat
1372
+ // so AsyncJobManager records it in the durable store (the manager also
1373
+ // surfaces it in the snapshot).
1374
+ assertUpstreamCliArgs("gemini", args);
1375
+ assertUpstreamCliEnv("gemini", undefined);
1376
+ const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
993
1377
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
994
1378
  const asyncResponse = {
995
1379
  success: true,
996
1380
  job,
997
1381
  sessionId: effectiveSessionId || null,
998
- resumable: sessionResult.userProvidedSession,
1382
+ resumable: sessionPlan.resumed,
999
1383
  approval: approvalDecision,
1000
1384
  mcpServers: { requested: requestedMcpServers },
1001
1385
  };
@@ -1016,6 +1400,7 @@ export async function handleGeminiRequestAsync(deps, params) {
1016
1400
  }
1017
1401
  }
1018
1402
  export async function handleGrokRequest(deps, params) {
1403
+ const runtime = resolveHandlerRuntime(deps);
1019
1404
  const startTime = Date.now();
1020
1405
  const prep = prepareGrokRequest({
1021
1406
  prompt: params.prompt,
@@ -1033,7 +1418,7 @@ export async function handleGrokRequest(deps, params) {
1033
1418
  correlationId: params.correlationId,
1034
1419
  optimizePrompt: params.optimizePrompt,
1035
1420
  operation: "grok_request",
1036
- });
1421
+ }, runtime);
1037
1422
  if (!("args" in prep))
1038
1423
  return prep;
1039
1424
  const { corrId, args } = prep;
@@ -1045,7 +1430,7 @@ export async function handleGrokRequest(deps, params) {
1045
1430
  model: prep.resolvedModel || "default",
1046
1431
  prompt: params.prompt,
1047
1432
  sessionId: params.sessionId,
1048
- });
1433
+ }, runtime);
1049
1434
  deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${params.prompt.length}`);
1050
1435
  try {
1051
1436
  // Session arg planning (pure, no I/O)
@@ -1055,7 +1440,7 @@ export async function handleGrokRequest(deps, params) {
1055
1440
  createNewSession: params.createNewSession,
1056
1441
  });
1057
1442
  args.push(...sessionResult.resumeArgs);
1058
- const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1443
+ const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
1059
1444
  // Deferred — job still running, return async reference
1060
1445
  if (isDeferredResponse(result)) {
1061
1446
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -1073,7 +1458,7 @@ export async function handleGrokRequest(deps, params) {
1073
1458
  exitCode: code,
1074
1459
  errorMessage: stderr || `Exit code ${code}`,
1075
1460
  status: "failed",
1076
- });
1461
+ }, runtime);
1077
1462
  return createErrorResponse("grok", code, stderr, corrId);
1078
1463
  }
1079
1464
  wasSuccessful = true;
@@ -1108,7 +1493,7 @@ export async function handleGrokRequest(deps, params) {
1108
1493
  optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1109
1494
  exitCode: 0,
1110
1495
  status: "completed",
1111
- });
1496
+ }, runtime);
1112
1497
  return response;
1113
1498
  }
1114
1499
  catch (error) {
@@ -1123,15 +1508,16 @@ export async function handleGrokRequest(deps, params) {
1123
1508
  exitCode: 1,
1124
1509
  errorMessage: error.message,
1125
1510
  status: "failed",
1126
- });
1511
+ }, runtime);
1127
1512
  return createErrorResponse("grok", 1, "", corrId, error);
1128
1513
  }
1129
1514
  finally {
1130
1515
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1131
- performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1516
+ runtime.performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1132
1517
  }
1133
1518
  }
1134
1519
  export async function handleGrokRequestAsync(deps, params) {
1520
+ const runtime = resolveHandlerRuntime(deps);
1135
1521
  const prep = prepareGrokRequest({
1136
1522
  prompt: params.prompt,
1137
1523
  model: params.model,
@@ -1148,7 +1534,7 @@ export async function handleGrokRequestAsync(deps, params) {
1148
1534
  correlationId: params.correlationId,
1149
1535
  optimizePrompt: params.optimizePrompt,
1150
1536
  operation: "grok_request_async",
1151
- });
1537
+ }, runtime);
1152
1538
  if (!("args" in prep))
1153
1539
  return prep;
1154
1540
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
@@ -1181,6 +1567,8 @@ export async function handleGrokRequestAsync(deps, params) {
1181
1567
  effectiveSessionId = newSession.id;
1182
1568
  }
1183
1569
  // Start job only after all session I/O succeeds
1570
+ assertUpstreamCliArgs("grok", args);
1571
+ assertUpstreamCliEnv("grok", undefined);
1184
1572
  const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1185
1573
  deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
1186
1574
  const asyncResponse = {
@@ -1207,694 +1595,288 @@ export async function handleGrokRequestAsync(deps, params) {
1207
1595
  return createErrorResponse("grok_request_async", 1, "", corrId, error);
1208
1596
  }
1209
1597
  }
1210
- export async function handleCodexRequestAsync(deps, params) {
1211
- const prep = prepareCodexRequest({
1598
+ export async function handleMistralRequest(deps, params) {
1599
+ const runtime = resolveHandlerRuntime(deps);
1600
+ const startTime = Date.now();
1601
+ const prep = prepareMistralRequest({
1212
1602
  prompt: params.prompt,
1213
1603
  model: params.model,
1214
- fullAuto: params.fullAuto,
1215
- dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1604
+ outputFormat: params.outputFormat,
1605
+ permissionMode: params.permissionMode,
1606
+ effort: params.effort,
1607
+ reasoningEffort: params.reasoningEffort,
1608
+ allowedTools: params.allowedTools,
1609
+ disallowedTools: params.disallowedTools,
1216
1610
  approvalStrategy: params.approvalStrategy,
1217
1611
  approvalPolicy: params.approvalPolicy,
1218
1612
  mcpServers: params.mcpServers,
1219
- sessionId: params.sessionId,
1220
- resumeLatest: params.resumeLatest,
1221
- createNewSession: params.createNewSession,
1222
1613
  correlationId: params.correlationId,
1223
1614
  optimizePrompt: params.optimizePrompt,
1224
- operation: "codex_request_async",
1225
- });
1615
+ operation: "mistral_request",
1616
+ }, runtime);
1226
1617
  if (!("args" in prep))
1227
1618
  return prep;
1228
- const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1619
+ const { corrId, args, mistralEnv } = prep;
1620
+ let durationMs = 0;
1621
+ let wasSuccessful = false;
1622
+ safeFlightStart({
1623
+ correlationId: corrId,
1624
+ cli: "mistral",
1625
+ model: prep.resolvedModel || "default",
1626
+ prompt: params.prompt,
1627
+ sessionId: params.sessionId,
1628
+ }, runtime);
1629
+ deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${params.prompt.length}`);
1229
1630
  try {
1230
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
1231
- let effectiveSessionId = params.sessionId;
1232
- if (!params.createNewSession && !params.sessionId) {
1233
- const activeSession = await deps.sessionManager.getActiveSession("codex");
1234
- if (activeSession) {
1235
- effectiveSessionId = activeSession.id;
1236
- }
1237
- else {
1238
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1239
- effectiveSessionId = newSession.id;
1240
- }
1241
- }
1242
- else if (params.sessionId) {
1243
- await deps.sessionManager.updateSessionUsage(params.sessionId);
1631
+ const sessionResult = resolveMistralSessionArgs({
1632
+ sessionId: params.sessionId,
1633
+ resumeLatest: params.resumeLatest,
1634
+ createNewSession: params.createNewSession,
1635
+ });
1636
+ args.push(...sessionResult.resumeArgs);
1637
+ const result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv);
1638
+ if (isDeferredResponse(result)) {
1639
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1244
1640
  }
1245
- else if (params.createNewSession) {
1246
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1247
- effectiveSessionId = newSession.id;
1248
- }
1249
- // Start job only after all session I/O succeeds
1250
- const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), undefined, params.forceRefresh);
1251
- deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
1252
- const asyncResponse = {
1253
- success: true,
1254
- job,
1255
- sessionId: effectiveSessionId || null,
1256
- approval: approvalDecision,
1257
- mcpServers: { requested: requestedMcpServers },
1258
- };
1259
- if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1260
- asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1261
- }
1262
- return {
1263
- content: [
1264
- {
1265
- type: "text",
1266
- text: JSON.stringify(asyncResponse, null, 2),
1267
- },
1268
- ],
1269
- };
1270
- }
1271
- catch (error) {
1272
- return createErrorResponse("codex_request_async", 1, "", corrId, error);
1273
- }
1274
- }
1275
- //──────────────────────────────────────────────────────────────────────────────
1276
- // Claude Code Tool
1277
- //──────────────────────────────────────────────────────────────────────────────
1278
- server.tool("claude_request", {
1279
- prompt: z
1280
- .string()
1281
- .min(1, "Prompt cannot be empty")
1282
- .max(100000, "Prompt too long (max 100k chars)")
1283
- .describe("Prompt text for Claude"),
1284
- model: z
1285
- .string()
1286
- .optional()
1287
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1288
- outputFormat: z
1289
- .enum(["text", "json", "stream-json"])
1290
- .default("text")
1291
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1292
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1293
- continueSession: z.boolean().default(false).describe("Continue active session"),
1294
- createNewSession: z.boolean().default(false).describe("Force new session"),
1295
- allowedTools: z
1296
- .array(z.string())
1297
- .optional()
1298
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1299
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1300
- dangerouslySkipPermissions: z
1301
- .boolean()
1302
- .default(false)
1303
- .describe("Bypass permissions (sandbox only)"),
1304
- approvalStrategy: z
1305
- .enum(["legacy", "mcp_managed"])
1306
- .default("legacy")
1307
- .describe("Approval strategy"),
1308
- approvalPolicy: z
1309
- .enum(["strict", "balanced", "permissive"])
1310
- .optional()
1311
- .describe("Approval policy override"),
1312
- mcpServers: z
1313
- .array(MCP_SERVER_ENUM)
1314
- .default(["sqry"])
1315
- .describe("MCP servers exposed to Claude"),
1316
- strictMcpConfig: z
1317
- .boolean()
1318
- .default(false)
1319
- .describe("Restrict Claude to provided MCP config only"),
1320
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1321
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1322
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1323
- idleTimeoutMs: z
1324
- .number()
1325
- .int()
1326
- .min(30_000)
1327
- .max(3_600_000)
1328
- .optional()
1329
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1330
- forceRefresh: z
1331
- .boolean()
1332
- .default(false)
1333
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1334
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1335
- const startTime = Date.now();
1336
- const prep = prepareClaudeRequest({
1337
- prompt,
1338
- model,
1339
- outputFormat,
1340
- allowedTools,
1341
- disallowedTools,
1342
- dangerouslySkipPermissions,
1343
- approvalStrategy,
1344
- approvalPolicy,
1345
- mcpServers,
1346
- strictMcpConfig,
1347
- correlationId,
1348
- optimizePrompt,
1349
- operation: "claude_request",
1350
- });
1351
- if (!("args" in prep))
1352
- return prep;
1353
- const { corrId, args } = prep;
1354
- let durationMs = 0;
1355
- let wasSuccessful = false;
1356
- safeFlightStart({
1357
- correlationId: corrId,
1358
- cli: "claude",
1359
- model: prep.resolvedModel || "default",
1360
- prompt,
1361
- sessionId,
1362
- });
1363
- logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
1364
- try {
1365
- // Session management
1366
- let effectiveSessionId = sessionId;
1367
- let useContinue = continueSession;
1368
- const activeSession = await sessionManager.getActiveSession("claude");
1369
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1370
- effectiveSessionId = activeSession.id;
1371
- useContinue = true;
1372
- }
1373
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1374
- useContinue = true;
1375
- }
1376
- if (useContinue) {
1377
- args.push("--continue");
1378
- }
1379
- else if (effectiveSessionId) {
1380
- args.push("--session-id", effectiveSessionId);
1381
- await sessionManager.updateSessionUsage(effectiveSessionId);
1382
- }
1383
- // Idle timeout only for stream-json (text/json produce no output until done)
1384
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1385
- const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh);
1386
- // Deferred — job still running, return async reference
1387
- if (isDeferredResponse(result)) {
1388
- return buildDeferredToolResponse(result, effectiveSessionId);
1389
- }
1390
- const { stdout, stderr, code } = result;
1391
- durationMs = Math.max(0, Date.now() - startTime);
1392
- if (code !== 0) {
1393
- logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
1394
- safeFlightComplete(corrId, {
1395
- response: stderr || "",
1396
- durationMs,
1397
- retryCount: 0,
1398
- circuitBreakerState: "closed",
1399
- optimizationApplied: optimizePrompt || optimizeResponse,
1400
- exitCode: code,
1401
- errorMessage: stderr || `Exit code ${code}`,
1402
- status: "failed",
1403
- });
1404
- return createErrorResponse("claude", code, stderr, corrId);
1641
+ const { stdout, stderr, code } = result;
1642
+ durationMs = Math.max(0, Date.now() - startTime);
1643
+ if (code !== 0) {
1644
+ deps.logger.info(`[${corrId}] mistral_request failed in ${durationMs}ms`);
1645
+ safeFlightComplete(corrId, {
1646
+ response: stderr || "",
1647
+ durationMs,
1648
+ retryCount: 0,
1649
+ circuitBreakerState: "closed",
1650
+ optimizationApplied: false,
1651
+ exitCode: code,
1652
+ errorMessage: stderr || `Exit code ${code}`,
1653
+ status: "failed",
1654
+ }, runtime);
1655
+ return createErrorResponse("mistral", code, stderr, corrId);
1405
1656
  }
1406
1657
  wasSuccessful = true;
1407
- // If we used a session ID and it's not tracked yet, create a session record
1408
- if (effectiveSessionId) {
1409
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1410
- if (!existingSession) {
1411
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1658
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1659
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1660
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1661
+ if (!existing) {
1662
+ try {
1663
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1664
+ }
1665
+ catch {
1666
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1667
+ if (!rechecked)
1668
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1669
+ }
1412
1670
  }
1671
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1413
1672
  }
1414
- logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
1415
- // Parse stream-json NDJSON output to extract result text
1416
- if (outputFormat === "stream-json") {
1417
- const parsed = parseStreamJson(stdout);
1418
- if (parsed.costUsd !== null) {
1419
- logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
1420
- }
1421
- safeFlightComplete(corrId, {
1422
- response: parsed.text,
1423
- inputTokens: parsed.usage?.inputTokens,
1424
- outputTokens: parsed.usage?.outputTokens,
1425
- durationMs,
1426
- retryCount: 0,
1427
- circuitBreakerState: "closed",
1428
- costUsd: parsed.costUsd ?? undefined,
1429
- optimizationApplied: optimizePrompt || optimizeResponse,
1430
- exitCode: 0,
1431
- status: "completed",
1432
- });
1433
- return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1673
+ else if (!params.createNewSession && !effectiveSessionId) {
1674
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1675
+ effectiveSessionId = newSession.id;
1434
1676
  }
1677
+ deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
1678
+ const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
1435
1679
  safeFlightComplete(corrId, {
1436
1680
  response: stdout,
1437
1681
  durationMs,
1438
1682
  retryCount: 0,
1439
1683
  circuitBreakerState: "closed",
1440
- optimizationApplied: optimizePrompt || optimizeResponse,
1684
+ approvalDecision: prep.approvalDecision?.status,
1685
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1441
1686
  exitCode: 0,
1442
1687
  status: "completed",
1443
- });
1444
- return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1688
+ }, runtime);
1689
+ return response;
1445
1690
  }
1446
1691
  catch (error) {
1447
1692
  const elapsedMs = Math.max(0, Date.now() - startTime);
1448
- logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
1693
+ deps.logger.info(`[${corrId}] mistral_request threw exception after ${elapsedMs}ms`);
1449
1694
  safeFlightComplete(corrId, {
1450
1695
  response: "",
1451
1696
  durationMs: elapsedMs,
1452
1697
  retryCount: 0,
1453
1698
  circuitBreakerState: "closed",
1454
- optimizationApplied: optimizePrompt || optimizeResponse,
1699
+ optimizationApplied: false,
1455
1700
  exitCode: 1,
1456
1701
  errorMessage: error.message,
1457
1702
  status: "failed",
1458
- });
1459
- return createErrorResponse("claude", 1, "", corrId, error);
1703
+ }, runtime);
1704
+ return createErrorResponse("mistral", 1, "", corrId, error);
1460
1705
  }
1461
1706
  finally {
1462
1707
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1463
- performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
1708
+ runtime.performanceMetrics.recordRequest("mistral", finalizedDurationMs, wasSuccessful);
1464
1709
  }
1465
- });
1466
- //──────────────────────────────────────────────────────────────────────────────
1467
- // Codex Tool
1468
- //──────────────────────────────────────────────────────────────────────────────
1469
- server.tool("codex_request", {
1470
- prompt: z
1471
- .string()
1472
- .min(1, "Prompt cannot be empty")
1473
- .max(100000, "Prompt too long (max 100k chars)")
1474
- .describe("Prompt text for Codex"),
1475
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1476
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1477
- dangerouslyBypassApprovalsAndSandbox: z
1478
- .boolean()
1479
- .default(false)
1480
- .describe("Run Codex without approvals/sandbox"),
1481
- approvalStrategy: z
1482
- .enum(["legacy", "mcp_managed"])
1483
- .default("legacy")
1484
- .describe("Approval strategy"),
1485
- approvalPolicy: z
1486
- .enum(["strict", "balanced", "permissive"])
1487
- .optional()
1488
- .describe("Approval policy override"),
1489
- mcpServers: z
1490
- .array(MCP_SERVER_ENUM)
1491
- .default(["sqry"])
1492
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1493
- sessionId: z
1494
- .string()
1495
- .optional()
1496
- .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1497
- resumeLatest: z
1498
- .boolean()
1499
- .default(false)
1500
- .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1501
- createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
1502
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1503
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1504
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1505
- idleTimeoutMs: z
1506
- .number()
1507
- .int()
1508
- .min(30_000)
1509
- .max(3_600_000)
1510
- .optional()
1511
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1512
- forceRefresh: z
1513
- .boolean()
1514
- .default(false)
1515
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1516
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1517
- const startTime = Date.now();
1518
- const prep = prepareCodexRequest({
1519
- prompt,
1520
- model,
1521
- fullAuto,
1522
- dangerouslyBypassApprovalsAndSandbox,
1523
- approvalStrategy,
1524
- approvalPolicy,
1525
- mcpServers,
1526
- sessionId,
1527
- resumeLatest,
1528
- createNewSession,
1529
- correlationId,
1530
- optimizePrompt,
1531
- operation: "codex_request",
1532
- });
1710
+ }
1711
+ export async function handleMistralRequestAsync(deps, params) {
1712
+ const runtime = resolveHandlerRuntime(deps);
1713
+ const prep = prepareMistralRequest({
1714
+ prompt: params.prompt,
1715
+ model: params.model,
1716
+ outputFormat: params.outputFormat,
1717
+ permissionMode: params.permissionMode,
1718
+ effort: params.effort,
1719
+ reasoningEffort: params.reasoningEffort,
1720
+ allowedTools: params.allowedTools,
1721
+ disallowedTools: params.disallowedTools,
1722
+ approvalStrategy: params.approvalStrategy,
1723
+ approvalPolicy: params.approvalPolicy,
1724
+ mcpServers: params.mcpServers,
1725
+ correlationId: params.correlationId,
1726
+ optimizePrompt: params.optimizePrompt,
1727
+ operation: "mistral_request_async",
1728
+ }, runtime);
1533
1729
  if (!("args" in prep))
1534
1730
  return prep;
1535
- const { corrId, args } = prep;
1536
- let durationMs = 0;
1537
- let wasSuccessful = false;
1538
- safeFlightStart({
1539
- correlationId: corrId,
1540
- cli: "codex",
1541
- model: prep.resolvedModel || "default",
1542
- prompt,
1543
- sessionId,
1544
- });
1545
- logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
1731
+ const { corrId, args, requestedMcpServers, approvalDecision, mistralEnv } = prep;
1546
1732
  try {
1547
- const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, forceRefresh);
1548
- // Deferred — job still running, return async reference
1549
- if (isDeferredResponse(result)) {
1550
- return buildDeferredToolResponse(result, sessionId);
1551
- }
1552
- const { stdout, stderr, code } = result;
1553
- durationMs = Math.max(0, Date.now() - startTime);
1554
- if (code !== 0) {
1555
- logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
1556
- safeFlightComplete(corrId, {
1557
- response: stderr || "",
1558
- durationMs,
1559
- retryCount: 0,
1560
- circuitBreakerState: "closed",
1561
- optimizationApplied: optimizePrompt || optimizeResponse,
1562
- exitCode: code,
1563
- errorMessage: stderr || `Exit code ${code}`,
1564
- status: "failed",
1565
- });
1566
- return createErrorResponse("codex", code, stderr, corrId);
1567
- }
1568
- wasSuccessful = true;
1569
- // Track session usage
1570
- let effectiveSessionId = sessionId;
1571
- if (!createNewSession && !sessionId) {
1572
- const activeSession = await sessionManager.getActiveSession("codex");
1573
- if (activeSession) {
1574
- effectiveSessionId = activeSession.id;
1575
- }
1576
- else {
1577
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1578
- effectiveSessionId = newSession.id;
1733
+ const sessionResult = resolveMistralSessionArgs({
1734
+ sessionId: params.sessionId,
1735
+ resumeLatest: params.resumeLatest,
1736
+ createNewSession: params.createNewSession,
1737
+ });
1738
+ args.push(...sessionResult.resumeArgs);
1739
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1740
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1741
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1742
+ if (!existing) {
1743
+ try {
1744
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1745
+ }
1746
+ catch {
1747
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1748
+ if (!rechecked)
1749
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1750
+ }
1579
1751
  }
1752
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1580
1753
  }
1581
- else if (sessionId) {
1582
- await sessionManager.updateSessionUsage(sessionId);
1583
- }
1584
- else if (createNewSession) {
1585
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1754
+ else if (!params.createNewSession && !effectiveSessionId) {
1755
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1586
1756
  effectiveSessionId = newSession.id;
1587
1757
  }
1588
- logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
1589
- safeFlightComplete(corrId, {
1590
- response: stdout,
1591
- durationMs,
1592
- retryCount: 0,
1593
- circuitBreakerState: "closed",
1594
- optimizationApplied: optimizePrompt || optimizeResponse,
1595
- exitCode: 0,
1596
- status: "completed",
1597
- });
1598
- return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
1758
+ assertUpstreamCliArgs("mistral", args);
1759
+ assertUpstreamCliEnv("mistral", mistralEnv);
1760
+ const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
1761
+ deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
1762
+ const asyncResponse = {
1763
+ success: true,
1764
+ job,
1765
+ sessionId: effectiveSessionId || null,
1766
+ resumable: sessionResult.userProvidedSession,
1767
+ approval: approvalDecision,
1768
+ mcpServers: { requested: requestedMcpServers },
1769
+ };
1770
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1771
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1772
+ }
1773
+ return {
1774
+ content: [
1775
+ {
1776
+ type: "text",
1777
+ text: JSON.stringify(asyncResponse, null, 2),
1778
+ },
1779
+ ],
1780
+ };
1599
1781
  }
1600
1782
  catch (error) {
1601
- const elapsedMs = Math.max(0, Date.now() - startTime);
1602
- logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
1603
- safeFlightComplete(corrId, {
1604
- response: "",
1605
- durationMs: elapsedMs,
1606
- retryCount: 0,
1607
- circuitBreakerState: "closed",
1608
- optimizationApplied: optimizePrompt || optimizeResponse,
1609
- exitCode: 1,
1610
- errorMessage: error.message,
1611
- status: "failed",
1612
- });
1613
- return createErrorResponse("codex", 1, "", corrId, error);
1783
+ return createErrorResponse("mistral_request_async", 1, "", corrId, error);
1614
1784
  }
1615
- finally {
1616
- const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1617
- performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
1618
- }
1619
- });
1620
- //──────────────────────────────────────────────────────────────────────────────
1621
- // Gemini Tool
1622
- //──────────────────────────────────────────────────────────────────────────────
1623
- server.tool("gemini_request", {
1624
- prompt: z
1625
- .string()
1626
- .min(1, "Prompt cannot be empty")
1627
- .max(100000, "Prompt too long (max 100k chars)")
1628
- .describe("Prompt text for Gemini"),
1629
- model: z
1630
- .string()
1631
- .optional()
1632
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1633
- sessionId: z.string().optional().describe("Session ID or 'latest'"),
1634
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1635
- createNewSession: z.boolean().default(false).describe("Force new session"),
1636
- approvalMode: z
1637
- .enum(["default", "auto_edit", "yolo"])
1638
- .optional()
1639
- .describe("Approval: default|auto_edit|yolo"),
1640
- approvalStrategy: z
1641
- .enum(["legacy", "mcp_managed"])
1642
- .default("legacy")
1643
- .describe("Approval strategy"),
1644
- approvalPolicy: z
1645
- .enum(["strict", "balanced", "permissive"])
1646
- .optional()
1647
- .describe("Approval policy override"),
1648
- mcpServers: z
1649
- .array(MCP_SERVER_ENUM)
1650
- .default(["sqry"])
1651
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1652
- allowedTools: z
1653
- .array(z.string())
1654
- .optional()
1655
- .describe("Allowed tools (['Write','Edit','Bash'])"),
1656
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1657
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1658
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1659
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1660
- idleTimeoutMs: z
1661
- .number()
1662
- .int()
1663
- .min(30_000)
1664
- .max(3_600_000)
1665
- .optional()
1666
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1667
- forceRefresh: z
1668
- .boolean()
1669
- .default(false)
1670
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1671
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1672
- return handleGeminiRequest({ sessionManager, logger }, {
1673
- prompt,
1674
- model,
1675
- sessionId,
1676
- resumeLatest,
1677
- createNewSession,
1678
- approvalMode,
1679
- approvalStrategy,
1680
- approvalPolicy,
1681
- mcpServers,
1682
- allowedTools,
1683
- includeDirs,
1684
- correlationId,
1685
- optimizePrompt,
1686
- optimizeResponse,
1687
- idleTimeoutMs,
1688
- forceRefresh,
1689
- });
1690
- });
1691
- //──────────────────────────────────────────────────────────────────────────────
1692
- // Grok Tool
1693
- //──────────────────────────────────────────────────────────────────────────────
1694
- server.tool("grok_request", {
1695
- prompt: z
1696
- .string()
1697
- .min(1, "Prompt cannot be empty")
1698
- .max(100000, "Prompt too long (max 100k chars)")
1699
- .describe("Prompt text for Grok"),
1700
- model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
1701
- outputFormat: z
1702
- .enum(["plain", "json", "streaming-json"])
1703
- .optional()
1704
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
1705
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1706
- resumeLatest: z
1707
- .boolean()
1708
- .default(false)
1709
- .describe("Resume most recent Grok session in cwd (--continue)"),
1710
- createNewSession: z.boolean().default(false).describe("Force new session"),
1711
- alwaysApprove: z
1712
- .boolean()
1713
- .default(false)
1714
- .describe("Auto-approve all tool executions (--always-approve)"),
1715
- permissionMode: z
1716
- .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
1717
- .optional()
1718
- .describe("Grok permission mode"),
1719
- effort: z
1720
- .enum(["low", "medium", "high", "xhigh", "max"])
1721
- .optional()
1722
- .describe("Grok effort level"),
1723
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
1724
- approvalStrategy: z
1725
- .enum(["legacy", "mcp_managed"])
1726
- .default("legacy")
1727
- .describe("Approval strategy"),
1728
- approvalPolicy: z
1729
- .enum(["strict", "balanced", "permissive"])
1730
- .optional()
1731
- .describe("Approval policy override"),
1732
- mcpServers: z
1733
- .array(MCP_SERVER_ENUM)
1734
- .default(["sqry"])
1735
- .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
1736
- allowedTools: z
1737
- .array(z.string())
1738
- .optional()
1739
- .describe("Allowed built-in tools (passed as --tools comma list)"),
1740
- disallowedTools: z
1741
- .array(z.string())
1742
- .optional()
1743
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
1744
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1745
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1746
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1747
- idleTimeoutMs: z
1748
- .number()
1749
- .int()
1750
- .min(30_000)
1751
- .max(3_600_000)
1752
- .optional()
1753
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1754
- forceRefresh: z
1755
- .boolean()
1756
- .default(false)
1757
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1758
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1759
- return handleGrokRequest({ sessionManager, logger }, {
1760
- prompt,
1761
- model,
1762
- outputFormat,
1763
- sessionId,
1764
- resumeLatest,
1765
- createNewSession,
1766
- alwaysApprove,
1767
- permissionMode,
1768
- effort,
1769
- reasoningEffort,
1770
- approvalStrategy,
1771
- approvalPolicy,
1772
- mcpServers,
1773
- allowedTools,
1774
- disallowedTools,
1775
- correlationId,
1776
- optimizePrompt,
1777
- optimizeResponse,
1778
- idleTimeoutMs,
1779
- forceRefresh,
1780
- });
1781
- });
1782
- //──────────────────────────────────────────────────────────────────────────────
1783
- // Async Long-Running Job Tools (No Time-Bound LLM Execution)
1784
- //──────────────────────────────────────────────────────────────────────────────
1785
- server.tool("claude_request_async", {
1786
- prompt: z
1787
- .string()
1788
- .min(1, "Prompt cannot be empty")
1789
- .max(100000, "Prompt too long (max 100k chars)")
1790
- .describe("Prompt text for Claude"),
1791
- model: z
1792
- .string()
1793
- .optional()
1794
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1795
- outputFormat: z
1796
- .enum(["text", "json", "stream-json"])
1797
- .default("text")
1798
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1799
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1800
- continueSession: z.boolean().default(false).describe("Continue active session"),
1801
- createNewSession: z.boolean().default(false).describe("Force new session"),
1802
- allowedTools: z
1803
- .array(z.string())
1804
- .optional()
1805
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1806
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1807
- dangerouslySkipPermissions: z
1808
- .boolean()
1809
- .default(false)
1810
- .describe("Bypass permissions (sandbox only)"),
1811
- approvalStrategy: z
1812
- .enum(["legacy", "mcp_managed"])
1813
- .default("legacy")
1814
- .describe("Approval strategy"),
1815
- approvalPolicy: z
1816
- .enum(["strict", "balanced", "permissive"])
1817
- .optional()
1818
- .describe("Approval policy override"),
1819
- mcpServers: z
1820
- .array(MCP_SERVER_ENUM)
1821
- .default(["sqry"])
1822
- .describe("MCP servers exposed to Claude"),
1823
- strictMcpConfig: z
1824
- .boolean()
1825
- .default(false)
1826
- .describe("Restrict Claude to provided MCP config only"),
1827
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1828
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1829
- idleTimeoutMs: z
1830
- .number()
1831
- .int()
1832
- .min(30_000)
1833
- .max(3_600_000)
1834
- .optional()
1835
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1836
- forceRefresh: z
1837
- .boolean()
1838
- .default(false)
1839
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1840
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
1841
- const prep = prepareClaudeRequest({
1842
- prompt,
1843
- model,
1844
- outputFormat,
1845
- allowedTools,
1846
- disallowedTools,
1847
- dangerouslySkipPermissions,
1848
- approvalStrategy,
1849
- approvalPolicy,
1850
- mcpServers,
1851
- strictMcpConfig,
1852
- correlationId,
1853
- optimizePrompt,
1854
- operation: "claude_request_async",
1855
- });
1785
+ }
1786
+ export async function handleCodexRequestAsync(deps, params) {
1787
+ const runtime = resolveHandlerRuntime(deps);
1788
+ const prep = prepareCodexRequest({
1789
+ prompt: params.prompt,
1790
+ model: params.model,
1791
+ fullAuto: params.fullAuto,
1792
+ sandboxMode: params.sandboxMode,
1793
+ askForApproval: params.askForApproval,
1794
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
1795
+ dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1796
+ approvalStrategy: params.approvalStrategy,
1797
+ approvalPolicy: params.approvalPolicy,
1798
+ mcpServers: params.mcpServers,
1799
+ sessionId: params.sessionId,
1800
+ resumeLatest: params.resumeLatest,
1801
+ createNewSession: params.createNewSession,
1802
+ correlationId: params.correlationId,
1803
+ optimizePrompt: params.optimizePrompt,
1804
+ operation: "codex_request_async",
1805
+ outputFormat: params.outputFormat,
1806
+ outputSchema: params.outputSchema,
1807
+ search: params.search,
1808
+ profile: params.profile,
1809
+ configOverrides: params.configOverrides,
1810
+ ephemeral: params.ephemeral,
1811
+ images: params.images,
1812
+ ignoreUserConfig: params.ignoreUserConfig,
1813
+ ignoreRules: params.ignoreRules,
1814
+ }, runtime);
1856
1815
  if (!("args" in prep))
1857
1816
  return prep;
1858
- const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
1817
+ const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1818
+ // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
1819
+ // exactly one place at a time: this scope until startJob succeeds, then
1820
+ // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
1821
+ // the job is registered. Any code path that fails to hand it off MUST run
1822
+ // it locally.
1823
+ const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
1824
+ let prepCleanupOwnedHere = prepCleanup !== undefined;
1825
+ const runPrepCleanupLocally = () => {
1826
+ if (!prepCleanupOwnedHere || !prepCleanup)
1827
+ return;
1828
+ prepCleanupOwnedHere = false;
1829
+ try {
1830
+ prepCleanup();
1831
+ }
1832
+ catch (err) {
1833
+ deps.logger.error(`[${corrId}] codex_request_async outputSchema cleanup threw`, err);
1834
+ }
1835
+ };
1859
1836
  try {
1860
- // Session management (before job start for async)
1861
- let effectiveSessionId = sessionId;
1862
- let useContinue = continueSession;
1863
- const activeSession = await sessionManager.getActiveSession("claude");
1864
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1865
- effectiveSessionId = activeSession.id;
1866
- useContinue = true;
1837
+ // Pre-start session I/O (async handlers: prevent orphaned jobs)
1838
+ let effectiveSessionId = params.sessionId;
1839
+ if (!params.createNewSession && !params.sessionId) {
1840
+ const activeSession = await deps.sessionManager.getActiveSession("codex");
1841
+ if (activeSession) {
1842
+ effectiveSessionId = activeSession.id;
1843
+ }
1844
+ else {
1845
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1846
+ effectiveSessionId = newSession.id;
1847
+ }
1867
1848
  }
1868
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1869
- useContinue = true;
1849
+ else if (params.sessionId) {
1850
+ await deps.sessionManager.updateSessionUsage(params.sessionId);
1870
1851
  }
1871
- if (useContinue) {
1872
- args.push("--continue");
1852
+ else if (params.createNewSession) {
1853
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1854
+ effectiveSessionId = newSession.id;
1873
1855
  }
1874
- else if (effectiveSessionId) {
1875
- args.push("--session-id", effectiveSessionId);
1876
- await sessionManager.updateSessionUsage(effectiveSessionId);
1856
+ // Start job only after all session I/O succeeds. If startJob throws before
1857
+ // registering the record, ownership stays here and we run it in the catch.
1858
+ assertUpstreamCliArgs("codex", args);
1859
+ assertUpstreamCliEnv("codex", undefined);
1860
+ let job;
1861
+ try {
1862
+ job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
1863
+ // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
1864
+ // status. Release our local ownership claim so the catch path doesn't
1865
+ // double-fire.
1866
+ prepCleanupOwnedHere = false;
1877
1867
  }
1878
- if (effectiveSessionId) {
1879
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1880
- if (!existingSession) {
1881
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1882
- }
1868
+ catch (startErr) {
1869
+ // startJob never stored the record → manager won't call onComplete. We
1870
+ // still own the cleanup; let the outer catch run it.
1871
+ throw startErr;
1883
1872
  }
1884
- // Idle timeout only for stream-json (text/json produce no output until done)
1885
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1886
- const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
1887
- logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
1873
+ deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
1888
1874
  const asyncResponse = {
1889
1875
  success: true,
1890
1876
  job,
1891
- sessionId: effectiveSessionId || activeSession?.id || null,
1877
+ sessionId: effectiveSessionId || null,
1892
1878
  approval: approvalDecision,
1893
- mcpServers: {
1894
- requested: requestedMcpServers,
1895
- enabled: mcpConfig?.enabled,
1896
- missing: mcpConfig?.missing,
1897
- },
1879
+ mcpServers: { requested: requestedMcpServers },
1898
1880
  };
1899
1881
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1900
1882
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
@@ -1909,667 +1891,1928 @@ server.tool("claude_request_async", {
1909
1891
  };
1910
1892
  }
1911
1893
  catch (error) {
1912
- return createErrorResponse("claude_request_async", 1, "", corrId, error);
1913
- }
1914
- });
1915
- server.tool("codex_request_async", {
1916
- prompt: z
1917
- .string()
1918
- .min(1, "Prompt cannot be empty")
1919
- .max(100000, "Prompt too long (max 100k chars)")
1920
- .describe("Prompt text for Codex"),
1921
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1922
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1923
- dangerouslyBypassApprovalsAndSandbox: z
1924
- .boolean()
1925
- .default(false)
1926
- .describe("Run Codex without approvals/sandbox"),
1927
- approvalStrategy: z
1928
- .enum(["legacy", "mcp_managed"])
1929
- .default("legacy")
1930
- .describe("Approval strategy"),
1931
- approvalPolicy: z
1932
- .enum(["strict", "balanced", "permissive"])
1933
- .optional()
1934
- .describe("Approval policy override"),
1935
- mcpServers: z
1936
- .array(MCP_SERVER_ENUM)
1937
- .default(["sqry"])
1938
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1939
- sessionId: z
1940
- .string()
1941
- .optional()
1942
- .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1943
- resumeLatest: z
1944
- .boolean()
1945
- .default(false)
1946
- .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1947
- createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
1948
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1949
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1950
- idleTimeoutMs: z
1951
- .number()
1952
- .int()
1953
- .min(30_000)
1954
- .max(3_600_000)
1955
- .optional()
1956
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1957
- forceRefresh: z
1958
- .boolean()
1959
- .default(false)
1960
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1961
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
1962
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
1963
- prompt,
1964
- model,
1965
- fullAuto,
1966
- dangerouslyBypassApprovalsAndSandbox,
1967
- approvalStrategy,
1968
- approvalPolicy,
1969
- mcpServers,
1970
- sessionId,
1971
- resumeLatest,
1972
- createNewSession,
1973
- correlationId,
1974
- optimizePrompt,
1975
- idleTimeoutMs,
1976
- forceRefresh,
1977
- });
1978
- });
1979
- server.tool("gemini_request_async", {
1980
- prompt: z
1981
- .string()
1982
- .min(1, "Prompt cannot be empty")
1983
- .max(100000, "Prompt too long (max 100k chars)")
1984
- .describe("Prompt text for Gemini"),
1985
- model: z
1986
- .string()
1987
- .optional()
1988
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1989
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1990
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1991
- createNewSession: z.boolean().default(false).describe("Force new session"),
1992
- approvalMode: z
1993
- .enum(["default", "auto_edit", "yolo"])
1994
- .optional()
1995
- .describe("Approval: default|auto_edit|yolo"),
1996
- approvalStrategy: z
1997
- .enum(["legacy", "mcp_managed"])
1998
- .default("legacy")
1999
- .describe("Approval strategy"),
2000
- approvalPolicy: z
2001
- .enum(["strict", "balanced", "permissive"])
2002
- .optional()
2003
- .describe("Approval policy override"),
2004
- mcpServers: z
2005
- .array(MCP_SERVER_ENUM)
2006
- .default(["sqry"])
2007
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
2008
- allowedTools: z
2009
- .array(z.string())
2010
- .optional()
2011
- .describe("Allowed tools (['Write','Edit','Bash'])"),
2012
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
2013
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2014
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2015
- idleTimeoutMs: z
2016
- .number()
2017
- .int()
2018
- .min(30_000)
2019
- .max(3_600_000)
2020
- .optional()
2021
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2022
- forceRefresh: z
2023
- .boolean()
2024
- .default(false)
2025
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2026
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2027
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
2028
- prompt,
2029
- model,
2030
- sessionId,
2031
- resumeLatest,
2032
- createNewSession,
2033
- approvalMode,
2034
- approvalStrategy,
2035
- approvalPolicy,
2036
- mcpServers,
2037
- allowedTools,
2038
- includeDirs,
2039
- correlationId,
2040
- optimizePrompt,
2041
- idleTimeoutMs,
2042
- forceRefresh,
2043
- });
2044
- });
2045
- server.tool("grok_request_async", {
2046
- prompt: z
2047
- .string()
2048
- .min(1, "Prompt cannot be empty")
2049
- .max(100000, "Prompt too long (max 100k chars)")
2050
- .describe("Prompt text for Grok"),
2051
- model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
2052
- outputFormat: z
2053
- .enum(["plain", "json", "streaming-json"])
2054
- .optional()
2055
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
2056
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
2057
- resumeLatest: z
2058
- .boolean()
2059
- .default(false)
2060
- .describe("Resume most recent Grok session in cwd (--continue)"),
2061
- createNewSession: z.boolean().default(false).describe("Force new session"),
2062
- alwaysApprove: z
2063
- .boolean()
2064
- .default(false)
2065
- .describe("Auto-approve all tool executions (--always-approve)"),
2066
- permissionMode: z
2067
- .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
2068
- .optional()
2069
- .describe("Grok permission mode"),
2070
- effort: z
2071
- .enum(["low", "medium", "high", "xhigh", "max"])
2072
- .optional()
2073
- .describe("Grok effort level"),
2074
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
2075
- approvalStrategy: z
2076
- .enum(["legacy", "mcp_managed"])
2077
- .default("legacy")
2078
- .describe("Approval strategy"),
2079
- approvalPolicy: z
2080
- .enum(["strict", "balanced", "permissive"])
2081
- .optional()
2082
- .describe("Approval policy override"),
2083
- mcpServers: z
2084
- .array(MCP_SERVER_ENUM)
2085
- .default(["sqry"])
2086
- .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
2087
- allowedTools: z
2088
- .array(z.string())
2089
- .optional()
2090
- .describe("Allowed built-in tools (passed as --tools comma list)"),
2091
- disallowedTools: z
2092
- .array(z.string())
2093
- .optional()
2094
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
2095
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2096
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2097
- idleTimeoutMs: z
2098
- .number()
2099
- .int()
2100
- .min(30_000)
2101
- .max(3_600_000)
2102
- .optional()
2103
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2104
- forceRefresh: z
2105
- .boolean()
2106
- .default(false)
2107
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2108
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2109
- return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger }, {
2110
- prompt,
2111
- model,
2112
- outputFormat,
2113
- sessionId,
2114
- resumeLatest,
2115
- createNewSession,
2116
- alwaysApprove,
2117
- permissionMode,
2118
- effort,
2119
- reasoningEffort,
2120
- approvalStrategy,
2121
- approvalPolicy,
2122
- mcpServers,
2123
- allowedTools,
2124
- disallowedTools,
2125
- correlationId,
2126
- optimizePrompt,
2127
- idleTimeoutMs,
2128
- forceRefresh,
2129
- });
2130
- });
2131
- server.tool("llm_job_status", {
2132
- jobId: z.string().describe("Async job ID from *_request_async"),
2133
- }, async ({ jobId }) => {
2134
- const job = asyncJobManager.getJobSnapshot(jobId);
2135
- if (!job) {
2136
- return {
2137
- content: [
2138
- {
2139
- type: "text",
2140
- text: JSON.stringify({
2141
- success: false,
2142
- error: "Job not found",
2143
- jobId,
2144
- }, null, 2),
2145
- },
2146
- ],
2147
- isError: true,
2148
- };
2149
- }
2150
- return {
2151
- content: [
2152
- {
2153
- type: "text",
2154
- text: JSON.stringify({
2155
- success: true,
2156
- job,
2157
- }, null, 2),
2158
- },
2159
- ],
2160
- };
2161
- });
2162
- server.tool("llm_job_result", {
2163
- jobId: z.string().describe("Async job ID from *_request_async"),
2164
- maxChars: z
2165
- .number()
2166
- .int()
2167
- .min(1000)
2168
- .max(2000000)
2169
- .default(200000)
2170
- .describe("Max chars returned per stream"),
2171
- }, async ({ jobId, maxChars }) => {
2172
- const result = asyncJobManager.getJobResult(jobId, maxChars);
2173
- if (!result) {
2174
- return {
2175
- content: [
2176
- {
2177
- type: "text",
2178
- text: JSON.stringify({
2179
- success: false,
2180
- error: "Job not found",
2181
- jobId,
2182
- }, null, 2),
2183
- },
2184
- ],
2185
- isError: true,
2186
- };
2187
- }
2188
- // Parse stream-json output for Claude async jobs
2189
- const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
2190
- let parsed;
2191
- if (outputFormat === "stream-json" && result.stdout) {
2192
- parsed = parseStreamJson(result.stdout);
2193
- }
2194
- return {
2195
- content: [
2196
- {
2197
- type: "text",
2198
- text: JSON.stringify({
2199
- success: true,
2200
- result,
2201
- ...(parsed
2202
- ? {
2203
- parsed: {
2204
- text: parsed.text,
2205
- costUsd: parsed.costUsd,
2206
- usage: parsed.usage,
2207
- model: parsed.model,
2208
- numTurns: parsed.numTurns,
2209
- },
2210
- }
2211
- : {}),
2212
- }, null, 2),
2213
- },
2214
- ],
2215
- };
2216
- });
2217
- server.tool("llm_job_cancel", {
2218
- jobId: z.string().describe("Async job ID from *_request_async"),
2219
- }, async ({ jobId }) => {
2220
- const cancel = asyncJobManager.cancelJob(jobId);
2221
- if (!cancel.canceled) {
2222
- return {
2223
- content: [
2224
- {
2225
- type: "text",
2226
- text: JSON.stringify({
2227
- success: false,
2228
- jobId,
2229
- reason: cancel.reason || "Unable to cancel",
2230
- }, null, 2),
2231
- },
2232
- ],
2233
- isError: true,
2234
- };
1894
+ // Pre-start failure: either session I/O threw, or startJob threw before
1895
+ // registering the record. In either case the manager will NOT fire
1896
+ // prepCleanup, so we must run it here.
1897
+ runPrepCleanupLocally();
1898
+ return createErrorResponse("codex_request_async", 1, "", corrId, error);
2235
1899
  }
2236
- return {
2237
- content: [
2238
- {
2239
- type: "text",
2240
- text: JSON.stringify({
2241
- success: true,
2242
- jobId,
2243
- }, null, 2),
2244
- },
2245
- ],
2246
- };
2247
- });
2248
- server.tool("llm_process_health", {}, async () => {
2249
- const health = asyncJobManager.getJobHealth();
2250
- return {
2251
- content: [
2252
- {
2253
- type: "text",
2254
- text: JSON.stringify({ success: true, ...health }, null, 2),
2255
- },
2256
- ],
2257
- };
2258
- });
1900
+ }
2259
1901
  //──────────────────────────────────────────────────────────────────────────────
2260
- // Approval Audit Tools
1902
+ // Claude Code Tool
2261
1903
  //──────────────────────────────────────────────────────────────────────────────
2262
- server.tool("approval_list", {
2263
- limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
2264
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter"),
2265
- }, async ({ limit, cli }) => {
2266
- const approvals = approvalManager.list(limit, cli);
2267
- return {
2268
- content: [
2269
- {
2270
- type: "text",
2271
- text: JSON.stringify({
1904
+ export function createGatewayServer(deps = {}) {
1905
+ const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
1906
+ const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
1907
+ // Structural invariant: tools register iff ALL THREE conditions hold:
1908
+ // (1) persistence.backend !== "none" — the operator/config has not
1909
+ // explicitly disabled durable persistence;
1910
+ // (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
1911
+ // agrees (loadPersistenceConfig sets this iff backend is one of
1912
+ // sqlite/postgres/memory);
1913
+ // (3) asyncJobManager.hasStore() === true — the runtime manager
1914
+ // actually has a store attached (isolate-mode runtimes use null).
1915
+ //
1916
+ // Each guard closes a distinct re-entry path for the silent-loss footgun:
1917
+ // - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
1918
+ // and re-advertise the async tools while reporting backend='none' in
1919
+ // llm_process_health — exactly contradicting SPEC CLAIM 4f.
1920
+ // - Without (2), config that opts out is ignored.
1921
+ // - Without (3), a null-store manager (isolate-mode / HTTP per-session)
1922
+ // accepts registrations that have nowhere to persist results.
1923
+ const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
1924
+ const server = newGatewayMcpServer();
1925
+ registerBaseResources(server, runtime);
1926
+ registerValidationTools(server, { asyncJobManager });
1927
+ server.tool("claude_request", {
1928
+ prompt: z
1929
+ .string()
1930
+ .min(1, "Prompt cannot be empty")
1931
+ .max(100000, "Prompt too long (max 100k chars)")
1932
+ .describe("Prompt text for Claude"),
1933
+ model: z
1934
+ .string()
1935
+ .optional()
1936
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1937
+ outputFormat: z
1938
+ .enum(["text", "json", "stream-json"])
1939
+ .default("text")
1940
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1941
+ sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1942
+ continueSession: z.boolean().default(false).describe("Continue active session"),
1943
+ createNewSession: z.boolean().default(false).describe("Force new session"),
1944
+ allowedTools: z
1945
+ .array(z.string())
1946
+ .optional()
1947
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1948
+ disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1949
+ dangerouslySkipPermissions: z
1950
+ .boolean()
1951
+ .default(false)
1952
+ .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
1953
+ permissionMode: z
1954
+ .enum(CLAUDE_PERMISSION_MODES)
1955
+ .optional()
1956
+ .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
1957
+ // U25 — Claude high-impact features
1958
+ agent: z
1959
+ .string()
1960
+ .optional()
1961
+ .describe("Claude --agent: dispatch to a named single sub-agent."),
1962
+ agents: z
1963
+ .record(z.record(z.unknown()))
1964
+ .optional()
1965
+ .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
1966
+ forkSession: z
1967
+ .boolean()
1968
+ .optional()
1969
+ .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
1970
+ systemPrompt: z
1971
+ .string()
1972
+ .optional()
1973
+ .describe("Claude --system-prompt: replace the system prompt entirely."),
1974
+ appendSystemPrompt: z
1975
+ .string()
1976
+ .optional()
1977
+ .describe("Claude --append-system-prompt: append to the existing system prompt."),
1978
+ maxBudgetUsd: z
1979
+ .number()
1980
+ .positive()
1981
+ .optional()
1982
+ .describe("Claude --max-budget-usd: spend cap for this request in USD."),
1983
+ maxTurns: z
1984
+ .number()
1985
+ .int()
1986
+ .positive()
1987
+ .optional()
1988
+ .describe("Claude --max-turns: cap on agent loop iterations."),
1989
+ effort: z
1990
+ .enum(CLAUDE_EFFORT_LEVELS)
1991
+ .optional()
1992
+ .describe("Claude --effort: low|medium|high|xhigh|max."),
1993
+ excludeDynamicSystemPromptSections: z
1994
+ .boolean()
1995
+ .optional()
1996
+ .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
1997
+ approvalStrategy: z
1998
+ .enum(["legacy", "mcp_managed"])
1999
+ .default("legacy")
2000
+ .describe("Approval strategy"),
2001
+ approvalPolicy: z
2002
+ .enum(["strict", "balanced", "permissive"])
2003
+ .optional()
2004
+ .describe("Approval policy override"),
2005
+ mcpServers: z
2006
+ .array(MCP_SERVER_ENUM)
2007
+ .default(["sqry"])
2008
+ .describe("MCP servers exposed to Claude"),
2009
+ strictMcpConfig: z
2010
+ .boolean()
2011
+ .default(false)
2012
+ .describe("Restrict Claude to provided MCP config only"),
2013
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2014
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2015
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
2016
+ idleTimeoutMs: z
2017
+ .number()
2018
+ .int()
2019
+ .min(30_000)
2020
+ .max(3_600_000)
2021
+ .optional()
2022
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2023
+ forceRefresh: z
2024
+ .boolean()
2025
+ .default(false)
2026
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2027
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2028
+ const startTime = Date.now();
2029
+ if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2030
+ return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
2031
+ }
2032
+ const prep = prepareClaudeRequest({
2033
+ prompt,
2034
+ model,
2035
+ outputFormat,
2036
+ allowedTools,
2037
+ disallowedTools,
2038
+ dangerouslySkipPermissions,
2039
+ permissionMode,
2040
+ approvalStrategy,
2041
+ approvalPolicy,
2042
+ mcpServers,
2043
+ strictMcpConfig,
2044
+ correlationId,
2045
+ optimizePrompt,
2046
+ operation: "claude_request",
2047
+ agent,
2048
+ agents,
2049
+ forkSession,
2050
+ systemPrompt,
2051
+ appendSystemPrompt,
2052
+ maxBudgetUsd,
2053
+ maxTurns,
2054
+ effort,
2055
+ excludeDynamicSystemPromptSections,
2056
+ }, runtime);
2057
+ if (!("args" in prep))
2058
+ return prep;
2059
+ const { corrId, args } = prep;
2060
+ let durationMs = 0;
2061
+ let wasSuccessful = false;
2062
+ safeFlightStart({
2063
+ correlationId: corrId,
2064
+ cli: "claude",
2065
+ model: prep.resolvedModel || "default",
2066
+ prompt,
2067
+ sessionId,
2068
+ }, runtime);
2069
+ logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
2070
+ try {
2071
+ // Session management
2072
+ let effectiveSessionId = sessionId;
2073
+ let useContinue = continueSession;
2074
+ const activeSession = await sessionManager.getActiveSession("claude");
2075
+ if (!createNewSession && !continueSession && !sessionId && activeSession) {
2076
+ effectiveSessionId = activeSession.id;
2077
+ useContinue = true;
2078
+ }
2079
+ if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2080
+ useContinue = true;
2081
+ }
2082
+ if (useContinue) {
2083
+ args.push("--continue");
2084
+ }
2085
+ else if (effectiveSessionId) {
2086
+ args.push("--session-id", effectiveSessionId);
2087
+ await sessionManager.updateSessionUsage(effectiveSessionId);
2088
+ }
2089
+ // Idle timeout only for stream-json (text/json produce no output until done)
2090
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
2091
+ const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime);
2092
+ // Deferred — job still running, return async reference
2093
+ if (isDeferredResponse(result)) {
2094
+ return buildDeferredToolResponse(result, effectiveSessionId);
2095
+ }
2096
+ const { stdout, stderr, code } = result;
2097
+ durationMs = Math.max(0, Date.now() - startTime);
2098
+ if (code !== 0) {
2099
+ logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
2100
+ safeFlightComplete(corrId, {
2101
+ response: stderr || "",
2102
+ durationMs,
2103
+ retryCount: 0,
2104
+ circuitBreakerState: "closed",
2105
+ optimizationApplied: optimizePrompt || optimizeResponse,
2106
+ exitCode: code,
2107
+ errorMessage: stderr || `Exit code ${code}`,
2108
+ status: "failed",
2109
+ }, runtime);
2110
+ return createErrorResponse("claude", code, stderr, corrId);
2111
+ }
2112
+ wasSuccessful = true;
2113
+ // If we used a session ID and it's not tracked yet, create a session record
2114
+ if (effectiveSessionId) {
2115
+ const existingSession = await sessionManager.getSession(effectiveSessionId);
2116
+ if (!existingSession) {
2117
+ await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
2118
+ }
2119
+ }
2120
+ logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
2121
+ // Parse stream-json NDJSON output to extract result text
2122
+ if (outputFormat === "stream-json") {
2123
+ const parsed = parseStreamJson(stdout);
2124
+ if (parsed.costUsd !== null) {
2125
+ logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
2126
+ }
2127
+ safeFlightComplete(corrId, {
2128
+ response: parsed.text,
2129
+ inputTokens: parsed.usage?.inputTokens,
2130
+ outputTokens: parsed.usage?.outputTokens,
2131
+ cacheReadTokens: parsed.usage?.cacheReadInputTokens || undefined,
2132
+ cacheCreationTokens: parsed.usage?.cacheCreationInputTokens || undefined,
2133
+ durationMs,
2134
+ retryCount: 0,
2135
+ circuitBreakerState: "closed",
2136
+ costUsd: parsed.costUsd ?? undefined,
2137
+ optimizationApplied: optimizePrompt || optimizeResponse,
2138
+ exitCode: 0,
2139
+ status: "completed",
2140
+ }, runtime);
2141
+ return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2142
+ }
2143
+ safeFlightComplete(corrId, {
2144
+ response: stdout,
2145
+ durationMs,
2146
+ retryCount: 0,
2147
+ circuitBreakerState: "closed",
2148
+ optimizationApplied: optimizePrompt || optimizeResponse,
2149
+ exitCode: 0,
2150
+ status: "completed",
2151
+ }, runtime);
2152
+ return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2153
+ }
2154
+ catch (error) {
2155
+ const elapsedMs = Math.max(0, Date.now() - startTime);
2156
+ logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
2157
+ safeFlightComplete(corrId, {
2158
+ response: "",
2159
+ durationMs: elapsedMs,
2160
+ retryCount: 0,
2161
+ circuitBreakerState: "closed",
2162
+ optimizationApplied: optimizePrompt || optimizeResponse,
2163
+ exitCode: 1,
2164
+ errorMessage: error.message,
2165
+ status: "failed",
2166
+ }, runtime);
2167
+ return createErrorResponse("claude", 1, "", corrId, error);
2168
+ }
2169
+ finally {
2170
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2171
+ performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
2172
+ }
2173
+ });
2174
+ //──────────────────────────────────────────────────────────────────────────────
2175
+ // Codex Tool
2176
+ //──────────────────────────────────────────────────────────────────────────────
2177
// Registers the synchronous `codex_request` MCP tool: validates the input
// against the schema below, prepares Codex CLI arguments, runs (or defers) the
// job, records the outcome in the flight recorder and performance metrics, and
// returns either the CLI response, a deferred-job reference, or an error
// response.
server.tool("codex_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
    fullAuto: z.boolean().default(false).describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
    sandboxMode: z.enum(CODEX_SANDBOX_MODES).optional().describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
    askForApproval: z.enum(CODEX_ASK_FOR_APPROVAL_MODES).optional().describe("Codex --ask-for-approval: untrusted|on-request|never."),
    useLegacyFullAutoFlag: z.boolean().default(false).describe("Escape hatch: emit `--full-auto` directly instead of expanding (deprecated)."),
    dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
    sessionId: z.string().optional().describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
    resumeLatest: z.boolean().default(false).describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
    createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: "json" makes Codex emit `--json`, which is what lets
    // extractUsageAndCost report token usage (and any cost). With "text"
    // the JSON parser is never reached and no Codex usage is reported.
    outputFormat: z.enum(["text", "json"]).default("text").describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
    // U26: optional high-impact feature flags.
    outputSchema: z.union([z.string(), z.record(z.unknown())]).optional().describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object; object is materialised to a 0o600 temp file under os.tmpdir() and deleted after the run."),
    search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
    profile: z.string().optional().describe("Codex --profile <name>: select a profile from ~/.codex/config.toml."),
    configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
    ephemeral: z.boolean().optional().describe("Codex --ephemeral: do not persist the session to disk."),
    images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments. Each path must exist; missing paths fail fast."),
    ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config: ignore ~/.codex/config.toml for this run."),
    ignoreRules: z.boolean().optional().describe("Codex --ignore-rules: skip project rule files for this run."),
}, async (toolArgs) => {
    const {
        prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag,
        dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers,
        sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt,
        optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search,
        profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules,
    } = toolArgs;
    const startTime = Date.now();
    const prep = prepareCodexRequest({
        prompt,
        model,
        fullAuto,
        sandboxMode,
        askForApproval,
        useLegacyFullAutoFlag,
        dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        sessionId,
        resumeLatest,
        createNewSession,
        correlationId,
        optimizePrompt,
        operation: "codex_request",
        outputFormat,
        outputSchema,
        search,
        profile,
        configOverrides,
        ephemeral,
        images,
        ignoreUserConfig,
        ignoreRules,
    }, runtime);
    // A result without `args` is returned to the caller unchanged.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args } = prep;
    const modelLabel = prep.resolvedModel || "default";
    const optimizationApplied = optimizePrompt || optimizeResponse;
    // Builds a flight-recorder completion payload: the fields shared by
    // every outcome first, then the outcome-specific fields in the order given.
    const flightRecord = (response, elapsed, outcome) => ({
        response,
        durationMs: elapsed,
        retryCount: 0,
        circuitBreakerState: "closed",
        optimizationApplied,
        ...outcome,
    });
    let durationMs = 0;
    let wasSuccessful = false;
    safeFlightStart({
        correlationId: corrId,
        cli: "codex",
        model: modelLabel,
        prompt,
        sessionId,
    }, runtime);
    logger.info(`[${corrId}] codex_request invoked with model=${modelLabel}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
    // U26 fix: the outputSchema cleanup callback is handed to
    // awaitJobOrDefer, which is documented to run it exactly once (inline
    // for direct execution, on terminal status for the job-backed path).
    // This handler therefore never calls it itself.
    let prepCleanup;
    if ("cleanup" in prep && typeof prep.cleanup === "function") {
        prepCleanup = prep.cleanup;
    }
    try {
        const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup);
        // Job still running: hand back an async reference. Cleanup stays
        // with AsyncJobManager via onComplete.
        if (isDeferredResponse(result)) {
            return buildDeferredToolResponse(result, sessionId);
        }
        const { stdout, stderr, code } = result;
        durationMs = Math.max(0, Date.now() - startTime);
        if (code !== 0) {
            logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
            safeFlightComplete(corrId, flightRecord(stderr || "", durationMs, {
                exitCode: code,
                errorMessage: stderr || `Exit code ${code}`,
                status: "failed",
            }), runtime);
            return createErrorResponse("codex", code, stderr, corrId);
        }
        wasSuccessful = true;
        // Session bookkeeping: an explicit sessionId only gets its usage
        // refreshed; otherwise a new session is created when requested, and
        // failing that the active session is reused (or created if absent).
        let effectiveSessionId = sessionId;
        if (sessionId) {
            await sessionManager.updateSessionUsage(sessionId);
        }
        else if (createNewSession) {
            const freshSession = await sessionManager.createSession("codex", "Codex Session");
            effectiveSessionId = freshSession.id;
        }
        else {
            const activeSession = await sessionManager.getActiveSession("codex");
            if (activeSession) {
                effectiveSessionId = activeSession.id;
            }
            else {
                const freshSession = await sessionManager.createSession("codex", "Codex Session");
                effectiveSessionId = freshSession.id;
            }
        }
        logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
        const codexUsage = extractUsageAndCost("codex", stdout, outputFormat);
        safeFlightComplete(corrId, flightRecord(stdout, durationMs, {
            exitCode: 0,
            status: "completed",
            inputTokens: codexUsage.inputTokens,
            outputTokens: codexUsage.outputTokens,
            cacheReadTokens: codexUsage.cacheReadTokens,
            cacheCreationTokens: codexUsage.cacheCreationTokens,
            costUsd: codexUsage.costUsd,
        }), runtime);
        return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
    }
    catch (error) {
        const elapsedMs = Math.max(0, Date.now() - startTime);
        logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
        safeFlightComplete(corrId, flightRecord("", elapsedMs, {
            exitCode: 1,
            errorMessage: error.message,
            status: "failed",
        }), runtime);
        return createErrorResponse("codex", 1, "", corrId, error);
    }
    finally {
        // durationMs is still 0 when no result was obtained (deferred or
        // thrown), in which case the elapsed wall time is used instead.
        // No cleanup here: that is owned by awaitJobOrDefer's contract.
        const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
        performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
    }
});
2402
+ //──────────────────────────────────────────────────────────────────────────────
2403
+ // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
2404
+ //──────────────────────────────────────────────────────────────────────────────
2405
// Registers the `codex_fork_session` MCP tool, which runs
// `codex fork <SESSION_ID|--last> <prompt>` with optional model and
// sandbox/approval flags and returns the CLI's stdout as text.
server.tool("codex_fork_session", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for the forked Codex session"),
    sessionId: z.string().optional().describe("Codex session UUID to fork from. Mutually exclusive with `forkLast`."),
    forkLast: z.boolean().optional().describe("Fork from the most recent Codex session. Mutually exclusive with `sessionId`."),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.5, latest)"),
    sandboxMode: z.enum(CODEX_SANDBOX_MODES).optional().describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
    askForApproval: z.enum(CODEX_ASK_FOR_APPROVAL_MODES).optional().describe("Codex --ask-for-approval: untrusted|on-request|never."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
}, async (toolArgs) => {
    const { prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs } = toolArgs;
    const corrId = correlationId || randomUUID();
    const startTime = Date.now();
    let durationMs = 0;
    let wasSuccessful = false;
    // Error response used for every failure raised by this tool itself
    // (as opposed to a non-zero exit of the Codex CLI).
    const rejectWith = (cause) => createErrorResponse("codex_fork_session", 1, "", corrId, cause);
    // Exactly one of sessionId / forkLast must be supplied. This is checked
    // here because, per the original note, the SDK's `.tool(...)` does not
    // accept top-level Zod refines.
    if (sessionId && forkLast) {
        return rejectWith(new Error("sessionId and forkLast are mutually exclusive"));
    }
    if (!sessionId && !forkLast) {
        return rejectWith(new Error("one of sessionId or forkLast is required"));
    }
    let forkArgs;
    try {
        ({ args: forkArgs } = prepareCodexForkRequest({ prompt, sessionId, forkLast }));
    }
    catch (err) {
        return rejectWith(err);
    }
    const resolvedModel = resolveModelAlias("codex", model, getCliInfo());
    const modelFlags = resolvedModel ? ["--model", resolvedModel] : [];
    const sandboxFlags = resolveCodexSandboxFlags({ sandboxMode, askForApproval });
    if (sandboxFlags.warning) {
        logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
    }
    // forkArgs is ["fork", "--last", prompt] or ["fork", sessionId, prompt].
    // Model and sandbox/approval flags are placed directly after "fork" so
    // they precede the positionals and are read as flags.
    const finalArgs = [forkArgs[0], ...modelFlags, ...sandboxFlags.args, ...forkArgs.slice(1)];
    logger.info(`[${corrId}] codex_fork_session invoked (forkLast=${Boolean(forkLast)}, sessionId=${sessionId ? "set" : "unset"})`);
    try {
        const outcome = await awaitJobOrDefer("codex", finalArgs, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, false, runtime);
        if (isDeferredResponse(outcome)) {
            return buildDeferredToolResponse(outcome, sessionId);
        }
        const { stdout, stderr, code } = outcome;
        durationMs = Math.max(0, Date.now() - startTime);
        if (code !== 0) {
            return createErrorResponse("codex", code, stderr, corrId);
        }
        wasSuccessful = true;
        return {
            content: [{ type: "text", text: stdout }],
        };
    }
    catch (error) {
        return rejectWith(error);
    }
    finally {
        // durationMs is still 0 when the job was deferred or threw; fall
        // back to the elapsed wall time in that case.
        const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
        performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
    }
});
2499
+ //──────────────────────────────────────────────────────────────────────────────
2500
+ // Gemini Tool
2501
+ //──────────────────────────────────────────────────────────────────────────────
2502
// Registers the `gemini_request` MCP tool. The handler only forwards the
// validated parameters to handleGeminiRequest together with the shared
// dependencies (sessionManager, logger, runtime).
{
    const geminiRequestShape = {
        prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
        model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
        sessionId: z.string().optional().describe("Session ID or 'latest'"),
        resumeLatest: z.boolean().default(false).describe("Resume latest session"),
        createNewSession: z.boolean().default(false).describe("Force new session"),
        approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional().describe("Approval: default|auto_edit|yolo|plan"),
        approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
        approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
        mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
        allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
        includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
        idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
        forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
        // U23: "json" emits `-o json` so token usage can be extracted via
        // parseGeminiJson. The default stays "text" so existing callers see
        // no behavior change.
        outputFormat: z.enum(["text", "json"]).default("text").describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
        sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
        policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
        adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
        attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
    };
    // Every schema key is forwarded, in schema order, so the forwarded field
    // list is taken from the shape itself.
    const geminiForwardedFields = Object.keys(geminiRequestShape);
    server.tool("gemini_request", geminiRequestShape, async (toolArgs) => {
        // Copy each field explicitly so that omitted optionals are still
        // present on the request object (with value undefined).
        const request = {};
        for (const field of geminiForwardedFields) {
            request[field] = toolArgs[field];
        }
        return handleGeminiRequest({ sessionManager, logger, runtime }, request);
    });
}
2585
+ //──────────────────────────────────────────────────────────────────────────────
2586
+ // Grok Tool
2587
+ //──────────────────────────────────────────────────────────────────────────────
2588
// Registers the `grok_request` MCP tool. The handler only forwards the
// validated parameters to handleGrokRequest together with the shared
// dependencies (sessionManager, logger, runtime).
{
    const grokRequestShape = {
        prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Grok"),
        model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
        outputFormat: z.enum(["plain", "json", "streaming-json"]).optional().describe("Output format (plain|json|streaming-json). Grok default is plain."),
        sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
        resumeLatest: z.boolean().default(false).describe("Resume most recent Grok session in cwd (--continue)"),
        createNewSession: z.boolean().default(false).describe("Force new session"),
        alwaysApprove: z.boolean().default(false).describe("Auto-approve all tool executions (--always-approve)"),
        permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional().describe("Grok permission mode"),
        effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Grok effort level"),
        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
        approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
        approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
        mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
        allowedTools: z.array(z.string()).optional().describe("Allowed built-in tools (passed as --tools comma list)"),
        disallowedTools: z.array(z.string()).optional().describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
        idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
        forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    };
    // Every schema key is forwarded, in schema order, so the forwarded field
    // list is taken from the shape itself.
    const grokForwardedFields = Object.keys(grokRequestShape);
    server.tool("grok_request", grokRequestShape, async (toolArgs) => {
        // Copy each field explicitly so that omitted optionals are still
        // present on the request object (with value undefined).
        const request = {};
        for (const field of grokForwardedFields) {
            request[field] = toolArgs[field];
        }
        return handleGrokRequest({ sessionManager, logger, runtime }, request);
    });
}
2679
+ //──────────────────────────────────────────────────────────────────────────────
2680
+ // Mistral Vibe Tool
2681
+ //──────────────────────────────────────────────────────────────────────────────
2682
// Registers the `mistral_request` MCP tool (Mistral Vibe). The handler only
// forwards the validated parameters to handleMistralRequest together with the
// shared dependencies (sessionManager, logger, runtime).
{
    const mistralRequestShape = {
        prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Mistral Vibe"),
        model: z.string().optional().describe("Model alias (e.g. devstral-medium, devstral-large, latest). Resolved alias is injected via VIBE_ACTIVE_MODEL env var — Vibe has no --model flag."),
        outputFormat: z.enum(["plain", "json", "stream-json"]).optional().describe("Output format (plain|json|stream-json). Vibe default is plain."),
        sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
        resumeLatest: z.boolean().default(false).describe("Resume most recent Vibe session in cwd (--continue)"),
        createNewSession: z.boolean().default(false).describe("Force new session"),
        permissionMode: z.enum(MISTRAL_AGENT_MODES).optional().describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
        effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Vibe effort level"),
        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
        approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
        approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
        mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
        allowedTools: z.array(z.string()).optional().describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
        disallowedTools: z.array(z.string()).optional().describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
        idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
        forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    };
    // Every schema key is forwarded, in schema order, so the forwarded field
    // list is taken from the shape itself.
    const mistralForwardedFields = Object.keys(mistralRequestShape);
    server.tool("mistral_request", mistralRequestShape, async (toolArgs) => {
        // Copy each field explicitly so that omitted optionals are still
        // present on the request object (with value undefined).
        const request = {};
        for (const field of mistralForwardedFields) {
            request[field] = toolArgs[field];
        }
        return handleMistralRequest({ sessionManager, logger, runtime }, request);
    });
}
2771
+ //──────────────────────────────────────────────────────────────────────────────
2772
+ // Async Long-Running Job Tools (No Time-Bound LLM Execution)
2773
+ //
2774
+ // STRUCTURAL INVARIANT: these tools are only registered when a real job
2775
+ // store is attached (`persistence.asyncJobsEnabled === true`). When the
2776
+ // operator has configured `[persistence].backend = "none"`, none of the
2777
+ // *_request_async / llm_job_* tools exist in the MCP tool list at all —
2778
+ // orchestrating agents get a clean "tool not found" signal at connect
2779
+ // time instead of silent in-memory loss after the 1-hour TTL.
2780
+ //──────────────────────────────────────────────────────────────────────────────
2781
+ if (asyncJobsEnabled) {
2782
+ server.tool("claude_request_async", {
2783
+ prompt: z
2784
+ .string()
2785
+ .min(1, "Prompt cannot be empty")
2786
+ .max(100000, "Prompt too long (max 100k chars)")
2787
+ .describe("Prompt text for Claude"),
2788
+ model: z
2789
+ .string()
2790
+ .optional()
2791
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
2792
+ outputFormat: z
2793
+ .enum(["text", "json", "stream-json"])
2794
+ .default("text")
2795
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
2796
+ sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2797
+ continueSession: z.boolean().default(false).describe("Continue active session"),
2798
+ createNewSession: z.boolean().default(false).describe("Force new session"),
2799
+ allowedTools: z
2800
+ .array(z.string())
2801
+ .optional()
2802
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
2803
+ disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
2804
+ dangerouslySkipPermissions: z
2805
+ .boolean()
2806
+ .default(false)
2807
+ .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
2808
+ permissionMode: z
2809
+ .enum(CLAUDE_PERMISSION_MODES)
2810
+ .optional()
2811
+ .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
2812
+ // U25 — Claude high-impact features
2813
+ agent: z
2814
+ .string()
2815
+ .optional()
2816
+ .describe("Claude --agent: dispatch to a named single sub-agent."),
2817
+ agents: z
2818
+ .record(z.record(z.unknown()))
2819
+ .optional()
2820
+ .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
2821
+ forkSession: z
2822
+ .boolean()
2823
+ .optional()
2824
+ .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
2825
+ systemPrompt: z
2826
+ .string()
2827
+ .optional()
2828
+ .describe("Claude --system-prompt: replace the system prompt entirely."),
2829
+ appendSystemPrompt: z
2830
+ .string()
2831
+ .optional()
2832
+ .describe("Claude --append-system-prompt: append to the existing system prompt."),
2833
+ maxBudgetUsd: z
2834
+ .number()
2835
+ .positive()
2836
+ .optional()
2837
+ .describe("Claude --max-budget-usd: spend cap for this request in USD."),
2838
+ maxTurns: z
2839
+ .number()
2840
+ .int()
2841
+ .positive()
2842
+ .optional()
2843
+ .describe("Claude --max-turns: cap on agent loop iterations."),
2844
+ effort: z
2845
+ .enum(CLAUDE_EFFORT_LEVELS)
2846
+ .optional()
2847
+ .describe("Claude --effort: low|medium|high|xhigh|max."),
2848
+ excludeDynamicSystemPromptSections: z
2849
+ .boolean()
2850
+ .optional()
2851
+ .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2852
+ approvalStrategy: z
2853
+ .enum(["legacy", "mcp_managed"])
2854
+ .default("legacy")
2855
+ .describe("Approval strategy"),
2856
+ approvalPolicy: z
2857
+ .enum(["strict", "balanced", "permissive"])
2858
+ .optional()
2859
+ .describe("Approval policy override"),
2860
+ mcpServers: z
2861
+ .array(MCP_SERVER_ENUM)
2862
+ .default(["sqry"])
2863
+ .describe("MCP servers exposed to Claude"),
2864
+ strictMcpConfig: z
2865
+ .boolean()
2866
+ .default(false)
2867
+ .describe("Restrict Claude to provided MCP config only"),
2868
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2869
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2870
+ idleTimeoutMs: z
2871
+ .number()
2872
+ .int()
2873
+ .min(30_000)
2874
+ .max(3_600_000)
2875
+ .optional()
2876
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2877
+ forceRefresh: z
2878
+ .boolean()
2879
+ .default(false)
2880
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2881
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2882
+ if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2883
+ return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
2884
+ }
2885
+ const prep = prepareClaudeRequest({
2886
+ prompt,
2887
+ model,
2888
+ outputFormat,
2889
+ allowedTools,
2890
+ disallowedTools,
2891
+ dangerouslySkipPermissions,
2892
+ permissionMode,
2893
+ approvalStrategy,
2894
+ approvalPolicy,
2895
+ mcpServers,
2896
+ strictMcpConfig,
2897
+ correlationId,
2898
+ optimizePrompt,
2899
+ operation: "claude_request_async",
2900
+ agent,
2901
+ agents,
2902
+ forkSession,
2903
+ systemPrompt,
2904
+ appendSystemPrompt,
2905
+ maxBudgetUsd,
2906
+ maxTurns,
2907
+ effort,
2908
+ excludeDynamicSystemPromptSections,
2909
+ }, runtime);
2910
+ if (!("args" in prep))
2911
+ return prep;
2912
+ const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
2913
+ try {
2914
+ // Session management (before job start for async)
2915
+ let effectiveSessionId = sessionId;
2916
+ let useContinue = continueSession;
2917
+ const activeSession = await sessionManager.getActiveSession("claude");
2918
+ if (!createNewSession && !continueSession && !sessionId && activeSession) {
2919
+ effectiveSessionId = activeSession.id;
2920
+ useContinue = true;
2921
+ }
2922
+ if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2923
+ useContinue = true;
2924
+ }
2925
+ if (useContinue) {
2926
+ args.push("--continue");
2927
+ }
2928
+ else if (effectiveSessionId) {
2929
+ args.push("--session-id", effectiveSessionId);
2930
+ await sessionManager.updateSessionUsage(effectiveSessionId);
2931
+ }
2932
+ if (effectiveSessionId) {
2933
+ const existingSession = await sessionManager.getSession(effectiveSessionId);
2934
+ if (!existingSession) {
2935
+ await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
2936
+ }
2937
+ }
2938
+ // Idle timeout only for stream-json (text/json produce no output until done)
2939
+ const effectiveIdleTimeout = outputFormat === "stream-json"
2940
+ ? resolveIdleTimeout("claude", idleTimeoutMs)
2941
+ : undefined;
2942
+ assertUpstreamCliArgs("claude", args);
2943
+ assertUpstreamCliEnv("claude", undefined);
2944
+ const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
2945
+ logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
2946
+ const asyncResponse = {
2272
2947
  success: true,
2273
- count: approvals.length,
2274
- approvals,
2275
- }, null, 2),
2276
- },
2277
- ],
2278
- };
2279
- });
2280
- //──────────────────────────────────────────────────────────────────────────────
2281
- // List Models Tool
2282
- //──────────────────────────────────────────────────────────────────────────────
2283
- server.tool("list_models", {
2284
- cli: z
2285
- .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2286
- .describe("CLI filter (claude|codex|gemini)"),
2287
- }, async ({ cli }) => {
2288
- const cliInfo = getCliInfo();
2289
- const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
2290
- return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
2291
- });
2292
- server.tool("cli_versions", {
2293
- cli: z
2294
- .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2295
- .describe("CLI filter (claude|codex|gemini)"),
2296
- }, async ({ cli }) => {
2297
- const versions = await getCliVersions(cli);
2298
- return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
2299
- });
2300
- server.tool("cli_upgrade", {
2301
- cli: z.enum(["claude", "codex", "gemini"]).describe("CLI to upgrade"),
2302
- target: z
2303
- .string()
2304
- .min(1)
2305
- .default("latest")
2306
- .describe("Package tag/version/target to install (default: latest)"),
2307
- dryRun: z
2308
- .boolean()
2309
- .default(true)
2310
- .describe("When true, return the upgrade plan without running it"),
2311
- timeoutMs: z
2312
- .number()
2313
- .int()
2314
- .min(30_000)
2315
- .max(3_600_000)
2316
- .optional()
2317
- .describe("Upgrade timeout in ms when dryRun=false"),
2318
- }, async ({ cli, target, dryRun, timeoutMs }) => {
2319
- try {
2320
- const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
2321
- return {
2322
- content: [
2323
- {
2324
- type: "text",
2325
- text: JSON.stringify({
2326
- success: true,
2327
- ...result,
2328
- }, null, 2),
2329
- },
2330
- ],
2948
+ job,
2949
+ sessionId: effectiveSessionId || activeSession?.id || null,
2950
+ approval: approvalDecision,
2951
+ mcpServers: {
2952
+ requested: requestedMcpServers,
2953
+ enabled: mcpConfig?.enabled,
2954
+ missing: mcpConfig?.missing,
2955
+ },
2956
+ };
2957
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
2958
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
2959
+ }
2960
+ return {
2961
+ content: [
2962
+ {
2963
+ type: "text",
2964
+ text: JSON.stringify(asyncResponse, null, 2),
2965
+ },
2966
+ ],
2967
+ };
2968
+ }
2969
+ catch (error) {
2970
+ return createErrorResponse("claude_request_async", 1, "", corrId, error);
2971
+ }
2972
+ });
2973
// Async Codex entry point. Declares the accepted input shape and forwards the
// received fields, unchanged and in declaration order, to
// handleCodexRequestAsync together with the shared gateway dependencies.
server.tool("codex_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
    // Sandbox / approval switches.
    fullAuto: z.boolean().default(false).describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
    sandboxMode: z.enum(CODEX_SANDBOX_MODES).optional().describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
    askForApproval: z.enum(CODEX_ASK_FOR_APPROVAL_MODES).optional().describe("Codex --ask-for-approval: untrusted|on-request|never."),
    useLegacyFullAutoFlag: z.boolean().default(false).describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
    dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
    // Session selection.
    sessionId: z.string().optional().describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
    resumeLatest: z.boolean().default(false).describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
    createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
    // Request plumbing shared with the other *_request_async tools.
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` asks Codex for `--json` so the JSONL event stream can be parsed for token usage.
    outputFormat: z.enum(["text", "json"]).default("text").describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
    // U26: optional high-impact feature flags.
    outputSchema: z.union([z.string(), z.record(z.unknown())]).optional().describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
    search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
    profile: z.string().optional().describe("Codex --profile <name>."),
    configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
    ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
    images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
    ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
    ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
}, async (input) => {
    // Exactly the fields handed to the Codex handler, in the order they are
    // passed; a field missing from `input` is still forwarded as undefined.
    const forwardedKeys = [
        "prompt",
        "model",
        "fullAuto",
        "sandboxMode",
        "askForApproval",
        "useLegacyFullAutoFlag",
        "dangerouslyBypassApprovalsAndSandbox",
        "approvalStrategy",
        "approvalPolicy",
        "mcpServers",
        "sessionId",
        "resumeLatest",
        "createNewSession",
        "correlationId",
        "optimizePrompt",
        "idleTimeoutMs",
        "forceRefresh",
        "outputFormat",
        "outputSchema",
        "search",
        "profile",
        "configOverrides",
        "ephemeral",
        "images",
        "ignoreUserConfig",
        "ignoreRules",
    ];
    const request = Object.fromEntries(forwardedKeys.map((key) => [key, input[key]]));
    return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, request);
});
3081
// Async Gemini entry point. Declares the accepted input shape and forwards the
// received fields, unchanged and in declaration order, to
// handleGeminiRequestAsync together with the shared gateway dependencies.
server.tool("gemini_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
    model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    // Session selection.
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    // Approval and tool exposure.
    approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional().describe("Approval: default|auto_edit|yolo|plan"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    // Request plumbing shared with the other *_request_async tools.
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` emits `-o json` so token usage can be extracted via
    // parseGeminiJson. The default stays `text`, so existing callers see no
    // behavior change.
    outputFormat: z.enum(["text", "json"]).default("text").describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
    // These four reuse the field validators from GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.
    sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
    policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
    adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
    attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
}, async (input) => {
    // Exactly the fields handed to the Gemini handler, in the order they are
    // passed; a field missing from `input` is still forwarded as undefined.
    const forwardedKeys = [
        "prompt",
        "model",
        "sessionId",
        "resumeLatest",
        "createNewSession",
        "approvalMode",
        "approvalStrategy",
        "approvalPolicy",
        "mcpServers",
        "allowedTools",
        "includeDirs",
        "correlationId",
        "optimizePrompt",
        "idleTimeoutMs",
        "forceRefresh",
        "outputFormat",
        "sandbox",
        "policyFiles",
        "adminPolicyFiles",
        "attachments",
    ];
    const request = Object.fromEntries(forwardedKeys.map((key) => [key, input[key]]));
    return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, request);
});
3165
// Async Grok entry point. Declares the accepted input shape and forwards the
// received fields, unchanged and in declaration order, to
// handleGrokRequestAsync together with the shared gateway dependencies.
server.tool("grok_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Grok"),
    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
    outputFormat: z.enum(["plain", "json", "streaming-json"]).optional().describe("Output format (plain|json|streaming-json). Grok default is plain."),
    // Session selection.
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Grok session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    // Permission and effort controls.
    alwaysApprove: z.boolean().default(false).describe("Auto-approve all tool executions (--always-approve)"),
    permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional().describe("Grok permission mode"),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Grok effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowed built-in tools (passed as --tools comma list)"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
    // Request plumbing shared with the other *_request_async tools.
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (input) => {
    // Exactly the fields handed to the Grok handler, in the order they are
    // passed; a field missing from `input` is still forwarded as undefined.
    const forwardedKeys = [
        "prompt",
        "model",
        "outputFormat",
        "sessionId",
        "resumeLatest",
        "createNewSession",
        "alwaysApprove",
        "permissionMode",
        "effort",
        "reasoningEffort",
        "approvalStrategy",
        "approvalPolicy",
        "mcpServers",
        "allowedTools",
        "disallowedTools",
        "correlationId",
        "optimizePrompt",
        "idleTimeoutMs",
        "forceRefresh",
    ];
    const request = Object.fromEntries(forwardedKeys.map((key) => [key, input[key]]));
    return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, request);
});
3254
// Async Mistral Vibe entry point. Declares the accepted input shape and
// forwards the received fields, unchanged and in declaration order, to
// handleMistralRequestAsync together with the shared gateway dependencies.
server.tool("mistral_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Mistral Vibe"),
    model: z.string().optional().describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
    outputFormat: z.enum(["plain", "json", "stream-json"]).optional().describe("Output format (plain|json|stream-json). Vibe default is plain."),
    // Session selection.
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Vibe session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    // Agent mode and effort controls.
    permissionMode: z.enum(MISTRAL_AGENT_MODES).optional().describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Vibe effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
    disallowedTools: z.array(z.string()).optional().describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
    // Request plumbing shared with the other *_request_async tools.
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (input) => {
    // Exactly the fields handed to the Mistral handler, in the order they are
    // passed; a field missing from `input` is still forwarded as undefined.
    const forwardedKeys = [
        "prompt",
        "model",
        "outputFormat",
        "sessionId",
        "resumeLatest",
        "createNewSession",
        "permissionMode",
        "effort",
        "reasoningEffort",
        "approvalStrategy",
        "approvalPolicy",
        "mcpServers",
        "allowedTools",
        "disallowedTools",
        "correlationId",
        "optimizePrompt",
        "idleTimeoutMs",
        "forceRefresh",
    ];
    const request = Object.fromEntries(forwardedKeys.map((key) => [key, input[key]]));
    return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, request);
});
3341
// Looks up an async job by ID and returns its snapshot as pretty-printed JSON.
// An unknown ID yields a "Job not found" payload flagged with isError.
server.tool("llm_job_status", {
    jobId: z.string().describe("Async job ID from *_request_async"),
}, async ({ jobId }) => {
    const snapshot = asyncJobManager.getJobSnapshot(jobId);
    const missing = !snapshot;
    const body = missing
        ? { success: false, error: "Job not found", jobId }
        : { success: true, job: snapshot };
    const reply = {
        content: [{ type: "text", text: JSON.stringify(body, null, 2) }],
    };
    // isError is attached only on the failure path; success replies omit the key.
    if (missing) {
        reply.isError = true;
    }
    return reply;
});
3372
// Returns the captured output of an async job as pretty-printed JSON.
// An unknown ID yields a "Job not found" payload flagged with isError. When the
// job's recorded output format is "stream-json" and stdout is non-empty, a
// `parsed` summary produced by parseStreamJson is added next to the raw result.
server.tool("llm_job_result", {
    jobId: z.string().describe("Async job ID from *_request_async"),
    maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream"),
}, async ({ jobId, maxChars }) => {
    // Wraps a payload as a single MCP text content item.
    const asTextItem = (payload) => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    const result = asyncJobManager.getJobResult(jobId, maxChars);
    if (!result) {
        return {
            content: [asTextItem({ success: false, error: "Job not found", jobId })],
            isError: true,
        };
    }
    // Parse stream-json output for Claude async jobs.
    const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
    const parsed = outputFormat === "stream-json" && result.stdout
        ? parseStreamJson(result.stdout)
        : undefined;
    const body = { success: true, result };
    if (parsed) {
        // Expose only these summary fields of the parsed stream.
        const { text, costUsd, usage, model, numTurns } = parsed;
        body.parsed = { text, costUsd, usage, model, numTurns };
    }
    return { content: [asTextItem(body)] };
});
3427
// Asks the async job manager to cancel a job and reports the outcome as JSON.
// A refused cancellation is returned as an isError reply carrying the
// manager's reason, or "Unable to cancel" when no reason is given.
server.tool("llm_job_cancel", {
    jobId: z.string().describe("Async job ID from *_request_async"),
}, async ({ jobId }) => {
    const outcome = asyncJobManager.cancelJob(jobId);
    const refused = !outcome.canceled;
    const body = refused
        ? { success: false, jobId, reason: outcome.reason || "Unable to cancel" }
        : { success: true, jobId };
    const reply = {
        content: [{ type: "text", text: JSON.stringify(body, null, 2) }],
    };
    // isError is attached only on the failure path; success replies omit the key.
    if (refused) {
        reply.isError = true;
    }
    return reply;
});
3458
+ } // end if (asyncJobsEnabled)
3459
// Health report: the async job manager's health fields merged with a summary
// of the persistence configuration. The DSN itself is never echoed; it is
// shown as "[redacted]" when set and null otherwise.
server.tool("llm_process_health", {}, async () => {
    const health = asyncJobManager.getJobHealth();
    // Only surfaced while async jobs are disabled.
    const asyncDisabledWarning = "Async job persistence is disabled (backend = 'none'). *_request_async tools are NOT registered on this gateway. Set [persistence].backend = 'sqlite' (or 'memory' + acknowledgeEphemeral = true) to enable them.";
    const persistenceBlock = {
        backend: persistence.backend,
        dbPath: persistence.path,
        dsn: persistence.dsn ? "[redacted]" : null,
        retentionDays: persistence.retentionDays,
        dedupWindowMs: persistence.dedupWindowMs,
        asyncJobsEnabled: persistence.asyncJobsEnabled,
        acknowledgeEphemeral: persistence.acknowledgeEphemeral,
        sources: persistence.sources,
        warning: persistence.asyncJobsEnabled ? null : asyncDisabledWarning,
    };
    const report = { success: true, ...health, persistence: persistenceBlock };
    return {
        content: [{ type: "text", text: JSON.stringify(report, null, 2) }],
    };
});
3483
+ //──────────────────────────────────────────────────────────────────────────────
3484
+ // Approval Audit Tools
3485
+ //──────────────────────────────────────────────────────────────────────────────
3486
// Lists recorded approval decisions, optionally filtered by CLI, and returns
// them with a count as pretty-printed JSON.
server.tool("approval_list", {
    limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
    cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional().describe("Optional CLI filter"),
}, async ({ limit, cli }) => {
    const approvals = approvalManager.list(limit, cli);
    const summary = { success: true, count: approvals.length, approvals };
    return {
        content: [{ type: "text", text: JSON.stringify(summary, null, 2) }],
    };
});
3513
+ //──────────────────────────────────────────────────────────────────────────────
3514
+ // List Models Tool
3515
+ //──────────────────────────────────────────────────────────────────────────────
3516
// Returns the CLI/model catalogue from getCliInfo(), either whole or narrowed
// to the single requested CLI.
server.tool("list_models", {
    cli: z
        .preprocess((raw) => {
            // An empty string or null is treated as "no filter supplied".
            if (raw === "" || raw === null) {
                return undefined;
            }
            return raw;
        }, z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
        .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    let selection = getCliInfo();
    if (cli) {
        // Keep the response keyed by CLI name even for a single entry.
        selection = { [cli]: selection[cli] };
    }
    return { content: [{ type: "text", text: JSON.stringify(selection, null, 2) }] };
});
3525
// Returns the result of getCliVersions(cli) as JSON under a `versions` key;
// `cli` is an optional filter.
server.tool("cli_versions", {
    cli: z
        .preprocess((raw) => {
            // An empty string or null is treated as "no filter supplied".
            if (raw === "" || raw === null) {
                return undefined;
            }
            return raw;
        }, z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
        .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    const versions = await getCliVersions(cli);
    const textItem = { type: "text", text: JSON.stringify({ versions }, null, 2) };
    return { content: [textItem] };
});
3533
// Returns the report built by buildUpstreamContractReport as JSON. `cli` is an
// optional filter and `probeInstalled` is passed through to the report builder.
server.tool("upstream_contracts", {
    cli: z
        .preprocess((raw) => {
            // An empty string or null is treated as "no filter supplied".
            if (raw === "" || raw === null) {
                return undefined;
            }
            return raw;
        }, SESSION_PROVIDER_ENUM.optional())
        .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
    probeInstalled: z.boolean().default(false).describe("When true, run local --help probes and compare advertised flags"),
}, async ({ cli, probeInstalled }) => {
    const report = buildUpstreamContractReport({ cli, probeInstalled });
    const textItem = { type: "text", text: JSON.stringify(report, null, 2) };
    return { content: [textItem] };
});
3545
+ server.tool("cli_upgrade", {
3546
+ cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
3547
+ target: z
3548
+ .string()
3549
+ .min(1)
3550
+ .default("latest")
3551
+ .describe("Package tag/version/target to install (default: latest)"),
3552
+ dryRun: z
3553
+ .boolean()
3554
+ .default(true)
3555
+ .describe("When true, return the upgrade plan without running it"),
3556
+ timeoutMs: z
3557
+ .number()
3558
+ .int()
3559
+ .min(30_000)
3560
+ .max(3_600_000)
3561
+ .optional()
3562
+ .describe("Upgrade timeout in ms when dryRun=false"),
3563
+ }, async ({ cli, target, dryRun, timeoutMs }) => {
3564
+ try {
3565
+ const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
2436
3566
  return {
2437
3567
  content: [
2438
3568
  {
2439
3569
  type: "text",
2440
3570
  text: JSON.stringify({
2441
- success: false,
2442
- error: "Session not found or does not belong to the specified CLI",
3571
+ success: true,
3572
+ ...result,
2443
3573
  }, null, 2),
2444
3574
  },
2445
3575
  ],
2446
- isError: true,
2447
3576
  };
2448
3577
  }
2449
- logger.info(`Set active ${cli} session to: ${sessionId}`);
2450
- return {
2451
- content: [
2452
- {
2453
- type: "text",
2454
- text: JSON.stringify({
2455
- success: true,
2456
- cli,
2457
- activeSessionId: sessionId,
2458
- }, null, 2),
2459
- },
2460
- ],
2461
- };
2462
- }
2463
- catch (error) {
2464
- return createErrorResponse("session_set_active", 1, "", undefined, error);
2465
- }
2466
- });
2467
- server.tool("session_delete", {
2468
- sessionId: z.string().describe("Session ID"),
2469
- }, async ({ sessionId }) => {
2470
- try {
2471
- const session = await sessionManager.getSession(sessionId);
2472
- if (!session) {
3578
+ catch (error) {
3579
+ const message = error instanceof Error ? error.message : String(error);
2473
3580
  return {
2474
3581
  content: [
2475
3582
  {
2476
3583
  type: "text",
2477
3584
  text: JSON.stringify({
2478
3585
  success: false,
2479
- error: "Session not found",
3586
+ error: message,
2480
3587
  }, null, 2),
2481
3588
  },
2482
3589
  ],
2483
3590
  isError: true,
2484
3591
  };
2485
3592
  }
2486
- const success = await sessionManager.deleteSession(sessionId);
2487
- logger.info(`Deleted session: ${sessionId}`);
2488
- return {
2489
- content: [
2490
- {
2491
- type: "text",
2492
- text: JSON.stringify({
2493
- success,
2494
- deletedSession: {
2495
- id: session.id,
2496
- cli: session.cli,
2497
- description: session.description,
3593
+ });
3594
+ //──────────────────────────────────────────────────────────────────────────────
3595
+ // Session Management Tools
3596
+ //──────────────────────────────────────────────────────────────────────────────
3597
+ server.tool("session_create", {
3598
+ cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
3599
+ description: z.string().optional().describe("Session description"),
3600
+ setAsActive: z.boolean().default(true).describe("Set as active session"),
3601
+ }, async ({ cli, description, setAsActive }) => {
3602
+ try {
3603
+ const session = await sessionManager.createSession(cli, description);
3604
+ if (setAsActive) {
3605
+ await sessionManager.setActiveSession(cli, session.id);
3606
+ }
3607
+ logger.info(`Created new ${cli} session: ${session.id}`);
3608
+ return {
3609
+ content: [
3610
+ {
3611
+ type: "text",
3612
+ text: JSON.stringify({
3613
+ success: true,
3614
+ session: {
3615
+ id: session.id,
3616
+ cli: session.cli,
3617
+ description: session.description,
3618
+ createdAt: session.createdAt,
3619
+ isActive: setAsActive,
3620
+ },
3621
+ }, null, 2),
3622
+ },
3623
+ ],
3624
+ };
3625
+ }
3626
+ catch (error) {
3627
+ return createErrorResponse("session_create", 1, "", undefined, error);
3628
+ }
3629
+ });
3630
+ server.tool("session_list", {
3631
+ cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3632
+ }, async ({ cli }) => {
3633
+ try {
3634
+ const sessions = await sessionManager.listSessions(cli);
3635
+ const activeSessions = {
3636
+ claude: await sessionManager.getActiveSession("claude"),
3637
+ codex: await sessionManager.getActiveSession("codex"),
3638
+ gemini: await sessionManager.getActiveSession("gemini"),
3639
+ grok: await sessionManager.getActiveSession("grok"),
3640
+ mistral: await sessionManager.getActiveSession("mistral"),
3641
+ };
3642
+ const sessionList = sessions.map(s => ({
3643
+ id: s.id,
3644
+ cli: s.cli,
3645
+ description: s.description,
3646
+ createdAt: s.createdAt,
3647
+ lastUsedAt: s.lastUsedAt,
3648
+ isActive: activeSessions[s.cli]?.id === s.id,
3649
+ }));
3650
+ return {
3651
+ content: [
3652
+ {
3653
+ type: "text",
3654
+ text: JSON.stringify({
3655
+ total: sessionList.length,
3656
+ sessions: sessionList,
3657
+ activeSessions: {
3658
+ claude: activeSessions.claude?.id || null,
3659
+ codex: activeSessions.codex?.id || null,
3660
+ gemini: activeSessions.gemini?.id || null,
3661
+ grok: activeSessions.grok?.id || null,
3662
+ mistral: activeSessions.mistral?.id || null,
3663
+ },
3664
+ }, null, 2),
3665
+ },
3666
+ ],
3667
+ };
3668
+ }
3669
+ catch (error) {
3670
+ return createErrorResponse("session_list", 1, "", undefined, error);
3671
+ }
3672
+ });
3673
+ server.tool("session_set_active", {
3674
+ cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
3675
+ sessionId: z.string().nullable().describe("Session ID (null to clear)"),
3676
+ }, async ({ cli, sessionId }) => {
3677
+ try {
3678
+ const success = await sessionManager.setActiveSession(cli, sessionId || null);
3679
+ if (!success) {
3680
+ return {
3681
+ content: [
3682
+ {
3683
+ type: "text",
3684
+ text: JSON.stringify({
3685
+ success: false,
3686
+ error: "Session not found or does not belong to the specified CLI",
3687
+ }, null, 2),
2498
3688
  },
2499
- }, null, 2),
2500
- },
2501
- ],
2502
- };
2503
- }
2504
- catch (error) {
2505
- return createErrorResponse("session_delete", 1, "", undefined, error);
2506
- }
2507
- });
2508
- server.tool("session_get", {
2509
- sessionId: z.string().describe("Session ID"),
2510
- }, async ({ sessionId }) => {
2511
- try {
2512
- const session = await sessionManager.getSession(sessionId);
2513
- if (!session) {
3689
+ ],
3690
+ isError: true,
3691
+ };
3692
+ }
3693
+ logger.info(`Set active ${cli} session to: ${sessionId}`);
2514
3694
  return {
2515
3695
  content: [
2516
3696
  {
2517
3697
  type: "text",
2518
3698
  text: JSON.stringify({
2519
- success: false,
2520
- error: "Session not found",
3699
+ success: true,
3700
+ cli,
3701
+ activeSessionId: sessionId,
2521
3702
  }, null, 2),
2522
3703
  },
2523
3704
  ],
2524
- isError: true,
2525
3705
  };
2526
3706
  }
2527
- const activeSession = await sessionManager.getActiveSession(session.cli);
2528
- return {
2529
- content: [
2530
- {
2531
- type: "text",
2532
- text: JSON.stringify({
2533
- success: true,
2534
- session: {
2535
- ...session,
2536
- isActive: activeSession?.id === session.id,
3707
+ catch (error) {
3708
+ return createErrorResponse("session_set_active", 1, "", undefined, error);
3709
+ }
3710
+ });
3711
+ server.tool("session_delete", {
3712
+ sessionId: z.string().describe("Session ID"),
3713
+ }, async ({ sessionId }) => {
3714
+ try {
3715
+ const session = await sessionManager.getSession(sessionId);
3716
+ if (!session) {
3717
+ return {
3718
+ content: [
3719
+ {
3720
+ type: "text",
3721
+ text: JSON.stringify({
3722
+ success: false,
3723
+ error: "Session not found",
3724
+ }, null, 2),
2537
3725
  },
2538
- }, null, 2),
2539
- },
2540
- ],
2541
- };
2542
- }
2543
- catch (error) {
2544
- return createErrorResponse("session_get", 1, "", undefined, error);
2545
- }
2546
- });
2547
- server.tool("session_clear_all", {
2548
- cli: z
2549
- .enum(["claude", "codex", "gemini"])
2550
- .optional()
2551
- .describe("CLI filter (claude|codex|gemini)"),
2552
- }, async ({ cli }) => {
2553
- try {
2554
- const count = await sessionManager.clearAllSessions(cli);
2555
- logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
2556
- return {
2557
- content: [
2558
- {
2559
- type: "text",
2560
- text: JSON.stringify({
2561
- success: true,
2562
- deletedCount: count,
2563
- cli: cli || "all",
2564
- }, null, 2),
2565
- },
2566
- ],
2567
- };
2568
- }
2569
- catch (error) {
2570
- return createErrorResponse("session_clear_all", 1, "", undefined, error);
2571
- }
2572
- });
3726
+ ],
3727
+ isError: true,
3728
+ };
3729
+ }
3730
+ const success = await sessionManager.deleteSession(sessionId);
3731
+ logger.info(`Deleted session: ${sessionId}`);
3732
+ return {
3733
+ content: [
3734
+ {
3735
+ type: "text",
3736
+ text: JSON.stringify({
3737
+ success,
3738
+ deletedSession: {
3739
+ id: session.id,
3740
+ cli: session.cli,
3741
+ description: session.description,
3742
+ },
3743
+ }, null, 2),
3744
+ },
3745
+ ],
3746
+ };
3747
+ }
3748
+ catch (error) {
3749
+ return createErrorResponse("session_delete", 1, "", undefined, error);
3750
+ }
3751
+ });
3752
+ server.tool("session_get", {
3753
+ sessionId: z.string().describe("Session ID"),
3754
+ }, async ({ sessionId }) => {
3755
+ try {
3756
+ const session = await sessionManager.getSession(sessionId);
3757
+ if (!session) {
3758
+ return {
3759
+ content: [
3760
+ {
3761
+ type: "text",
3762
+ text: JSON.stringify({
3763
+ success: false,
3764
+ error: "Session not found",
3765
+ }, null, 2),
3766
+ },
3767
+ ],
3768
+ isError: true,
3769
+ };
3770
+ }
3771
+ const activeSession = await sessionManager.getActiveSession(session.cli);
3772
+ return {
3773
+ content: [
3774
+ {
3775
+ type: "text",
3776
+ text: JSON.stringify({
3777
+ success: true,
3778
+ session: {
3779
+ ...session,
3780
+ isActive: activeSession?.id === session.id,
3781
+ },
3782
+ }, null, 2),
3783
+ },
3784
+ ],
3785
+ };
3786
+ }
3787
+ catch (error) {
3788
+ return createErrorResponse("session_get", 1, "", undefined, error);
3789
+ }
3790
+ });
3791
+ server.tool("session_clear_all", {
3792
+ cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3793
+ }, async ({ cli }) => {
3794
+ try {
3795
+ const count = await sessionManager.clearAllSessions(cli);
3796
+ logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
3797
+ return {
3798
+ content: [
3799
+ {
3800
+ type: "text",
3801
+ text: JSON.stringify({
3802
+ success: true,
3803
+ deletedCount: count,
3804
+ cli: cli || "all",
3805
+ }, null, 2),
3806
+ },
3807
+ ],
3808
+ };
3809
+ }
3810
+ catch (error) {
3811
+ return createErrorResponse("session_clear_all", 1, "", undefined, error);
3812
+ }
3813
+ });
3814
+ return server;
3815
+ }
2573
3816
  //──────────────────────────────────────────────────────────────────────────────
2574
3817
  // Async Initialization
2575
3818
  //──────────────────────────────────────────────────────────────────────────────
@@ -2592,7 +3835,7 @@ async function initializeSessionManager() {
2592
3835
  //──────────────────────────────────────────────────────────────────────────────
2593
3836
  // Health Check Resource (only if using PostgreSQL)
2594
3837
  //──────────────────────────────────────────────────────────────────────────────
2595
- function registerHealthResource() {
3838
+ function registerHealthResource(server) {
2596
3839
  if (db) {
2597
3840
  server.registerResource("health", "health://status", {
2598
3841
  title: "🏥 Health Status",
@@ -2640,8 +3883,16 @@ async function shutdown(signal) {
2640
3883
  // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
2641
3884
  await killAllProcessGroups();
2642
3885
  logger.info("All process groups terminated");
2643
- await server.close();
2644
- logger.info("MCP server closed");
3886
+ if (activeHttpGateway) {
3887
+ await activeHttpGateway.close();
3888
+ logger.info("HTTP MCP transport closed");
3889
+ activeHttpGateway = null;
3890
+ }
3891
+ if (activeServer) {
3892
+ await activeServer.close();
3893
+ logger.info("MCP server closed");
3894
+ activeServer = null;
3895
+ }
2645
3896
  if (db) {
2646
3897
  await db.disconnect();
2647
3898
  logger.info("Database connections closed");
@@ -2661,18 +3912,74 @@ process.on("SIGINT", () => shutdown("SIGINT"));
2661
3912
  // Server Startup
2662
3913
  //──────────────────────────────────────────────────────────────────────────────
2663
3914
  async function main() {
2664
- logger.info("Starting llm-cli-gateway MCP server");
3915
+ const args = process.argv.slice(2);
3916
+ if (args[0] === "doctor") {
3917
+ if (args.includes("--json")) {
3918
+ printDoctorJson();
3919
+ return;
3920
+ }
3921
+ process.stderr.write("Only doctor --json is supported in this layer.\n");
3922
+ process.exit(2);
3923
+ }
3924
+ if (args[0] === "contracts") {
3925
+ if (args.includes("--json")) {
3926
+ const cliArg = args.find(arg => arg.startsWith("--cli="))?.split("=")[1];
3927
+ const cli = SESSION_PROVIDER_VALUES.includes(cliArg)
3928
+ ? cliArg
3929
+ : undefined;
3930
+ if (cliArg && !cli) {
3931
+ process.stderr.write(`Unsupported --cli value: ${cliArg}\n`);
3932
+ process.exit(2);
3933
+ }
3934
+ const probeInstalled = args.includes("--probe-installed");
3935
+ process.stdout.write(JSON.stringify(buildUpstreamContractReport({ cli, probeInstalled }), null, 2) + "\n");
3936
+ return;
3937
+ }
3938
+ process.stderr.write("Usage: llm-cli-gateway contracts --json [--cli=claude|codex|gemini|grok|mistral] [--probe-installed]\n");
3939
+ process.exit(2);
3940
+ }
3941
+ const transportArg = args.find(arg => arg.startsWith("--transport="));
3942
+ const transportMode = transportArg?.split("=")[1] ||
3943
+ process.env.LLM_GATEWAY_TRANSPORT ||
3944
+ process.env.MCP_TRANSPORT ||
3945
+ "stdio";
3946
+ logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
2665
3947
  // Initialize session manager first
2666
3948
  await initializeSessionManager();
3949
+ const serverDeps = {
3950
+ sessionManager,
3951
+ resourceProvider,
3952
+ db,
3953
+ performanceMetrics,
3954
+ asyncJobManager,
3955
+ approvalManager,
3956
+ flightRecorder,
3957
+ logger,
3958
+ };
3959
+ if (transportMode === "http") {
3960
+ activeHttpGateway = await startHttpGateway({
3961
+ deps: serverDeps,
3962
+ createGatewayServer,
3963
+ logger,
3964
+ });
3965
+ logger.info(`llm-cli-gateway HTTP MCP server connected and ready at ${activeHttpGateway.url}`);
3966
+ return;
3967
+ }
3968
+ if (transportMode !== "stdio") {
3969
+ throw new Error(`Unsupported transport: ${transportMode}`);
3970
+ }
3971
+ activeServer = createGatewayServer({
3972
+ ...serverDeps,
3973
+ });
2667
3974
  // Register health check resource if using PostgreSQL
2668
- registerHealthResource();
3975
+ registerHealthResource(activeServer);
2669
3976
  const transport = new StdioServerTransport();
2670
- await server.connect(transport);
3977
+ await activeServer.connect(transport);
2671
3978
  logger.info("llm-cli-gateway MCP server connected and ready");
2672
3979
  }
2673
3980
  // Guard: only auto-start when run directly (not imported for testing)
2674
3981
  // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
2675
- const __entryUrl = process.argv[1] ? new URL(realpathSync(process.argv[1]), "file://").href : "";
3982
+ const __entryUrl = entrypointFileURL(process.argv[1]);
2676
3983
  if (__entryUrl === import.meta.url) {
2677
3984
  main().catch(error => {
2678
3985
  logger.error("Fatal server error:", error);