llm-cli-gateway 1.1.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +226 -9
  3. package/dist/approval-manager.d.ts +1 -1
  4. package/dist/async-job-manager.d.ts +75 -4
  5. package/dist/async-job-manager.js +303 -19
  6. package/dist/auth.d.ts +15 -0
  7. package/dist/auth.js +46 -0
  8. package/dist/cli-updater.d.ts +55 -0
  9. package/dist/cli-updater.js +248 -0
  10. package/dist/codex-json-parser.d.ts +34 -0
  11. package/dist/codex-json-parser.js +105 -0
  12. package/dist/doctor.d.ts +110 -0
  13. package/dist/doctor.js +280 -0
  14. package/dist/endpoint-exposure.d.ts +22 -0
  15. package/dist/endpoint-exposure.js +231 -0
  16. package/dist/executor.d.ts +2 -0
  17. package/dist/executor.js +2 -2
  18. package/dist/flight-recorder.d.ts +3 -1
  19. package/dist/flight-recorder.js +31 -2
  20. package/dist/gateway-server.d.ts +2 -0
  21. package/dist/gateway-server.js +1 -0
  22. package/dist/gemini-json-parser.d.ts +21 -0
  23. package/dist/gemini-json-parser.js +47 -0
  24. package/dist/health.d.ts +7 -0
  25. package/dist/health.js +22 -0
  26. package/dist/http-transport.d.ts +22 -0
  27. package/dist/http-transport.js +164 -0
  28. package/dist/index.d.ts +210 -2
  29. package/dist/index.js +2880 -1037
  30. package/dist/job-store.d.ts +84 -0
  31. package/dist/job-store.js +251 -0
  32. package/dist/logger.d.ts +9 -0
  33. package/dist/logger.js +14 -0
  34. package/dist/model-registry.d.ts +14 -0
  35. package/dist/model-registry.js +478 -134
  36. package/dist/provider-login-guidance.d.ts +21 -0
  37. package/dist/provider-login-guidance.js +98 -0
  38. package/dist/provider-status.d.ts +41 -0
  39. package/dist/provider-status.js +203 -0
  40. package/dist/request-helpers.d.ts +525 -4
  41. package/dist/request-helpers.js +653 -0
  42. package/dist/resources.js +88 -0
  43. package/dist/session-manager-pg.js +2 -0
  44. package/dist/session-manager.d.ts +1 -1
  45. package/dist/session-manager.js +3 -1
  46. package/dist/validation-normalizer.d.ts +23 -0
  47. package/dist/validation-normalizer.js +79 -0
  48. package/dist/validation-orchestrator.d.ts +47 -0
  49. package/dist/validation-orchestrator.js +145 -0
  50. package/dist/validation-prompts.d.ts +15 -0
  51. package/dist/validation-prompts.js +52 -0
  52. package/dist/validation-report.d.ts +57 -0
  53. package/dist/validation-report.js +129 -0
  54. package/dist/validation-tools.d.ts +7 -0
  55. package/dist/validation-tools.js +198 -0
  56. package/package.json +16 -6
  57. package/setup/status.schema.json +271 -0
package/dist/index.js CHANGED
@@ -8,6 +8,8 @@ import { fileURLToPath } from "url";
8
8
  import { z } from "zod";
9
9
  import { executeCli, killAllProcessGroups } from "./executor.js";
10
10
  import { parseStreamJson } from "./stream-json-parser.js";
11
+ import { parseCodexJsonStream } from "./codex-json-parser.js";
12
+ import { parseGeminiJson } from "./gemini-json-parser.js";
11
13
  import { createSessionManager } from "./session-manager.js";
12
14
  import { ResourceProvider } from "./resources.js";
13
15
  import { PerformanceMetrics } from "./metrics.js";
@@ -16,16 +18,24 @@ import { loadConfig } from "./config.js";
16
18
  import { checkHealth } from "./health.js";
17
19
  import { getCliInfo, resolveModelAlias } from "./model-registry.js";
18
20
  import { AsyncJobManager } from "./async-job-manager.js";
21
+ import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
19
22
  import { ApprovalManager } from "./approval-manager.js";
20
23
  import { checkReviewIntegrity } from "./review-integrity.js";
21
24
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
22
- import { resolveSessionResumeArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
25
+ import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
23
26
  import { createFlightRecorder } from "./flight-recorder.js";
27
+ import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
28
+ import { startHttpGateway } from "./http-transport.js";
29
+ import { printDoctorJson } from "./doctor.js";
30
+ import { registerValidationTools } from "./validation-tools.js";
24
31
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
25
32
  const logger = {
26
33
  info: (message, ...args) => {
27
34
  console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
28
35
  },
36
+ warn: (message, ...args) => {
37
+ console.error(`[WARN] ${new Date().toISOString()} - ${message}`, ...args);
38
+ },
29
39
  error: (message, ...args) => {
30
40
  console.error(`[ERROR] ${new Date().toISOString()} - ${message}`, ...args);
31
41
  },
@@ -88,31 +98,88 @@ const loadedSkills = loadSkills();
88
98
  // system prompt at connection time. Covers key patterns + pointers to L2 resources.
89
99
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
90
100
 
91
- Tools: claude_request, codex_request, gemini_request (sync) | *_request_async (async)
101
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
102
+ Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
92
103
  Jobs: llm_job_status, llm_job_result, llm_job_cancel
93
104
  Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
94
- Other: list_models, approval_list, llm_process_health
105
+ Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
95
106
 
96
107
  Key behaviors:
97
108
  - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
98
- - Sessions: Claude --continue, Gemini --resume (real CLI continuity). Codex bookkeeping only.
109
+ - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (requires session_logging.enabled=true in ~/.vibe/config.toml), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
99
110
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
100
111
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
101
112
 
102
113
  Skills (full docs via MCP resources):
103
114
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
104
- const server = new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
115
+ function newGatewayMcpServer() {
116
+ return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
117
+ }
105
118
  // Global state (initialized asynchronously)
106
119
  let sessionManager;
107
120
  let db = null;
108
121
  const performanceMetrics = new PerformanceMetrics();
109
122
  let resourceProvider;
110
123
  const flightRecorder = createFlightRecorder(logger);
111
- const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
112
- performanceMetrics.recordRequest(cli, durationMs, success);
113
- });
124
+ // Durable job store: persists every async job to ~/.llm-cli-gateway/logs.db so callers
125
+ // can collect results across long polling gaps and gateway restarts, and so repeated
126
+ // identical requests dedup onto the running/completed job instead of starting over.
127
+ const jobStore = (() => {
128
+ const dbPath = resolveJobStoreDbPath();
129
+ if (!dbPath) {
130
+ logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
131
+ return null;
132
+ }
133
+ try {
134
+ return new JobStore(dbPath, logger);
135
+ }
136
+ catch (err) {
137
+ logger.error("Failed to open durable job store; continuing in-memory only", err);
138
+ return null;
139
+ }
140
+ })();
141
+ function newAsyncJobManager(metrics, runtimeLogger, store = jobStore) {
142
+ return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
143
+ metrics.recordRequest(cli, durationMs, success);
144
+ }, store);
145
+ }
146
+ const asyncJobManager = newAsyncJobManager(performanceMetrics, logger);
114
147
  const approvalManager = new ApprovalManager(undefined, logger);
115
148
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
149
+ // U22: Session-provider enum extended to five providers. The storage layer's
150
+ // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
151
+ // session_create / session_list / session_clear_all accept the fifth provider.
152
+ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
153
+ export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
154
+ let activeServer = null;
155
+ let activeHttpGateway = null;
156
+ function resolveGatewayServerRuntime(deps = {}, options = {}) {
157
+ const runtimeLogger = deps.logger ?? logger;
158
+ const runtimeSessionManager = deps.sessionManager ?? sessionManager;
159
+ const runtimePerformanceMetrics = deps.performanceMetrics ??
160
+ (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
161
+ const runtimeAsyncJobManager = deps.asyncJobManager ??
162
+ (options.isolateState
163
+ ? // Factory-created test/HTTP session servers must not mark another instance's
164
+ // durable jobs orphaned. Stdio startup injects the process-global manager.
165
+ newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
166
+ : asyncJobManager);
167
+ const runtimeApprovalManager = deps.approvalManager ??
168
+ (options.isolateState ? new ApprovalManager(undefined, runtimeLogger) : approvalManager);
169
+ return {
170
+ sessionManager: runtimeSessionManager,
171
+ resourceProvider: deps.resourceProvider ??
172
+ (options.isolateState
173
+ ? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
174
+ : resourceProvider),
175
+ db: "db" in deps ? (deps.db ?? null) : db,
176
+ performanceMetrics: runtimePerformanceMetrics,
177
+ asyncJobManager: runtimeAsyncJobManager,
178
+ approvalManager: runtimeApprovalManager,
179
+ flightRecorder: deps.flightRecorder ?? flightRecorder,
180
+ logger: runtimeLogger,
181
+ };
182
+ }
116
183
  // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
117
184
  // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
118
185
  // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
@@ -120,6 +187,8 @@ const CLI_IDLE_TIMEOUTS = {
120
187
  claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
121
188
  codex: 600_000, // 10 minutes — Codex streams stderr progress
122
189
  gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
190
+ grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
191
+ mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
123
192
  };
124
193
  function resolveIdleTimeout(cli, override) {
125
194
  if (override !== undefined)
@@ -131,18 +200,70 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
131
200
  * Start an async job and poll until completion or deadline.
132
201
  * Returns the job result if it finishes in time, or a deferral marker.
133
202
  */
134
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
203
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete) {
204
+ // U26 fix: ownership of onComplete is a contract. Once this function returns
205
+ // OR throws, the caller MUST consider onComplete consumed — i.e. it has
206
+ // either been run, or the AsyncJobManager has taken ownership of it. The
207
+ // caller never needs to reclaim.
208
+ let onCompleteOwnedByCaller = onComplete !== undefined;
209
+ const consumeOnComplete = () => {
210
+ if (!onCompleteOwnedByCaller || !onComplete)
211
+ return;
212
+ onCompleteOwnedByCaller = false;
213
+ try {
214
+ onComplete();
215
+ }
216
+ catch (err) {
217
+ runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
218
+ }
219
+ };
135
220
  if (SYNC_DEADLINE_MS === 0) {
136
- // Disabled — fall through to direct execution
137
- return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
221
+ // Disabled — fall through to direct execution.
222
+ // Note: direct execution bypasses dedup. forceRefresh is implied.
223
+ const command = cli === "mistral" ? "vibe" : cli;
224
+ try {
225
+ return await executeCli(command, args, {
226
+ idleTimeout: idleTimeoutMs,
227
+ logger: runtime.logger,
228
+ env: env ? { ...process.env, ...env } : undefined,
229
+ });
230
+ }
231
+ finally {
232
+ // Direct-execution path completes inline; release per-request resources
233
+ // (e.g. outputSchema temp files) here.
234
+ consumeOnComplete();
235
+ }
236
+ }
237
+ let outcome;
238
+ try {
239
+ outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
240
+ idleTimeoutMs,
241
+ outputFormat,
242
+ forceRefresh,
243
+ env,
244
+ onComplete,
245
+ });
246
+ // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
247
+ // fireOnComplete on terminal status, or run inline immediately for dedup).
248
+ onCompleteOwnedByCaller = false;
249
+ }
250
+ catch (err) {
251
+ // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
252
+ // registered, so onComplete will never be called by the manager. Reclaim
253
+ // here so the temp file is not leaked.
254
+ consumeOnComplete();
255
+ throw err;
256
+ }
257
+ const job = outcome.snapshot;
258
+ if (outcome.deduped) {
259
+ runtime.logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
138
260
  }
139
- const job = asyncJobManager.startJob(cli, args, corrId, undefined, idleTimeoutMs, outputFormat);
140
261
  const deadline = Date.now() + SYNC_DEADLINE_MS;
141
262
  while (Date.now() < deadline) {
142
- const snapshot = asyncJobManager.getJobSnapshot(job.id);
263
+ const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
143
264
  if (snapshot && snapshot.status !== "running") {
144
265
  // Job finished within deadline — extract result
145
- const result = asyncJobManager.getJobResult(job.id);
266
+ const result = runtime.asyncJobManager.getJobResult(job.id);
146
267
  if (!result) {
147
268
  return { stdout: "", stderr: "Job result unavailable", code: 1 };
148
269
  }
@@ -155,7 +276,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
155
276
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
156
277
  }
157
278
  // Deadline exceeded — return deferral
158
- logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
279
+ runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
159
280
  return {
160
281
  deferred: true,
161
282
  jobId: job.id,
@@ -233,28 +354,60 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
233
354
  function extractUsageAndCost(cli, output, outputFormat) {
234
355
  if (cli === "claude" && outputFormat === "stream-json") {
235
356
  const parsed = parseStreamJson(output);
357
+ if (!parsed.usage) {
358
+ return { costUsd: parsed.costUsd ?? undefined };
359
+ }
236
360
  return {
237
- inputTokens: parsed.usage?.inputTokens,
238
- outputTokens: parsed.usage?.outputTokens,
361
+ inputTokens: parsed.usage.inputTokens,
362
+ outputTokens: parsed.usage.outputTokens,
363
+ cacheReadTokens: parsed.usage.cacheReadInputTokens || undefined,
364
+ cacheCreationTokens: parsed.usage.cacheCreationInputTokens || undefined,
239
365
  costUsd: parsed.costUsd ?? undefined,
240
366
  };
241
367
  }
368
+ if (cli === "codex" && outputFormat === "json") {
369
+ const parsed = parseCodexJsonStream(output);
370
+ if (!parsed.usage) {
371
+ return {};
372
+ }
373
+ return {
374
+ inputTokens: parsed.usage.input_tokens,
375
+ outputTokens: parsed.usage.output_tokens,
376
+ cacheReadTokens: parsed.usage.cache_read_tokens,
377
+ cacheCreationTokens: parsed.usage.cache_creation_tokens,
378
+ costUsd: parsed.usage.cost_usd,
379
+ };
380
+ }
381
+ if (cli === "gemini" && outputFormat === "json") {
382
+ const parsed = parseGeminiJson(output);
383
+ if (!parsed || !parsed.usage) {
384
+ return {};
385
+ }
386
+ return {
387
+ inputTokens: parsed.usage.input_tokens,
388
+ outputTokens: parsed.usage.output_tokens,
389
+ cacheReadTokens: parsed.usage.cache_read_tokens,
390
+ };
391
+ }
392
+ // Mistral/Vibe: does not surface usage in its stdout/stream-json output. A
393
+ // future unit can read it from `~/.vibe/logs/session/<id>/metadata.json`
394
+ // once we resolve the session id post-run.
242
395
  return {};
243
396
  }
244
- function safeFlightStart(entry) {
397
+ function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
245
398
  try {
246
- flightRecorder.logStart(entry);
399
+ runtime.flightRecorder.logStart(entry);
247
400
  }
248
401
  catch (error) {
249
- logger.error("Flight recorder logStart failed", error);
402
+ runtime.logger.error("Flight recorder logStart failed", error);
250
403
  }
251
404
  }
252
- function safeFlightComplete(correlationId, result) {
405
+ function safeFlightComplete(correlationId, result, runtime = resolveGatewayServerRuntime()) {
253
406
  try {
254
- flightRecorder.logComplete(correlationId, result);
407
+ runtime.flightRecorder.logComplete(correlationId, result);
255
408
  }
256
409
  catch (error) {
257
- logger.error("Flight recorder logComplete failed", error);
410
+ runtime.logger.error("Flight recorder logComplete failed", error);
258
411
  }
259
412
  }
260
413
  function createApprovalDeniedResponse(operation, decision) {
@@ -321,104 +474,146 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
321
474
  //──────────────────────────────────────────────────────────────────────────────
322
475
  // MCP Resources
323
476
  //──────────────────────────────────────────────────────────────────────────────
324
- // Register skill resources (L2: full docs, read on demand)
325
- for (const skill of loadedSkills) {
326
- server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
327
- title: skill.name,
328
- description: skill.description,
329
- mimeType: "text/markdown",
330
- }, async () => ({
331
- contents: [
332
- {
333
- uri: `skills://${skill.name}`,
334
- mimeType: "text/markdown",
335
- text: skill.content,
336
- },
337
- ],
338
- }));
477
+ function registerBaseResources(server, runtime) {
478
+ // Register skill resources (L2: full docs, read on demand)
479
+ for (const skill of loadedSkills) {
480
+ server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
481
+ title: skill.name,
482
+ description: skill.description,
483
+ mimeType: "text/markdown",
484
+ }, async () => ({
485
+ contents: [
486
+ {
487
+ uri: `skills://${skill.name}`,
488
+ mimeType: "text/markdown",
489
+ text: skill.content,
490
+ },
491
+ ],
492
+ }));
493
+ }
494
+ runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
495
+ // Register all sessions resource
496
+ server.registerResource("all-sessions", "sessions://all", {
497
+ title: "📋 All Sessions",
498
+ description: "All conversation sessions across CLIs",
499
+ mimeType: "application/json",
500
+ }, async (uri) => {
501
+ runtime.logger.debug("Reading all sessions resource");
502
+ const contents = await runtime.resourceProvider.readResource(uri.href);
503
+ return { contents: contents ? [contents] : [] };
504
+ });
505
+ // Register Claude sessions resource
506
+ server.registerResource("claude-sessions", "sessions://claude", {
507
+ title: "🤖 Claude Sessions",
508
+ description: "Claude conversation sessions",
509
+ mimeType: "application/json",
510
+ }, async (uri) => {
511
+ runtime.logger.debug("Reading Claude sessions resource");
512
+ const contents = await runtime.resourceProvider.readResource(uri.href);
513
+ return { contents: contents ? [contents] : [] };
514
+ });
515
+ // Register Codex sessions resource
516
+ server.registerResource("codex-sessions", "sessions://codex", {
517
+ title: "💻 Codex Sessions",
518
+ description: "Codex conversation sessions",
519
+ mimeType: "application/json",
520
+ }, async (uri) => {
521
+ runtime.logger.debug("Reading Codex sessions resource");
522
+ const contents = await runtime.resourceProvider.readResource(uri.href);
523
+ return { contents: contents ? [contents] : [] };
524
+ });
525
+ // Register Gemini sessions resource
526
+ server.registerResource("gemini-sessions", "sessions://gemini", {
527
+ title: "✨ Gemini Sessions",
528
+ description: "Gemini conversation sessions",
529
+ mimeType: "application/json",
530
+ }, async (uri) => {
531
+ runtime.logger.debug("Reading Gemini sessions resource");
532
+ const contents = await runtime.resourceProvider.readResource(uri.href);
533
+ return { contents: contents ? [contents] : [] };
534
+ });
535
+ // Register Grok sessions resource
536
+ server.registerResource("grok-sessions", "sessions://grok", {
537
+ title: "⚡ Grok Sessions",
538
+ description: "Grok conversation sessions",
539
+ mimeType: "application/json",
540
+ }, async (uri) => {
541
+ runtime.logger.debug("Reading Grok sessions resource");
542
+ const contents = await runtime.resourceProvider.readResource(uri.href);
543
+ return { contents: contents ? [contents] : [] };
544
+ });
545
+ // Register Mistral sessions resource
546
+ server.registerResource("mistral-sessions", "sessions://mistral", {
547
+ title: "🌬 Mistral Sessions",
548
+ description: "Mistral Vibe conversation sessions",
549
+ mimeType: "application/json",
550
+ }, async (uri) => {
551
+ runtime.logger.debug("Reading Mistral sessions resource");
552
+ const contents = await runtime.resourceProvider.readResource(uri.href);
553
+ return { contents: contents ? [contents] : [] };
554
+ });
555
+ // Register Claude models resource
556
+ server.registerResource("claude-models", "models://claude", {
557
+ title: "🧠 Claude Models",
558
+ description: "Claude models and capabilities",
559
+ mimeType: "application/json",
560
+ }, async (uri) => {
561
+ runtime.logger.debug("Reading Claude models resource");
562
+ const contents = await runtime.resourceProvider.readResource(uri.href);
563
+ return { contents: contents ? [contents] : [] };
564
+ });
565
+ // Register Codex models resource
566
+ server.registerResource("codex-models", "models://codex", {
567
+ title: "🔧 Codex Models",
568
+ description: "Codex models and capabilities",
569
+ mimeType: "application/json",
570
+ }, async (uri) => {
571
+ runtime.logger.debug("Reading Codex models resource");
572
+ const contents = await runtime.resourceProvider.readResource(uri.href);
573
+ return { contents: contents ? [contents] : [] };
574
+ });
575
+ // Register Gemini models resource
576
+ server.registerResource("gemini-models", "models://gemini", {
577
+ title: "🌟 Gemini Models",
578
+ description: "Gemini models and capabilities",
579
+ mimeType: "application/json",
580
+ }, async (uri) => {
581
+ runtime.logger.debug("Reading Gemini models resource");
582
+ const contents = await runtime.resourceProvider.readResource(uri.href);
583
+ return { contents: contents ? [contents] : [] };
584
+ });
585
+ // Register Grok models resource
586
+ server.registerResource("grok-models", "models://grok", {
587
+ title: "⚡ Grok Models",
588
+ description: "Grok models and capabilities",
589
+ mimeType: "application/json",
590
+ }, async (uri) => {
591
+ runtime.logger.debug("Reading Grok models resource");
592
+ const contents = await runtime.resourceProvider.readResource(uri.href);
593
+ return { contents: contents ? [contents] : [] };
594
+ });
595
+ // Register Mistral models resource
596
+ server.registerResource("mistral-models", "models://mistral", {
597
+ title: "🌬 Mistral Models",
598
+ description: "Mistral Vibe models and capabilities",
599
+ mimeType: "application/json",
600
+ }, async (uri) => {
601
+ runtime.logger.debug("Reading Mistral models resource");
602
+ const contents = await runtime.resourceProvider.readResource(uri.href);
603
+ return { contents: contents ? [contents] : [] };
604
+ });
605
+ // Register performance metrics resource
606
+ server.registerResource("performance-metrics", "metrics://performance", {
607
+ title: "📈 Performance Metrics",
608
+ description: "Request counts, latency, success/failure rates",
609
+ mimeType: "application/json",
610
+ }, async (uri) => {
611
+ runtime.logger.debug("Reading performance metrics resource");
612
+ const contents = await runtime.resourceProvider.readResource(uri.href);
613
+ return { contents: contents ? [contents] : [] };
614
+ });
339
615
  }
340
- logger.info(`Registered ${loadedSkills.length} skill resources`);
341
- // Register all sessions resource
342
- server.registerResource("all-sessions", "sessions://all", {
343
- title: "📋 All Sessions",
344
- description: "All conversation sessions across CLIs",
345
- mimeType: "application/json",
346
- }, async (uri) => {
347
- logger.debug("Reading all sessions resource");
348
- const contents = await resourceProvider.readResource(uri.href);
349
- return { contents: contents ? [contents] : [] };
350
- });
351
- // Register Claude sessions resource
352
- server.registerResource("claude-sessions", "sessions://claude", {
353
- title: "🤖 Claude Sessions",
354
- description: "Claude conversation sessions",
355
- mimeType: "application/json",
356
- }, async (uri) => {
357
- logger.debug("Reading Claude sessions resource");
358
- const contents = await resourceProvider.readResource(uri.href);
359
- return { contents: contents ? [contents] : [] };
360
- });
361
- // Register Codex sessions resource
362
- server.registerResource("codex-sessions", "sessions://codex", {
363
- title: "💻 Codex Sessions",
364
- description: "Codex conversation sessions",
365
- mimeType: "application/json",
366
- }, async (uri) => {
367
- logger.debug("Reading Codex sessions resource");
368
- const contents = await resourceProvider.readResource(uri.href);
369
- return { contents: contents ? [contents] : [] };
370
- });
371
- // Register Gemini sessions resource
372
- server.registerResource("gemini-sessions", "sessions://gemini", {
373
- title: "✨ Gemini Sessions",
374
- description: "Gemini conversation sessions",
375
- mimeType: "application/json",
376
- }, async (uri) => {
377
- logger.debug("Reading Gemini sessions resource");
378
- const contents = await resourceProvider.readResource(uri.href);
379
- return { contents: contents ? [contents] : [] };
380
- });
381
- // Register Claude models resource
382
- server.registerResource("claude-models", "models://claude", {
383
- title: "🧠 Claude Models",
384
- description: "Claude models and capabilities",
385
- mimeType: "application/json",
386
- }, async (uri) => {
387
- logger.debug("Reading Claude models resource");
388
- const contents = await resourceProvider.readResource(uri.href);
389
- return { contents: contents ? [contents] : [] };
390
- });
391
- // Register Codex models resource
392
- server.registerResource("codex-models", "models://codex", {
393
- title: "🔧 Codex Models",
394
- description: "Codex models and capabilities",
395
- mimeType: "application/json",
396
- }, async (uri) => {
397
- logger.debug("Reading Codex models resource");
398
- const contents = await resourceProvider.readResource(uri.href);
399
- return { contents: contents ? [contents] : [] };
400
- });
401
- // Register Gemini models resource
402
- server.registerResource("gemini-models", "models://gemini", {
403
- title: "🌟 Gemini Models",
404
- description: "Gemini models and capabilities",
405
- mimeType: "application/json",
406
- }, async (uri) => {
407
- logger.debug("Reading Gemini models resource");
408
- const contents = await resourceProvider.readResource(uri.href);
409
- return { contents: contents ? [contents] : [] };
410
- });
411
- // Register performance metrics resource
412
- server.registerResource("performance-metrics", "metrics://performance", {
413
- title: "📈 Performance Metrics",
414
- description: "Request counts, latency, success/failure rates",
415
- mimeType: "application/json",
416
- }, async (uri) => {
417
- logger.debug("Reading performance metrics resource");
418
- const contents = await resourceProvider.readResource(uri.href);
419
- return { contents: contents ? [contents] : [] };
420
- });
421
- function prepareClaudeRequest(params) {
616
+ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
422
617
  const corrId = params.correlationId || randomUUID();
423
618
  const cliInfo = getCliInfo();
424
619
  const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
@@ -429,7 +624,7 @@ function prepareClaudeRequest(params) {
429
624
  disallowedTools: params.disallowedTools,
430
625
  });
431
626
  if (reviewIntegrity.violations.length > 0) {
432
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
627
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
433
628
  cli: "claude",
434
629
  operation: params.operation,
435
630
  score: reviewIntegrity.totalScore,
@@ -449,7 +644,7 @@ function prepareClaudeRequest(params) {
449
644
  const mcpConfig = mcpConfigResolution.config;
450
645
  let approvalDecision = null;
451
646
  if (params.approvalStrategy === "mcp_managed") {
452
- approvalDecision = approvalManager.decide({
647
+ approvalDecision = runtime.approvalManager.decide({
453
648
  cli: "claude",
454
649
  operation: params.operation,
455
650
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -486,8 +681,15 @@ function prepareClaudeRequest(params) {
486
681
  if (params.approvalStrategy === "mcp_managed") {
487
682
  args.push("--permission-mode", "bypassPermissions");
488
683
  }
489
- else if (params.dangerouslySkipPermissions) {
490
- args.push("--permission-mode", "bypassPermissions");
684
+ else {
685
+ const permFlags = resolveClaudePermissionFlags({
686
+ permissionMode: params.permissionMode,
687
+ dangerouslySkipPermissions: params.dangerouslySkipPermissions,
688
+ });
689
+ if (permFlags.warning) {
690
+ runtime.logger.warn(`[${corrId}] ${permFlags.warning}`);
691
+ }
692
+ args.push(...permFlags.args);
491
693
  }
492
694
  if (params.strictMcpConfig || mcpConfig.enabled.length > 0) {
493
695
  args.push("--mcp-config", mcpConfig.path);
@@ -495,6 +697,26 @@ function prepareClaudeRequest(params) {
495
697
  args.push("--strict-mcp-config");
496
698
  }
497
699
  }
700
+ // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
701
+ let validatedAgents;
702
+ if (params.agents && Object.keys(params.agents).length > 0) {
703
+ const result = validateClaudeAgentsMap(params.agents);
704
+ if (!result.ok) {
705
+ return createErrorResponse("claude", 1, "", corrId, new Error(result.message));
706
+ }
707
+ validatedAgents = result.value;
708
+ }
709
+ args.push(...prepareClaudeHighImpactFlags({
710
+ agent: params.agent,
711
+ agents: validatedAgents,
712
+ forkSession: params.forkSession,
713
+ systemPrompt: params.systemPrompt,
714
+ appendSystemPrompt: params.appendSystemPrompt,
715
+ maxBudgetUsd: params.maxBudgetUsd,
716
+ maxTurns: params.maxTurns,
717
+ effort: params.effort,
718
+ excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
719
+ }));
498
720
  return {
499
721
  corrId,
500
722
  effectivePrompt,
@@ -506,14 +728,14 @@ function prepareClaudeRequest(params) {
506
728
  args,
507
729
  };
508
730
  }
509
- function prepareCodexRequest(params) {
731
+ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
510
732
  const corrId = params.correlationId || randomUUID();
511
733
  const cliInfo = getCliInfo();
512
734
  const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
513
735
  // Review integrity check on raw prompt (before optimization)
514
736
  const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
515
737
  if (reviewIntegrity.violations.length > 0) {
516
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
738
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
517
739
  cli: "codex",
518
740
  operation: params.operation,
519
741
  score: reviewIntegrity.totalScore,
@@ -528,7 +750,7 @@ function prepareCodexRequest(params) {
528
750
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
529
751
  let approvalDecision = null;
530
752
  if (params.approvalStrategy === "mcp_managed") {
531
- approvalDecision = approvalManager.decide({
753
+ approvalDecision = runtime.approvalManager.decide({
532
754
  cli: "codex",
533
755
  operation: params.operation,
534
756
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -543,15 +765,100 @@ function prepareCodexRequest(params) {
543
765
  return createApprovalDeniedResponse(params.operation, approvalDecision);
544
766
  }
545
767
  }
768
+ // Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
769
+ // Note: `codex exec resume` does NOT accept `--full-auto`; the original
770
+ // session's approval policy is inherited. We silently drop fullAuto on resume.
771
+ let sessionPlan;
772
+ try {
773
+ sessionPlan = resolveCodexSessionArgs({
774
+ sessionId: params.sessionId,
775
+ resumeLatest: params.resumeLatest,
776
+ createNewSession: params.createNewSession,
777
+ });
778
+ }
779
+ catch (err) {
780
+ return createErrorResponse(params.operation, 1, "", corrId, err);
781
+ }
546
782
  const args = ["exec"];
783
+ if (sessionPlan.mode !== "new") {
784
+ args.push("resume");
785
+ if (sessionPlan.mode === "resume-latest") {
786
+ args.push("--last");
787
+ }
788
+ }
547
789
  if (resolvedModel)
548
790
  args.push("--model", resolvedModel);
549
- if (params.fullAuto)
550
- args.push("--full-auto");
791
+ // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
792
+ // `codex exec resume` rejects all of these (the original session's policy is
793
+ // inherited), so we only emit them when starting a NEW session.
794
+ if (sessionPlan.mode === "new") {
795
+ const sandboxFlags = resolveCodexSandboxFlags({
796
+ sandboxMode: params.sandboxMode,
797
+ askForApproval: params.askForApproval,
798
+ fullAuto: params.fullAuto,
799
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
800
+ });
801
+ if (sandboxFlags.warning) {
802
+ runtime.logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
803
+ }
804
+ args.push(...sandboxFlags.args);
805
+ }
551
806
  if (params.dangerouslyBypassApprovalsAndSandbox) {
552
807
  args.push("--dangerously-bypass-approvals-and-sandbox");
553
808
  }
554
- args.push("--skip-git-repo-check", effectivePrompt);
809
+ // U23 fix: emit `--json` when the caller asked for JSON output so the
810
+ // codex-json-parser actually receives JSONL events. This is what makes
811
+ // extractUsageAndCost() reachable from the tool surface; without it, the
812
+ // U23 parser is dead code.
813
+ if (params.outputFormat === "json") {
814
+ args.push("--json");
815
+ }
816
+ args.push("--skip-git-repo-check");
817
+ // U26: High-impact feature flags. Some of these (`--output-schema`,
818
+ // `--search`, `-C`, `--add-dir`) are rejected by `codex exec resume`, so we
819
+ // only emit them on a NEW session. Images / ephemeral / profile /
820
+ // ignore-rules / ignore-user-config are allowed on resume per the audited
821
+ // CLI help; we emit them in both branches.
822
+ let highImpactCleanup;
823
+ if (sessionPlan.mode === "new") {
824
+ const high = prepareCodexHighImpactFlags({
825
+ outputSchema: params.outputSchema,
826
+ search: params.search,
827
+ profile: params.profile,
828
+ configOverrides: params.configOverrides,
829
+ ephemeral: params.ephemeral,
830
+ images: params.images,
831
+ ignoreUserConfig: params.ignoreUserConfig,
832
+ ignoreRules: params.ignoreRules,
833
+ });
834
+ if (high.missingImagePath) {
835
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
836
+ }
837
+ args.push(...high.args);
838
+ highImpactCleanup = high.cleanup;
839
+ }
840
+ else {
841
+ // On resume, emit only the resume-safe subset (profile, ephemeral,
842
+ // images, ignoreUserConfig, ignoreRules). outputSchema, search, and
843
+ // configOverrides are dropped silently to mirror existing behavior for
844
+ // sandbox/ask-for-approval on resume.
845
+ const high = prepareCodexHighImpactFlags({
846
+ profile: params.profile,
847
+ ephemeral: params.ephemeral,
848
+ images: params.images,
849
+ ignoreUserConfig: params.ignoreUserConfig,
850
+ ignoreRules: params.ignoreRules,
851
+ });
852
+ if (high.missingImagePath) {
853
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
854
+ }
855
+ args.push(...high.args);
856
+ highImpactCleanup = high.cleanup;
857
+ }
858
+ if (sessionPlan.mode === "resume-by-id" && sessionPlan.sessionId) {
859
+ args.push(sessionPlan.sessionId);
860
+ }
861
+ args.push(effectivePrompt);
555
862
  return {
556
863
  corrId,
557
864
  effectivePrompt,
@@ -560,9 +867,10 @@ function prepareCodexRequest(params) {
560
867
  approvalDecision,
561
868
  reviewIntegrity,
562
869
  args,
870
+ cleanup: highImpactCleanup,
563
871
  };
564
872
  }
565
- function prepareGeminiRequest(params) {
873
+ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
566
874
  const corrId = params.correlationId || randomUUID();
567
875
  const cliInfo = getCliInfo();
568
876
  const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
@@ -572,7 +880,7 @@ function prepareGeminiRequest(params) {
572
880
  allowedTools: params.allowedTools,
573
881
  });
574
882
  if (reviewIntegrity.violations.length > 0) {
575
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
883
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
576
884
  cli: "gemini",
577
885
  operation: params.operation,
578
886
  score: reviewIntegrity.totalScore,
@@ -587,7 +895,7 @@ function prepareGeminiRequest(params) {
587
895
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
588
896
  let approvalDecision = null;
589
897
  if (params.approvalStrategy === "mcp_managed") {
590
- approvalDecision = approvalManager.decide({
898
+ approvalDecision = runtime.approvalManager.decide({
591
899
  cli: "gemini",
592
900
  operation: params.operation,
593
901
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -604,7 +912,29 @@ function prepareGeminiRequest(params) {
604
912
  }
605
913
  }
606
914
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
607
- const args = [effectivePrompt];
915
+ // U27: Validate high-impact policy paths and prepend attachment tokens
916
+ // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
917
+ const highImpact = prepareGeminiHighImpactFlags({
918
+ sandbox: params.sandbox,
919
+ policyFiles: params.policyFiles,
920
+ adminPolicyFiles: params.adminPolicyFiles,
921
+ });
922
+ if (highImpact.missingPolicyPath) {
923
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
924
+ }
925
+ if (params.attachments && params.attachments.length > 0) {
926
+ try {
927
+ effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
928
+ }
929
+ catch (err) {
930
+ return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
931
+ }
932
+ }
933
+ // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
934
+ // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
935
+ // the documented non-interactive flag and is robust against future CLI mode
936
+ // changes.
937
+ const args = ["-p", effectivePrompt];
608
938
  if (resolvedModel)
609
939
  args.push("--model", resolvedModel);
610
940
  if (effectiveApprovalMode)
@@ -621,6 +951,90 @@ function prepareGeminiRequest(params) {
621
951
  sanitizeCliArgValues(params.includeDirs, "includeDirs");
622
952
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
623
953
  }
954
+ // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
955
+ // existing flag set so positional ordering relative to `-p` is preserved.
956
+ args.push(...highImpact.args);
957
+ // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
958
+ // JSON parser is otherwise unreachable from the tool surface and the
959
+ // structured usageMetadata is silently dropped.
960
+ if (params.outputFormat === "json") {
961
+ args.push("-o", "json");
962
+ }
963
+ return {
964
+ corrId,
965
+ effectivePrompt,
966
+ resolvedModel,
967
+ requestedMcpServers,
968
+ approvalDecision,
969
+ reviewIntegrity,
970
+ args,
971
+ };
972
+ }
973
+ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
974
+ const corrId = params.correlationId || randomUUID();
975
+ const cliInfo = getCliInfo();
976
+ const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
977
+ // Review integrity check on raw prompt (before optimization)
978
+ const reviewIntegrity = checkReviewIntegrity({
979
+ prompt: params.prompt,
980
+ allowedTools: params.allowedTools,
981
+ disallowedTools: params.disallowedTools,
982
+ });
983
+ if (reviewIntegrity.violations.length > 0) {
984
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
985
+ cli: "grok",
986
+ operation: params.operation,
987
+ score: reviewIntegrity.totalScore,
988
+ });
989
+ }
990
+ let effectivePrompt = params.prompt;
991
+ if (params.optimizePrompt) {
992
+ const optimized = optimizePromptText(effectivePrompt);
993
+ logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
994
+ effectivePrompt = optimized;
995
+ }
996
+ const requestedMcpServers = normalizeMcpServers(params.mcpServers);
997
+ let approvalDecision = null;
998
+ if (params.approvalStrategy === "mcp_managed") {
999
+ approvalDecision = runtime.approvalManager.decide({
1000
+ cli: "grok",
1001
+ operation: params.operation,
1002
+ prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
1003
+ bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
1004
+ fullAuto: false,
1005
+ requestedMcpServers,
1006
+ allowedTools: params.allowedTools,
1007
+ disallowedTools: params.disallowedTools,
1008
+ policy: params.approvalPolicy,
1009
+ metadata: { model: resolvedModel || "default" },
1010
+ reviewIntegrity,
1011
+ });
1012
+ if (approvalDecision.status !== "approved") {
1013
+ return createApprovalDeniedResponse(params.operation, approvalDecision);
1014
+ }
1015
+ }
1016
+ const effectiveAlwaysApprove = params.approvalStrategy === "mcp_managed" ? true : Boolean(params.alwaysApprove);
1017
+ const args = ["-p", effectivePrompt];
1018
+ if (resolvedModel)
1019
+ args.push("--model", resolvedModel);
1020
+ if (params.outputFormat)
1021
+ args.push("--output-format", params.outputFormat);
1022
+ if (effectiveAlwaysApprove) {
1023
+ args.push("--always-approve");
1024
+ }
1025
+ else if (params.permissionMode) {
1026
+ args.push("--permission-mode", params.permissionMode);
1027
+ }
1028
+ if (params.effort)
1029
+ args.push("--effort", params.effort);
1030
+ if (params.reasoningEffort)
1031
+ args.push("--reasoning-effort", params.reasoningEffort);
1032
+ if (params.allowedTools && params.allowedTools.length > 0) {
1033
+ args.push("--tools", params.allowedTools.join(","));
1034
+ }
1035
+ if (params.disallowedTools && params.disallowedTools.length > 0) {
1036
+ args.push("--disallowed-tools", params.disallowedTools.join(","));
1037
+ }
624
1038
  return {
625
1039
  corrId,
626
1040
  effectivePrompt,
@@ -631,6 +1045,78 @@ function prepareGeminiRequest(params) {
631
1045
  args,
632
1046
  };
633
1047
  }
1048
+ function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
1049
+ const corrId = params.correlationId || randomUUID();
1050
+ const cliInfo = getCliInfo();
1051
+ const resolvedModel = resolveModelAlias("mistral", params.model, cliInfo) || "devstral-medium";
1052
+ const reviewIntegrity = checkReviewIntegrity({
1053
+ prompt: params.prompt,
1054
+ allowedTools: params.allowedTools,
1055
+ disallowedTools: params.disallowedTools,
1056
+ });
1057
+ if (reviewIntegrity.violations.length > 0) {
1058
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
1059
+ cli: "mistral",
1060
+ operation: params.operation,
1061
+ score: reviewIntegrity.totalScore,
1062
+ });
1063
+ }
1064
+ let effectivePrompt = params.prompt;
1065
+ if (params.optimizePrompt) {
1066
+ const optimized = optimizePromptText(effectivePrompt);
1067
+ logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
1068
+ effectivePrompt = optimized;
1069
+ }
1070
+ const requestedMcpServers = normalizeMcpServers(params.mcpServers);
1071
+ let approvalDecision = null;
1072
+ if (params.approvalStrategy === "mcp_managed") {
1073
+ approvalDecision = runtime.approvalManager.decide({
1074
+ cli: "mistral",
1075
+ operation: params.operation,
1076
+ prompt: params.prompt,
1077
+ bypassRequested: params.permissionMode === "auto-approve",
1078
+ fullAuto: false,
1079
+ requestedMcpServers,
1080
+ allowedTools: params.allowedTools,
1081
+ disallowedTools: params.disallowedTools,
1082
+ policy: params.approvalPolicy,
1083
+ metadata: { model: resolvedModel, vibeActiveModelEnv: true },
1084
+ reviewIntegrity,
1085
+ });
1086
+ if (approvalDecision.status !== "approved") {
1087
+ return createApprovalDeniedResponse(params.operation, approvalDecision);
1088
+ }
1089
+ }
1090
+ // Under mcp_managed, force --agent auto-approve so the approval gate's
1091
+ // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
1092
+ // forcing under mcp_managed).
1093
+ const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
1094
+ ? "auto-approve"
1095
+ : (params.permissionMode ?? "auto-approve");
1096
+ const prep = buildMistralCliInvocation({
1097
+ prompt: effectivePrompt,
1098
+ resolvedModel,
1099
+ outputFormat: params.outputFormat,
1100
+ permissionMode: effectivePermissionMode,
1101
+ effort: params.effort,
1102
+ reasoningEffort: params.reasoningEffort,
1103
+ allowedTools: params.allowedTools,
1104
+ disallowedTools: params.disallowedTools,
1105
+ });
1106
+ if (prep.ignoredDisallowedTools) {
1107
+ runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
1108
+ }
1109
+ return {
1110
+ corrId,
1111
+ effectivePrompt,
1112
+ resolvedModel,
1113
+ requestedMcpServers,
1114
+ approvalDecision,
1115
+ reviewIntegrity,
1116
+ args: prep.args,
1117
+ mistralEnv: prep.env,
1118
+ };
1119
+ }
634
1120
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
635
1121
  let finalStdout = stdout;
636
1122
  // Skip response optimization for JSON output to prevent corrupting structured data
@@ -682,7 +1168,26 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
682
1168
  }
683
1169
  return response;
684
1170
  }
1171
+ function resolveHandlerRuntime(deps) {
1172
+ if (deps.runtime)
1173
+ return deps.runtime;
1174
+ const asyncDeps = deps;
1175
+ // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
1176
+ const depLogger = deps.logger;
1177
+ const normalizedLogger = {
1178
+ info: depLogger.info,
1179
+ warn: depLogger.warn ?? ((msg, ...rest) => depLogger.info(`[WARN] ${msg}`, ...rest)),
1180
+ error: depLogger.error,
1181
+ debug: depLogger.debug,
1182
+ };
1183
+ return resolveGatewayServerRuntime({
1184
+ sessionManager: deps.sessionManager,
1185
+ logger: normalizedLogger,
1186
+ asyncJobManager: asyncDeps.asyncJobManager,
1187
+ });
1188
+ }
685
1189
  export async function handleGeminiRequest(deps, params) {
1190
+ const runtime = resolveHandlerRuntime(deps);
686
1191
  const startTime = Date.now();
687
1192
  const prep = prepareGeminiRequest({
688
1193
  prompt: params.prompt,
@@ -696,7 +1201,12 @@ export async function handleGeminiRequest(deps, params) {
696
1201
  correlationId: params.correlationId,
697
1202
  optimizePrompt: params.optimizePrompt,
698
1203
  operation: "gemini_request",
699
- });
1204
+ outputFormat: params.outputFormat,
1205
+ sandbox: params.sandbox,
1206
+ policyFiles: params.policyFiles,
1207
+ adminPolicyFiles: params.adminPolicyFiles,
1208
+ attachments: params.attachments,
1209
+ }, runtime);
700
1210
  if (!("args" in prep))
701
1211
  return prep;
702
1212
  const { corrId, args } = prep;
@@ -708,20 +1218,24 @@ export async function handleGeminiRequest(deps, params) {
708
1218
  model: prep.resolvedModel || "default",
709
1219
  prompt: params.prompt,
710
1220
  sessionId: params.sessionId,
711
- });
1221
+ }, runtime);
712
1222
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
713
1223
  try {
714
- // Session arg planning (pure, no I/O)
715
- const sessionResult = resolveSessionResumeArgs({
1224
+ // U27: Session arg planning. For fresh sessions, emit `--session-id <uuid>`
1225
+ // so the gateway and Gemini agree on the session identifier from turn 1.
1226
+ // For resume flows, fall back to `--resume <id>` (existing behavior).
1227
+ const sessionPlan = resolveGeminiSessionPlan({
716
1228
  sessionId: params.sessionId,
717
1229
  resumeLatest: params.resumeLatest,
718
1230
  createNewSession: params.createNewSession,
719
1231
  });
720
- args.push(...sessionResult.resumeArgs);
721
- const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs));
1232
+ args.push(...sessionPlan.args);
1233
+ const userProvidedSession = sessionPlan.resumed;
1234
+ const effectiveSessionIdHint = sessionPlan.emittedSessionId ?? params.sessionId;
1235
+ const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
722
1236
  // Deferred — job still running, return async reference
723
1237
  if (isDeferredResponse(result)) {
724
- return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1238
+ return buildDeferredToolResponse(result, effectiveSessionIdHint);
725
1239
  }
726
1240
  const { stdout, stderr, code } = result;
727
1241
  durationMs = Math.max(0, Date.now() - startTime);
@@ -736,13 +1250,15 @@ export async function handleGeminiRequest(deps, params) {
736
1250
  exitCode: code,
737
1251
  errorMessage: stderr || `Exit code ${code}`,
738
1252
  status: "failed",
739
- });
1253
+ }, runtime);
740
1254
  return createErrorResponse("gemini", code, stderr, corrId);
741
1255
  }
742
1256
  wasSuccessful = true;
743
- // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
744
- let effectiveSessionId = sessionResult.effectiveSessionId;
745
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1257
+ // U27 Post-success session I/O. Mirror the gateway store 1:1 to whatever
1258
+ // session id Gemini is using (either the user-supplied resume id or the
1259
+ // deterministic --session-id we emitted).
1260
+ let effectiveSessionId = effectiveSessionIdHint;
1261
+ if (effectiveSessionId) {
746
1262
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
747
1263
  if (!existing) {
748
1264
  try {
@@ -756,12 +1272,9 @@ export async function handleGeminiRequest(deps, params) {
756
1272
  }
757
1273
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
758
1274
  }
759
- else if (!params.createNewSession && !effectiveSessionId) {
760
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
761
- effectiveSessionId = newSession.id;
762
- }
763
1275
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
764
- const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
1276
+ const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
1277
+ const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
765
1278
  safeFlightComplete(corrId, {
766
1279
  response: stdout,
767
1280
  durationMs,
@@ -771,7 +1284,12 @@ export async function handleGeminiRequest(deps, params) {
771
1284
  optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
772
1285
  exitCode: 0,
773
1286
  status: "completed",
774
- });
1287
+ inputTokens: geminiUsage.inputTokens,
1288
+ outputTokens: geminiUsage.outputTokens,
1289
+ cacheReadTokens: geminiUsage.cacheReadTokens,
1290
+ cacheCreationTokens: geminiUsage.cacheCreationTokens,
1291
+ costUsd: geminiUsage.costUsd,
1292
+ }, runtime);
775
1293
  return response;
776
1294
  }
777
1295
  catch (error) {
@@ -786,15 +1304,16 @@ export async function handleGeminiRequest(deps, params) {
786
1304
  exitCode: 1,
787
1305
  errorMessage: error.message,
788
1306
  status: "failed",
789
- });
1307
+ }, runtime);
790
1308
  return createErrorResponse("gemini", 1, "", corrId, error);
791
1309
  }
792
1310
  finally {
793
1311
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
794
- performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
1312
+ runtime.performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
795
1313
  }
796
1314
  }
797
1315
  export async function handleGeminiRequestAsync(deps, params) {
1316
+ const runtime = resolveHandlerRuntime(deps);
798
1317
  const prep = prepareGeminiRequest({
799
1318
  prompt: params.prompt,
800
1319
  model: params.model,
@@ -807,21 +1326,26 @@ export async function handleGeminiRequestAsync(deps, params) {
807
1326
  correlationId: params.correlationId,
808
1327
  optimizePrompt: params.optimizePrompt,
809
1328
  operation: "gemini_request_async",
810
- });
1329
+ outputFormat: params.outputFormat,
1330
+ sandbox: params.sandbox,
1331
+ policyFiles: params.policyFiles,
1332
+ adminPolicyFiles: params.adminPolicyFiles,
1333
+ attachments: params.attachments,
1334
+ }, runtime);
811
1335
  if (!("args" in prep))
812
1336
  return prep;
813
1337
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
814
1338
  try {
815
- // Session arg planning (pure, no I/O)
816
- const sessionResult = resolveSessionResumeArgs({
1339
+ // U27: Session arg planning with deterministic --session-id for fresh sessions.
1340
+ const sessionPlan = resolveGeminiSessionPlan({
817
1341
  sessionId: params.sessionId,
818
1342
  resumeLatest: params.resumeLatest,
819
1343
  createNewSession: params.createNewSession,
820
1344
  });
821
- args.push(...sessionResult.resumeArgs);
1345
+ args.push(...sessionPlan.args);
822
1346
  // Pre-start session I/O (async handlers: prevent orphaned jobs)
823
- let effectiveSessionId = sessionResult.effectiveSessionId;
824
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1347
+ let effectiveSessionId = sessionPlan.emittedSessionId ?? params.sessionId;
1348
+ if (effectiveSessionId) {
825
1349
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
826
1350
  if (!existing) {
827
1351
  try {
@@ -835,18 +1359,16 @@ export async function handleGeminiRequestAsync(deps, params) {
835
1359
  }
836
1360
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
837
1361
  }
838
- else if (!params.createNewSession && !effectiveSessionId) {
839
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
840
- effectiveSessionId = newSession.id;
841
- }
842
- // Start job only after all session I/O succeeds
843
- const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs));
1362
+ // Start job only after all session I/O succeeds. U23: forward outputFormat
1363
+ // so AsyncJobManager records it in the durable store (the manager also
1364
+ // surfaces it in the snapshot).
1365
+ const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
844
1366
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
845
1367
  const asyncResponse = {
846
1368
  success: true,
847
1369
  job,
848
1370
  sessionId: effectiveSessionId || null,
849
- resumable: sessionResult.userProvidedSession,
1371
+ resumable: sessionPlan.resumed,
850
1372
  approval: approvalDecision,
851
1373
  mcpServers: { requested: requestedMcpServers },
852
1374
  };
@@ -866,140 +1388,26 @@ export async function handleGeminiRequestAsync(deps, params) {
866
1388
  return createErrorResponse("gemini_request_async", 1, "", corrId, error);
867
1389
  }
868
1390
  }
869
- export async function handleCodexRequestAsync(deps, params) {
870
- const prep = prepareCodexRequest({
1391
+ export async function handleGrokRequest(deps, params) {
1392
+ const runtime = resolveHandlerRuntime(deps);
1393
+ const startTime = Date.now();
1394
+ const prep = prepareGrokRequest({
871
1395
  prompt: params.prompt,
872
1396
  model: params.model,
873
- fullAuto: params.fullAuto,
874
- dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1397
+ outputFormat: params.outputFormat,
1398
+ alwaysApprove: params.alwaysApprove,
1399
+ permissionMode: params.permissionMode,
1400
+ effort: params.effort,
1401
+ reasoningEffort: params.reasoningEffort,
1402
+ allowedTools: params.allowedTools,
1403
+ disallowedTools: params.disallowedTools,
875
1404
  approvalStrategy: params.approvalStrategy,
876
1405
  approvalPolicy: params.approvalPolicy,
877
1406
  mcpServers: params.mcpServers,
878
1407
  correlationId: params.correlationId,
879
1408
  optimizePrompt: params.optimizePrompt,
880
- operation: "codex_request_async",
881
- });
882
- if (!("args" in prep))
883
- return prep;
884
- const { corrId, args, requestedMcpServers, approvalDecision } = prep;
885
- try {
886
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
887
- let effectiveSessionId = params.sessionId;
888
- if (!params.createNewSession && !params.sessionId) {
889
- const activeSession = await deps.sessionManager.getActiveSession("codex");
890
- if (activeSession) {
891
- effectiveSessionId = activeSession.id;
892
- }
893
- else {
894
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
895
- effectiveSessionId = newSession.id;
896
- }
897
- }
898
- else if (params.sessionId) {
899
- await deps.sessionManager.updateSessionUsage(params.sessionId);
900
- }
901
- else if (params.createNewSession) {
902
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
903
- effectiveSessionId = newSession.id;
904
- }
905
- // Start job only after all session I/O succeeds
906
- const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs));
907
- deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
908
- const asyncResponse = {
909
- success: true,
910
- job,
911
- sessionId: effectiveSessionId || null,
912
- approval: approvalDecision,
913
- mcpServers: { requested: requestedMcpServers },
914
- };
915
- if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
916
- asyncResponse.reviewIntegrity = prep.reviewIntegrity;
917
- }
918
- return {
919
- content: [
920
- {
921
- type: "text",
922
- text: JSON.stringify(asyncResponse, null, 2),
923
- },
924
- ],
925
- };
926
- }
927
- catch (error) {
928
- return createErrorResponse("codex_request_async", 1, "", corrId, error);
929
- }
930
- }
931
- //──────────────────────────────────────────────────────────────────────────────
932
- // Claude Code Tool
933
- //──────────────────────────────────────────────────────────────────────────────
934
- server.tool("claude_request", {
935
- prompt: z
936
- .string()
937
- .min(1, "Prompt cannot be empty")
938
- .max(100000, "Prompt too long (max 100k chars)")
939
- .describe("Prompt text for Claude"),
940
- model: z
941
- .string()
942
- .optional()
943
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
944
- outputFormat: z
945
- .enum(["text", "json", "stream-json"])
946
- .default("text")
947
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
948
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
949
- continueSession: z.boolean().default(false).describe("Continue active session"),
950
- createNewSession: z.boolean().default(false).describe("Force new session"),
951
- allowedTools: z
952
- .array(z.string())
953
- .optional()
954
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
955
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
956
- dangerouslySkipPermissions: z
957
- .boolean()
958
- .default(false)
959
- .describe("Bypass permissions (sandbox only)"),
960
- approvalStrategy: z
961
- .enum(["legacy", "mcp_managed"])
962
- .default("legacy")
963
- .describe("Approval strategy"),
964
- approvalPolicy: z
965
- .enum(["strict", "balanced", "permissive"])
966
- .optional()
967
- .describe("Approval policy override"),
968
- mcpServers: z
969
- .array(MCP_SERVER_ENUM)
970
- .default(["sqry"])
971
- .describe("MCP servers exposed to Claude"),
972
- strictMcpConfig: z
973
- .boolean()
974
- .default(false)
975
- .describe("Restrict Claude to provided MCP config only"),
976
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
977
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
978
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
979
- idleTimeoutMs: z
980
- .number()
981
- .int()
982
- .min(30_000)
983
- .max(3_600_000)
984
- .optional()
985
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
986
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
987
- const startTime = Date.now();
988
- const prep = prepareClaudeRequest({
989
- prompt,
990
- model,
991
- outputFormat,
992
- allowedTools,
993
- disallowedTools,
994
- dangerouslySkipPermissions,
995
- approvalStrategy,
996
- approvalPolicy,
997
- mcpServers,
998
- strictMcpConfig,
999
- correlationId,
1000
- optimizePrompt,
1001
- operation: "claude_request",
1002
- });
1409
+ operation: "grok_request",
1410
+ }, runtime);
1003
1411
  if (!("args" in prep))
1004
1412
  return prep;
1005
1413
  const { corrId, args } = prep;
@@ -1007,432 +1415,451 @@ server.tool("claude_request", {
1007
1415
  let wasSuccessful = false;
1008
1416
  safeFlightStart({
1009
1417
  correlationId: corrId,
1010
- cli: "claude",
1418
+ cli: "grok",
1011
1419
  model: prep.resolvedModel || "default",
1012
- prompt,
1013
- sessionId,
1014
- });
1015
- logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
1420
+ prompt: params.prompt,
1421
+ sessionId: params.sessionId,
1422
+ }, runtime);
1423
+ deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${params.prompt.length}`);
1016
1424
  try {
1017
- // Session management
1018
- let effectiveSessionId = sessionId;
1019
- let useContinue = continueSession;
1020
- const activeSession = await sessionManager.getActiveSession("claude");
1021
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1022
- effectiveSessionId = activeSession.id;
1023
- useContinue = true;
1024
- }
1025
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1026
- useContinue = true;
1027
- }
1028
- if (useContinue) {
1029
- args.push("--continue");
1030
- }
1031
- else if (effectiveSessionId) {
1032
- args.push("--session-id", effectiveSessionId);
1033
- await sessionManager.updateSessionUsage(effectiveSessionId);
1034
- }
1035
- // Idle timeout only for stream-json (text/json produce no output until done)
1036
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1037
- const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat);
1425
+ // Session arg planning (pure, no I/O)
1426
+ const sessionResult = resolveGrokSessionArgs({
1427
+ sessionId: params.sessionId,
1428
+ resumeLatest: params.resumeLatest,
1429
+ createNewSession: params.createNewSession,
1430
+ });
1431
+ args.push(...sessionResult.resumeArgs);
1432
+ const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
1038
1433
  // Deferred — job still running, return async reference
1039
1434
  if (isDeferredResponse(result)) {
1040
- return buildDeferredToolResponse(result, effectiveSessionId);
1435
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1041
1436
  }
1042
1437
  const { stdout, stderr, code } = result;
1043
1438
  durationMs = Math.max(0, Date.now() - startTime);
1044
1439
  if (code !== 0) {
1045
- logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
1440
+ deps.logger.info(`[${corrId}] grok_request failed in ${durationMs}ms`);
1046
1441
  safeFlightComplete(corrId, {
1047
1442
  response: stderr || "",
1048
1443
  durationMs,
1049
1444
  retryCount: 0,
1050
1445
  circuitBreakerState: "closed",
1051
- optimizationApplied: optimizePrompt || optimizeResponse,
1446
+ optimizationApplied: false,
1052
1447
  exitCode: code,
1053
1448
  errorMessage: stderr || `Exit code ${code}`,
1054
1449
  status: "failed",
1055
- });
1056
- return createErrorResponse("claude", code, stderr, corrId);
1450
+ }, runtime);
1451
+ return createErrorResponse("grok", code, stderr, corrId);
1057
1452
  }
1058
1453
  wasSuccessful = true;
1059
- // If we used a session ID and it's not tracked yet, create a session record
1060
- if (effectiveSessionId) {
1061
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1062
- if (!existingSession) {
1063
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1454
+ // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
1455
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1456
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1457
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1458
+ if (!existing) {
1459
+ try {
1460
+ await deps.sessionManager.createSession("grok", "Grok Session", effectiveSessionId);
1461
+ }
1462
+ catch {
1463
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1464
+ if (!rechecked)
1465
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1466
+ }
1064
1467
  }
1468
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1065
1469
  }
1066
- logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
1067
- // Parse stream-json NDJSON output to extract result text
1068
- if (outputFormat === "stream-json") {
1069
- const parsed = parseStreamJson(stdout);
1070
- if (parsed.costUsd !== null) {
1071
- logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
1072
- }
1073
- safeFlightComplete(corrId, {
1074
- response: parsed.text,
1075
- inputTokens: parsed.usage?.inputTokens,
1076
- outputTokens: parsed.usage?.outputTokens,
1077
- durationMs,
1078
- retryCount: 0,
1079
- circuitBreakerState: "closed",
1080
- costUsd: parsed.costUsd ?? undefined,
1081
- optimizationApplied: optimizePrompt || optimizeResponse,
1082
- exitCode: 0,
1083
- status: "completed",
1084
- });
1085
- return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1470
+ else if (!params.createNewSession && !effectiveSessionId) {
1471
+ const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1472
+ effectiveSessionId = newSession.id;
1086
1473
  }
1474
+ deps.logger.info(`[${corrId}] grok_request completed successfully in ${durationMs}ms`);
1475
+ const response = buildCliResponse("grok", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
1087
1476
  safeFlightComplete(corrId, {
1088
1477
  response: stdout,
1089
1478
  durationMs,
1090
1479
  retryCount: 0,
1091
1480
  circuitBreakerState: "closed",
1092
- optimizationApplied: optimizePrompt || optimizeResponse,
1481
+ approvalDecision: prep.approvalDecision?.status,
1482
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1093
1483
  exitCode: 0,
1094
1484
  status: "completed",
1095
- });
1096
- return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1485
+ }, runtime);
1486
+ return response;
1097
1487
  }
1098
1488
  catch (error) {
1099
1489
  const elapsedMs = Math.max(0, Date.now() - startTime);
1100
- logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
1490
+ deps.logger.info(`[${corrId}] grok_request threw exception after ${elapsedMs}ms`);
1101
1491
  safeFlightComplete(corrId, {
1102
1492
  response: "",
1103
1493
  durationMs: elapsedMs,
1104
1494
  retryCount: 0,
1105
1495
  circuitBreakerState: "closed",
1106
- optimizationApplied: optimizePrompt || optimizeResponse,
1496
+ optimizationApplied: false,
1107
1497
  exitCode: 1,
1108
1498
  errorMessage: error.message,
1109
1499
  status: "failed",
1110
- });
1111
- return createErrorResponse("claude", 1, "", corrId, error);
1500
+ }, runtime);
1501
+ return createErrorResponse("grok", 1, "", corrId, error);
1112
1502
  }
1113
1503
  finally {
1114
1504
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1115
- performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
1505
+ runtime.performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1116
1506
  }
1117
- });
1118
- //──────────────────────────────────────────────────────────────────────────────
1119
- // Codex Tool
1120
- //──────────────────────────────────────────────────────────────────────────────
1121
- server.tool("codex_request", {
1122
- prompt: z
1123
- .string()
1124
- .min(1, "Prompt cannot be empty")
1125
- .max(100000, "Prompt too long (max 100k chars)")
1126
- .describe("Prompt text for Codex"),
1127
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1128
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1129
- dangerouslyBypassApprovalsAndSandbox: z
1130
- .boolean()
1131
- .default(false)
1132
- .describe("Run Codex without approvals/sandbox"),
1133
- approvalStrategy: z
1134
- .enum(["legacy", "mcp_managed"])
1135
- .default("legacy")
1136
- .describe("Approval strategy"),
1137
- approvalPolicy: z
1138
- .enum(["strict", "balanced", "permissive"])
1139
- .optional()
1140
- .describe("Approval policy override"),
1141
- mcpServers: z
1142
- .array(MCP_SERVER_ENUM)
1143
- .default(["sqry"])
1144
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1145
- sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
1146
- createNewSession: z.boolean().default(false).describe("Force new session"),
1147
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1148
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1149
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1150
- idleTimeoutMs: z
1151
- .number()
1152
- .int()
1153
- .min(30_000)
1154
- .max(3_600_000)
1155
- .optional()
1156
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1157
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
1507
+ }
1508
+ export async function handleGrokRequestAsync(deps, params) {
1509
+ const runtime = resolveHandlerRuntime(deps);
1510
+ const prep = prepareGrokRequest({
1511
+ prompt: params.prompt,
1512
+ model: params.model,
1513
+ outputFormat: params.outputFormat,
1514
+ alwaysApprove: params.alwaysApprove,
1515
+ permissionMode: params.permissionMode,
1516
+ effort: params.effort,
1517
+ reasoningEffort: params.reasoningEffort,
1518
+ allowedTools: params.allowedTools,
1519
+ disallowedTools: params.disallowedTools,
1520
+ approvalStrategy: params.approvalStrategy,
1521
+ approvalPolicy: params.approvalPolicy,
1522
+ mcpServers: params.mcpServers,
1523
+ correlationId: params.correlationId,
1524
+ optimizePrompt: params.optimizePrompt,
1525
+ operation: "grok_request_async",
1526
+ }, runtime);
1527
+ if (!("args" in prep))
1528
+ return prep;
1529
+ const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1530
+ try {
1531
+ // Session arg planning (pure, no I/O)
1532
+ const sessionResult = resolveGrokSessionArgs({
1533
+ sessionId: params.sessionId,
1534
+ resumeLatest: params.resumeLatest,
1535
+ createNewSession: params.createNewSession,
1536
+ });
1537
+ args.push(...sessionResult.resumeArgs);
1538
+ // Pre-start session I/O (async handlers: prevent orphaned jobs)
1539
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1540
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1541
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1542
+ if (!existing) {
1543
+ try {
1544
+ await deps.sessionManager.createSession("grok", "Grok Session", effectiveSessionId);
1545
+ }
1546
+ catch {
1547
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1548
+ if (!rechecked)
1549
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1550
+ }
1551
+ }
1552
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1553
+ }
1554
+ else if (!params.createNewSession && !effectiveSessionId) {
1555
+ const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1556
+ effectiveSessionId = newSession.id;
1557
+ }
1558
+ // Start job only after all session I/O succeeds
1559
+ const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1560
+ deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
1561
+ const asyncResponse = {
1562
+ success: true,
1563
+ job,
1564
+ sessionId: effectiveSessionId || null,
1565
+ resumable: sessionResult.userProvidedSession,
1566
+ approval: approvalDecision,
1567
+ mcpServers: { requested: requestedMcpServers },
1568
+ };
1569
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1570
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1571
+ }
1572
+ return {
1573
+ content: [
1574
+ {
1575
+ type: "text",
1576
+ text: JSON.stringify(asyncResponse, null, 2),
1577
+ },
1578
+ ],
1579
+ };
1580
+ }
1581
+ catch (error) {
1582
+ return createErrorResponse("grok_request_async", 1, "", corrId, error);
1583
+ }
1584
+ }
1585
+ export async function handleMistralRequest(deps, params) {
1586
+ const runtime = resolveHandlerRuntime(deps);
1158
1587
  const startTime = Date.now();
1159
- const prep = prepareCodexRequest({
1160
- prompt,
1161
- model,
1162
- fullAuto,
1163
- dangerouslyBypassApprovalsAndSandbox,
1164
- approvalStrategy,
1165
- approvalPolicy,
1166
- mcpServers,
1167
- correlationId,
1168
- optimizePrompt,
1169
- operation: "codex_request",
1170
- });
1588
+ const prep = prepareMistralRequest({
1589
+ prompt: params.prompt,
1590
+ model: params.model,
1591
+ outputFormat: params.outputFormat,
1592
+ permissionMode: params.permissionMode,
1593
+ effort: params.effort,
1594
+ reasoningEffort: params.reasoningEffort,
1595
+ allowedTools: params.allowedTools,
1596
+ disallowedTools: params.disallowedTools,
1597
+ approvalStrategy: params.approvalStrategy,
1598
+ approvalPolicy: params.approvalPolicy,
1599
+ mcpServers: params.mcpServers,
1600
+ correlationId: params.correlationId,
1601
+ optimizePrompt: params.optimizePrompt,
1602
+ operation: "mistral_request",
1603
+ }, runtime);
1171
1604
  if (!("args" in prep))
1172
1605
  return prep;
1173
- const { corrId, args } = prep;
1606
+ const { corrId, args, mistralEnv } = prep;
1174
1607
  let durationMs = 0;
1175
1608
  let wasSuccessful = false;
1176
1609
  safeFlightStart({
1177
1610
  correlationId: corrId,
1178
- cli: "codex",
1611
+ cli: "mistral",
1179
1612
  model: prep.resolvedModel || "default",
1180
- prompt,
1181
- sessionId,
1182
- });
1183
- logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
1613
+ prompt: params.prompt,
1614
+ sessionId: params.sessionId,
1615
+ }, runtime);
1616
+ deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${params.prompt.length}`);
1184
1617
  try {
1185
- const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs));
1186
- // Deferred — job still running, return async reference
1618
+ const sessionResult = resolveMistralSessionArgs({
1619
+ sessionId: params.sessionId,
1620
+ resumeLatest: params.resumeLatest,
1621
+ createNewSession: params.createNewSession,
1622
+ });
1623
+ args.push(...sessionResult.resumeArgs);
1624
+ const result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv);
1187
1625
  if (isDeferredResponse(result)) {
1188
- return buildDeferredToolResponse(result, sessionId);
1626
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1189
1627
  }
1190
1628
  const { stdout, stderr, code } = result;
1191
1629
  durationMs = Math.max(0, Date.now() - startTime);
1192
1630
  if (code !== 0) {
1193
- logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
1631
+ deps.logger.info(`[${corrId}] mistral_request failed in ${durationMs}ms`);
1194
1632
  safeFlightComplete(corrId, {
1195
1633
  response: stderr || "",
1196
1634
  durationMs,
1197
1635
  retryCount: 0,
1198
1636
  circuitBreakerState: "closed",
1199
- optimizationApplied: optimizePrompt || optimizeResponse,
1637
+ optimizationApplied: false,
1200
1638
  exitCode: code,
1201
1639
  errorMessage: stderr || `Exit code ${code}`,
1202
1640
  status: "failed",
1203
- });
1204
- return createErrorResponse("codex", code, stderr, corrId);
1641
+ }, runtime);
1642
+ return createErrorResponse("mistral", code, stderr, corrId);
1205
1643
  }
1206
1644
  wasSuccessful = true;
1207
- // Track session usage
1208
- let effectiveSessionId = sessionId;
1209
- if (!createNewSession && !sessionId) {
1210
- const activeSession = await sessionManager.getActiveSession("codex");
1211
- if (activeSession) {
1212
- effectiveSessionId = activeSession.id;
1213
- }
1214
- else {
1215
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1216
- effectiveSessionId = newSession.id;
1645
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1646
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1647
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1648
+ if (!existing) {
1649
+ try {
1650
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1651
+ }
1652
+ catch {
1653
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1654
+ if (!rechecked)
1655
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1656
+ }
1217
1657
  }
1658
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1218
1659
  }
1219
- else if (sessionId) {
1220
- await sessionManager.updateSessionUsage(sessionId);
1221
- }
1222
- else if (createNewSession) {
1223
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1660
+ else if (!params.createNewSession && !effectiveSessionId) {
1661
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1224
1662
  effectiveSessionId = newSession.id;
1225
1663
  }
1226
- logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
1664
+ deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
1665
+ const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
1227
1666
  safeFlightComplete(corrId, {
1228
1667
  response: stdout,
1229
1668
  durationMs,
1230
1669
  retryCount: 0,
1231
1670
  circuitBreakerState: "closed",
1232
- optimizationApplied: optimizePrompt || optimizeResponse,
1671
+ approvalDecision: prep.approvalDecision?.status,
1672
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1233
1673
  exitCode: 0,
1234
1674
  status: "completed",
1235
- });
1236
- return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
1675
+ }, runtime);
1676
+ return response;
1237
1677
  }
1238
1678
  catch (error) {
1239
1679
  const elapsedMs = Math.max(0, Date.now() - startTime);
1240
- logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
1680
+ deps.logger.info(`[${corrId}] mistral_request threw exception after ${elapsedMs}ms`);
1241
1681
  safeFlightComplete(corrId, {
1242
1682
  response: "",
1243
1683
  durationMs: elapsedMs,
1244
1684
  retryCount: 0,
1245
1685
  circuitBreakerState: "closed",
1246
- optimizationApplied: optimizePrompt || optimizeResponse,
1686
+ optimizationApplied: false,
1247
1687
  exitCode: 1,
1248
1688
  errorMessage: error.message,
1249
1689
  status: "failed",
1250
- });
1251
- return createErrorResponse("codex", 1, "", corrId, error);
1690
+ }, runtime);
1691
+ return createErrorResponse("mistral", 1, "", corrId, error);
1252
1692
  }
1253
1693
  finally {
1254
1694
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1255
- performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
1695
+ runtime.performanceMetrics.recordRequest("mistral", finalizedDurationMs, wasSuccessful);
1256
1696
  }
1257
- });
1258
- //──────────────────────────────────────────────────────────────────────────────
1259
- // Gemini Tool
1260
- //──────────────────────────────────────────────────────────────────────────────
1261
- server.tool("gemini_request", {
1262
- prompt: z
1263
- .string()
1264
- .min(1, "Prompt cannot be empty")
1265
- .max(100000, "Prompt too long (max 100k chars)")
1266
- .describe("Prompt text for Gemini"),
1267
- model: z
1268
- .string()
1269
- .optional()
1270
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1271
- sessionId: z.string().optional().describe("Session ID or 'latest'"),
1272
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1273
- createNewSession: z.boolean().default(false).describe("Force new session"),
1274
- approvalMode: z
1275
- .enum(["default", "auto_edit", "yolo"])
1276
- .optional()
1277
- .describe("Approval: default|auto_edit|yolo"),
1278
- approvalStrategy: z
1279
- .enum(["legacy", "mcp_managed"])
1280
- .default("legacy")
1281
- .describe("Approval strategy"),
1282
- approvalPolicy: z
1283
- .enum(["strict", "balanced", "permissive"])
1284
- .optional()
1285
- .describe("Approval policy override"),
1286
- mcpServers: z
1287
- .array(MCP_SERVER_ENUM)
1288
- .default(["sqry"])
1289
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1290
- allowedTools: z
1291
- .array(z.string())
1292
- .optional()
1293
- .describe("Allowed tools (['Write','Edit','Bash'])"),
1294
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1295
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1296
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1297
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1298
- idleTimeoutMs: z
1299
- .number()
1300
- .int()
1301
- .min(30_000)
1302
- .max(3_600_000)
1303
- .optional()
1304
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1305
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
1306
- return handleGeminiRequest({ sessionManager, logger }, {
1307
- prompt,
1308
- model,
1309
- sessionId,
1310
- resumeLatest,
1311
- createNewSession,
1312
- approvalMode,
1313
- approvalStrategy,
1314
- approvalPolicy,
1315
- mcpServers,
1316
- allowedTools,
1317
- includeDirs,
1318
- correlationId,
1319
- optimizePrompt,
1320
- optimizeResponse,
1321
- idleTimeoutMs,
1322
- });
1323
- });
1324
- //──────────────────────────────────────────────────────────────────────────────
1325
- // Async Long-Running Job Tools (No Time-Bound LLM Execution)
1326
- //──────────────────────────────────────────────────────────────────────────────
1327
- server.tool("claude_request_async", {
1328
- prompt: z
1329
- .string()
1330
- .min(1, "Prompt cannot be empty")
1331
- .max(100000, "Prompt too long (max 100k chars)")
1332
- .describe("Prompt text for Claude"),
1333
- model: z
1334
- .string()
1335
- .optional()
1336
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1337
- outputFormat: z
1338
- .enum(["text", "json", "stream-json"])
1339
- .default("text")
1340
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1341
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1342
- continueSession: z.boolean().default(false).describe("Continue active session"),
1343
- createNewSession: z.boolean().default(false).describe("Force new session"),
1344
- allowedTools: z
1345
- .array(z.string())
1346
- .optional()
1347
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1348
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1349
- dangerouslySkipPermissions: z
1350
- .boolean()
1351
- .default(false)
1352
- .describe("Bypass permissions (sandbox only)"),
1353
- approvalStrategy: z
1354
- .enum(["legacy", "mcp_managed"])
1355
- .default("legacy")
1356
- .describe("Approval strategy"),
1357
- approvalPolicy: z
1358
- .enum(["strict", "balanced", "permissive"])
1359
- .optional()
1360
- .describe("Approval policy override"),
1361
- mcpServers: z
1362
- .array(MCP_SERVER_ENUM)
1363
- .default(["sqry"])
1364
- .describe("MCP servers exposed to Claude"),
1365
- strictMcpConfig: z
1366
- .boolean()
1367
- .default(false)
1368
- .describe("Restrict Claude to provided MCP config only"),
1369
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1370
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1371
- idleTimeoutMs: z
1372
- .number()
1373
- .int()
1374
- .min(30_000)
1375
- .max(3_600_000)
1376
- .optional()
1377
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1378
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, }) => {
1379
- const prep = prepareClaudeRequest({
1380
- prompt,
1381
- model,
1382
- outputFormat,
1383
- allowedTools,
1384
- disallowedTools,
1385
- dangerouslySkipPermissions,
1386
- approvalStrategy,
1387
- approvalPolicy,
1388
- mcpServers,
1389
- strictMcpConfig,
1390
- correlationId,
1391
- optimizePrompt,
1392
- operation: "claude_request_async",
1393
- });
1697
+ }
1698
+ export async function handleMistralRequestAsync(deps, params) {
1699
+ const runtime = resolveHandlerRuntime(deps);
1700
+ const prep = prepareMistralRequest({
1701
+ prompt: params.prompt,
1702
+ model: params.model,
1703
+ outputFormat: params.outputFormat,
1704
+ permissionMode: params.permissionMode,
1705
+ effort: params.effort,
1706
+ reasoningEffort: params.reasoningEffort,
1707
+ allowedTools: params.allowedTools,
1708
+ disallowedTools: params.disallowedTools,
1709
+ approvalStrategy: params.approvalStrategy,
1710
+ approvalPolicy: params.approvalPolicy,
1711
+ mcpServers: params.mcpServers,
1712
+ correlationId: params.correlationId,
1713
+ optimizePrompt: params.optimizePrompt,
1714
+ operation: "mistral_request_async",
1715
+ }, runtime);
1394
1716
  if (!("args" in prep))
1395
1717
  return prep;
1396
- const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
1718
+ const { corrId, args, requestedMcpServers, approvalDecision, mistralEnv } = prep;
1397
1719
  try {
1398
- // Session management (before job start for async)
1399
- let effectiveSessionId = sessionId;
1400
- let useContinue = continueSession;
1401
- const activeSession = await sessionManager.getActiveSession("claude");
1402
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1403
- effectiveSessionId = activeSession.id;
1404
- useContinue = true;
1720
+ const sessionResult = resolveMistralSessionArgs({
1721
+ sessionId: params.sessionId,
1722
+ resumeLatest: params.resumeLatest,
1723
+ createNewSession: params.createNewSession,
1724
+ });
1725
+ args.push(...sessionResult.resumeArgs);
1726
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1727
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1728
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1729
+ if (!existing) {
1730
+ try {
1731
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1732
+ }
1733
+ catch {
1734
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1735
+ if (!rechecked)
1736
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1737
+ }
1738
+ }
1739
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1740
+ }
1741
+ else if (!params.createNewSession && !effectiveSessionId) {
1742
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1743
+ effectiveSessionId = newSession.id;
1405
1744
  }
1406
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1407
- useContinue = true;
1745
+ const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
1746
+ deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
1747
+ const asyncResponse = {
1748
+ success: true,
1749
+ job,
1750
+ sessionId: effectiveSessionId || null,
1751
+ resumable: sessionResult.userProvidedSession,
1752
+ approval: approvalDecision,
1753
+ mcpServers: { requested: requestedMcpServers },
1754
+ };
1755
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1756
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1408
1757
  }
1409
- if (useContinue) {
1410
- args.push("--continue");
1758
+ return {
1759
+ content: [
1760
+ {
1761
+ type: "text",
1762
+ text: JSON.stringify(asyncResponse, null, 2),
1763
+ },
1764
+ ],
1765
+ };
1766
+ }
1767
+ catch (error) {
1768
+ return createErrorResponse("mistral_request_async", 1, "", corrId, error);
1769
+ }
1770
+ }
1771
+ export async function handleCodexRequestAsync(deps, params) {
1772
+ const runtime = resolveHandlerRuntime(deps);
1773
+ const prep = prepareCodexRequest({
1774
+ prompt: params.prompt,
1775
+ model: params.model,
1776
+ fullAuto: params.fullAuto,
1777
+ sandboxMode: params.sandboxMode,
1778
+ askForApproval: params.askForApproval,
1779
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
1780
+ dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1781
+ approvalStrategy: params.approvalStrategy,
1782
+ approvalPolicy: params.approvalPolicy,
1783
+ mcpServers: params.mcpServers,
1784
+ sessionId: params.sessionId,
1785
+ resumeLatest: params.resumeLatest,
1786
+ createNewSession: params.createNewSession,
1787
+ correlationId: params.correlationId,
1788
+ optimizePrompt: params.optimizePrompt,
1789
+ operation: "codex_request_async",
1790
+ outputFormat: params.outputFormat,
1791
+ outputSchema: params.outputSchema,
1792
+ search: params.search,
1793
+ profile: params.profile,
1794
+ configOverrides: params.configOverrides,
1795
+ ephemeral: params.ephemeral,
1796
+ images: params.images,
1797
+ ignoreUserConfig: params.ignoreUserConfig,
1798
+ ignoreRules: params.ignoreRules,
1799
+ }, runtime);
1800
+ if (!("args" in prep))
1801
+ return prep;
1802
+ const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1803
+ // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
1804
+ // exactly one place at a time: this scope until startJob succeeds, then
1805
+ // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
1806
+ // the job is registered. Any code path that fails to hand it off MUST run
1807
+ // it locally.
1808
+ const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
1809
+ let prepCleanupOwnedHere = prepCleanup !== undefined;
1810
+ const runPrepCleanupLocally = () => {
1811
+ if (!prepCleanupOwnedHere || !prepCleanup)
1812
+ return;
1813
+ prepCleanupOwnedHere = false;
1814
+ try {
1815
+ prepCleanup();
1411
1816
  }
1412
- else if (effectiveSessionId) {
1413
- args.push("--session-id", effectiveSessionId);
1414
- await sessionManager.updateSessionUsage(effectiveSessionId);
1817
+ catch (err) {
1818
+ deps.logger.error(`[${corrId}] codex_request_async outputSchema cleanup threw`, err);
1415
1819
  }
1416
- if (effectiveSessionId) {
1417
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1418
- if (!existingSession) {
1419
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1820
+ };
1821
+ try {
1822
+ // Pre-start session I/O (async handlers: prevent orphaned jobs)
1823
+ let effectiveSessionId = params.sessionId;
1824
+ if (!params.createNewSession && !params.sessionId) {
1825
+ const activeSession = await deps.sessionManager.getActiveSession("codex");
1826
+ if (activeSession) {
1827
+ effectiveSessionId = activeSession.id;
1828
+ }
1829
+ else {
1830
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1831
+ effectiveSessionId = newSession.id;
1420
1832
  }
1421
1833
  }
1422
- // Idle timeout only for stream-json (text/json produce no output until done)
1423
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1424
- const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat);
1425
- logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
1834
+ else if (params.sessionId) {
1835
+ await deps.sessionManager.updateSessionUsage(params.sessionId);
1836
+ }
1837
+ else if (params.createNewSession) {
1838
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1839
+ effectiveSessionId = newSession.id;
1840
+ }
1841
+ // Start job only after all session I/O succeeds. If startJob throws before
1842
+ // registering the record, ownership stays here and we run it in the catch.
1843
+ let job;
1844
+ try {
1845
+ job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
1846
+ // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
1847
+ // status. Release our local ownership claim so the catch path doesn't
1848
+ // double-fire.
1849
+ prepCleanupOwnedHere = false;
1850
+ }
1851
+ catch (startErr) {
1852
+ // startJob never stored the record → manager won't call onComplete. We
1853
+ // still own the cleanup; let the outer catch run it.
1854
+ throw startErr;
1855
+ }
1856
+ deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
1426
1857
  const asyncResponse = {
1427
1858
  success: true,
1428
1859
  job,
1429
- sessionId: effectiveSessionId || activeSession?.id || null,
1860
+ sessionId: effectiveSessionId || null,
1430
1861
  approval: approvalDecision,
1431
- mcpServers: {
1432
- requested: requestedMcpServers,
1433
- enabled: mcpConfig?.enabled,
1434
- missing: mcpConfig?.missing,
1435
- },
1862
+ mcpServers: { requested: requestedMcpServers },
1436
1863
  };
1437
1864
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1438
1865
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
@@ -1447,504 +1874,1873 @@ server.tool("claude_request_async", {
1447
1874
  };
1448
1875
  }
1449
1876
  catch (error) {
1450
- return createErrorResponse("claude_request_async", 1, "", corrId, error);
1451
- }
1452
- });
1453
- server.tool("codex_request_async", {
1454
- prompt: z
1455
- .string()
1456
- .min(1, "Prompt cannot be empty")
1457
- .max(100000, "Prompt too long (max 100k chars)")
1458
- .describe("Prompt text for Codex"),
1459
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1460
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1461
- dangerouslyBypassApprovalsAndSandbox: z
1462
- .boolean()
1463
- .default(false)
1464
- .describe("Run Codex without approvals/sandbox"),
1465
- approvalStrategy: z
1466
- .enum(["legacy", "mcp_managed"])
1467
- .default("legacy")
1468
- .describe("Approval strategy"),
1469
- approvalPolicy: z
1470
- .enum(["strict", "balanced", "permissive"])
1471
- .optional()
1472
- .describe("Approval policy override"),
1473
- mcpServers: z
1474
- .array(MCP_SERVER_ENUM)
1475
- .default(["sqry"])
1476
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1477
- sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
1478
- createNewSession: z.boolean().default(false).describe("Force new session"),
1479
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1480
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1481
- idleTimeoutMs: z
1482
- .number()
1483
- .int()
1484
- .min(30_000)
1485
- .max(3_600_000)
1486
- .optional()
1487
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1488
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, }) => {
1489
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
1490
- prompt,
1491
- model,
1492
- fullAuto,
1493
- dangerouslyBypassApprovalsAndSandbox,
1494
- approvalStrategy,
1495
- approvalPolicy,
1496
- mcpServers,
1497
- sessionId,
1498
- createNewSession,
1499
- correlationId,
1500
- optimizePrompt,
1501
- idleTimeoutMs,
1877
+ // Pre-start failure: either session I/O threw, or startJob threw before
1878
+ // registering the record. In either case the manager will NOT fire
1879
+ // prepCleanup, so we must run it here.
1880
+ runPrepCleanupLocally();
1881
+ return createErrorResponse("codex_request_async", 1, "", corrId, error);
1882
+ }
1883
+ }
1884
+ //──────────────────────────────────────────────────────────────────────────────
1885
+ // Claude Code Tool
1886
+ //──────────────────────────────────────────────────────────────────────────────
1887
+ export function createGatewayServer(deps = {}) {
1888
+ const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
1889
+ const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger } = runtime;
1890
+ const server = newGatewayMcpServer();
1891
+ registerBaseResources(server, runtime);
1892
+ registerValidationTools(server, { asyncJobManager });
1893
+ server.tool("claude_request", {
1894
+ prompt: z
1895
+ .string()
1896
+ .min(1, "Prompt cannot be empty")
1897
+ .max(100000, "Prompt too long (max 100k chars)")
1898
+ .describe("Prompt text for Claude"),
1899
+ model: z
1900
+ .string()
1901
+ .optional()
1902
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1903
+ outputFormat: z
1904
+ .enum(["text", "json", "stream-json"])
1905
+ .default("text")
1906
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1907
+ sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1908
+ continueSession: z.boolean().default(false).describe("Continue active session"),
1909
+ createNewSession: z.boolean().default(false).describe("Force new session"),
1910
+ allowedTools: z
1911
+ .array(z.string())
1912
+ .optional()
1913
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1914
+ disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1915
+ dangerouslySkipPermissions: z
1916
+ .boolean()
1917
+ .default(false)
1918
+ .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
1919
+ permissionMode: z
1920
+ .enum(CLAUDE_PERMISSION_MODES)
1921
+ .optional()
1922
+ .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
1923
+ // U25 — Claude high-impact features
1924
+ agent: z
1925
+ .string()
1926
+ .optional()
1927
+ .describe("Claude --agent: dispatch to a named single sub-agent."),
1928
+ agents: z
1929
+ .record(z.record(z.unknown()))
1930
+ .optional()
1931
+ .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
1932
+ forkSession: z
1933
+ .boolean()
1934
+ .optional()
1935
+ .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
1936
+ systemPrompt: z
1937
+ .string()
1938
+ .optional()
1939
+ .describe("Claude --system-prompt: replace the system prompt entirely."),
1940
+ appendSystemPrompt: z
1941
+ .string()
1942
+ .optional()
1943
+ .describe("Claude --append-system-prompt: append to the existing system prompt."),
1944
+ maxBudgetUsd: z
1945
+ .number()
1946
+ .positive()
1947
+ .optional()
1948
+ .describe("Claude --max-budget-usd: spend cap for this request in USD."),
1949
+ maxTurns: z
1950
+ .number()
1951
+ .int()
1952
+ .positive()
1953
+ .optional()
1954
+ .describe("Claude --max-turns: cap on agent loop iterations."),
1955
+ effort: z
1956
+ .enum(CLAUDE_EFFORT_LEVELS)
1957
+ .optional()
1958
+ .describe("Claude --effort: low|medium|high|xhigh|max."),
1959
+ excludeDynamicSystemPromptSections: z
1960
+ .boolean()
1961
+ .optional()
1962
+ .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
1963
+ approvalStrategy: z
1964
+ .enum(["legacy", "mcp_managed"])
1965
+ .default("legacy")
1966
+ .describe("Approval strategy"),
1967
+ approvalPolicy: z
1968
+ .enum(["strict", "balanced", "permissive"])
1969
+ .optional()
1970
+ .describe("Approval policy override"),
1971
+ mcpServers: z
1972
+ .array(MCP_SERVER_ENUM)
1973
+ .default(["sqry"])
1974
+ .describe("MCP servers exposed to Claude"),
1975
+ strictMcpConfig: z
1976
+ .boolean()
1977
+ .default(false)
1978
+ .describe("Restrict Claude to provided MCP config only"),
1979
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1980
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1981
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1982
+ idleTimeoutMs: z
1983
+ .number()
1984
+ .int()
1985
+ .min(30_000)
1986
+ .max(3_600_000)
1987
+ .optional()
1988
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1989
+ forceRefresh: z
1990
+ .boolean()
1991
+ .default(false)
1992
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1993
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1994
+ const startTime = Date.now();
1995
+ if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
1996
+ return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
1997
+ }
1998
+ const prep = prepareClaudeRequest({
1999
+ prompt,
2000
+ model,
2001
+ outputFormat,
2002
+ allowedTools,
2003
+ disallowedTools,
2004
+ dangerouslySkipPermissions,
2005
+ permissionMode,
2006
+ approvalStrategy,
2007
+ approvalPolicy,
2008
+ mcpServers,
2009
+ strictMcpConfig,
2010
+ correlationId,
2011
+ optimizePrompt,
2012
+ operation: "claude_request",
2013
+ agent,
2014
+ agents,
2015
+ forkSession,
2016
+ systemPrompt,
2017
+ appendSystemPrompt,
2018
+ maxBudgetUsd,
2019
+ maxTurns,
2020
+ effort,
2021
+ excludeDynamicSystemPromptSections,
2022
+ }, runtime);
2023
+ if (!("args" in prep))
2024
+ return prep;
2025
+ const { corrId, args } = prep;
2026
+ let durationMs = 0;
2027
+ let wasSuccessful = false;
2028
+ safeFlightStart({
2029
+ correlationId: corrId,
2030
+ cli: "claude",
2031
+ model: prep.resolvedModel || "default",
2032
+ prompt,
2033
+ sessionId,
2034
+ }, runtime);
2035
+ logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
2036
+ try {
2037
+ // Session management
2038
+ let effectiveSessionId = sessionId;
2039
+ let useContinue = continueSession;
2040
+ const activeSession = await sessionManager.getActiveSession("claude");
2041
+ if (!createNewSession && !continueSession && !sessionId && activeSession) {
2042
+ effectiveSessionId = activeSession.id;
2043
+ useContinue = true;
2044
+ }
2045
+ if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2046
+ useContinue = true;
2047
+ }
2048
+ if (useContinue) {
2049
+ args.push("--continue");
2050
+ }
2051
+ else if (effectiveSessionId) {
2052
+ args.push("--session-id", effectiveSessionId);
2053
+ await sessionManager.updateSessionUsage(effectiveSessionId);
2054
+ }
2055
+ // Idle timeout only for stream-json (text/json produce no output until done)
2056
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
2057
+ const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime);
2058
+ // Deferred — job still running, return async reference
2059
+ if (isDeferredResponse(result)) {
2060
+ return buildDeferredToolResponse(result, effectiveSessionId);
2061
+ }
2062
+ const { stdout, stderr, code } = result;
2063
+ durationMs = Math.max(0, Date.now() - startTime);
2064
+ if (code !== 0) {
2065
+ logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
2066
+ safeFlightComplete(corrId, {
2067
+ response: stderr || "",
2068
+ durationMs,
2069
+ retryCount: 0,
2070
+ circuitBreakerState: "closed",
2071
+ optimizationApplied: optimizePrompt || optimizeResponse,
2072
+ exitCode: code,
2073
+ errorMessage: stderr || `Exit code ${code}`,
2074
+ status: "failed",
2075
+ }, runtime);
2076
+ return createErrorResponse("claude", code, stderr, corrId);
2077
+ }
2078
+ wasSuccessful = true;
2079
+ // If we used a session ID and it's not tracked yet, create a session record
2080
+ if (effectiveSessionId) {
2081
+ const existingSession = await sessionManager.getSession(effectiveSessionId);
2082
+ if (!existingSession) {
2083
+ await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
2084
+ }
2085
+ }
2086
+ logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
2087
+ // Parse stream-json NDJSON output to extract result text
2088
+ if (outputFormat === "stream-json") {
2089
+ const parsed = parseStreamJson(stdout);
2090
+ if (parsed.costUsd !== null) {
2091
+ logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
2092
+ }
2093
+ safeFlightComplete(corrId, {
2094
+ response: parsed.text,
2095
+ inputTokens: parsed.usage?.inputTokens,
2096
+ outputTokens: parsed.usage?.outputTokens,
2097
+ cacheReadTokens: parsed.usage?.cacheReadInputTokens || undefined,
2098
+ cacheCreationTokens: parsed.usage?.cacheCreationInputTokens || undefined,
2099
+ durationMs,
2100
+ retryCount: 0,
2101
+ circuitBreakerState: "closed",
2102
+ costUsd: parsed.costUsd ?? undefined,
2103
+ optimizationApplied: optimizePrompt || optimizeResponse,
2104
+ exitCode: 0,
2105
+ status: "completed",
2106
+ }, runtime);
2107
+ return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2108
+ }
2109
+ safeFlightComplete(corrId, {
2110
+ response: stdout,
2111
+ durationMs,
2112
+ retryCount: 0,
2113
+ circuitBreakerState: "closed",
2114
+ optimizationApplied: optimizePrompt || optimizeResponse,
2115
+ exitCode: 0,
2116
+ status: "completed",
2117
+ }, runtime);
2118
+ return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2119
+ }
2120
+ catch (error) {
2121
+ const elapsedMs = Math.max(0, Date.now() - startTime);
2122
+ logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
2123
+ safeFlightComplete(corrId, {
2124
+ response: "",
2125
+ durationMs: elapsedMs,
2126
+ retryCount: 0,
2127
+ circuitBreakerState: "closed",
2128
+ optimizationApplied: optimizePrompt || optimizeResponse,
2129
+ exitCode: 1,
2130
+ errorMessage: error.message,
2131
+ status: "failed",
2132
+ }, runtime);
2133
+ return createErrorResponse("claude", 1, "", corrId, error);
2134
+ }
2135
+ finally {
2136
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2137
+ performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
2138
+ }
2139
+ });
2140
+ //──────────────────────────────────────────────────────────────────────────────
2141
+ // Codex Tool
2142
+ //──────────────────────────────────────────────────────────────────────────────
2143
+ server.tool("codex_request", {
2144
+ prompt: z
2145
+ .string()
2146
+ .min(1, "Prompt cannot be empty")
2147
+ .max(100000, "Prompt too long (max 100k chars)")
2148
+ .describe("Prompt text for Codex"),
2149
+ model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
2150
+ fullAuto: z
2151
+ .boolean()
2152
+ .default(false)
2153
+ .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
2154
+ sandboxMode: z
2155
+ .enum(CODEX_SANDBOX_MODES)
2156
+ .optional()
2157
+ .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2158
+ askForApproval: z
2159
+ .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2160
+ .optional()
2161
+ .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2162
+ useLegacyFullAutoFlag: z
2163
+ .boolean()
2164
+ .default(false)
2165
+ .describe("Escape hatch: emit `--full-auto` directly instead of expanding (deprecated)."),
2166
+ dangerouslyBypassApprovalsAndSandbox: z
2167
+ .boolean()
2168
+ .default(false)
2169
+ .describe("Run Codex without approvals/sandbox"),
2170
+ approvalStrategy: z
2171
+ .enum(["legacy", "mcp_managed"])
2172
+ .default("legacy")
2173
+ .describe("Approval strategy"),
2174
+ approvalPolicy: z
2175
+ .enum(["strict", "balanced", "permissive"])
2176
+ .optional()
2177
+ .describe("Approval policy override"),
2178
+ mcpServers: z
2179
+ .array(MCP_SERVER_ENUM)
2180
+ .default(["sqry"])
2181
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
2182
+ sessionId: z
2183
+ .string()
2184
+ .optional()
2185
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
2186
+ resumeLatest: z
2187
+ .boolean()
2188
+ .default(false)
2189
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
2190
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
2191
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2192
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2193
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
2194
+ idleTimeoutMs: z
2195
+ .number()
2196
+ .int()
2197
+ .min(30_000)
2198
+ .max(3_600_000)
2199
+ .optional()
2200
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2201
+ forceRefresh: z
2202
+ .boolean()
2203
+ .default(false)
2204
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2205
+ // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
2206
+ // tokens (and any cost) through extractUsageAndCost. Without "json", the
2207
+ // parser is unreachable and Codex usage is never reported.
2208
+ outputFormat: z
2209
+ .enum(["text", "json"])
2210
+ .default("text")
2211
+ .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
2212
+ // U26: high-impact feature flags. All optional.
2213
+ outputSchema: z
2214
+ .union([z.string(), z.record(z.unknown())])
2215
+ .optional()
2216
+ .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object; object is materialised to a 0o600 temp file under os.tmpdir() and deleted after the run."),
2217
+ search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
2218
+ profile: z
2219
+ .string()
2220
+ .optional()
2221
+ .describe("Codex --profile <name>: select a profile from ~/.codex/config.toml."),
2222
+ configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
2223
+ ephemeral: z
2224
+ .boolean()
2225
+ .optional()
2226
+ .describe("Codex --ephemeral: do not persist the session to disk."),
2227
+ images: z
2228
+ .array(z.string())
2229
+ .optional()
2230
+ .describe("Codex -i <path>: image attachments. Each path must exist; missing paths fail fast."),
2231
+ ignoreUserConfig: z
2232
+ .boolean()
2233
+ .optional()
2234
+ .describe("Codex --ignore-user-config: ignore ~/.codex/config.toml for this run."),
2235
+ ignoreRules: z
2236
+ .boolean()
2237
+ .optional()
2238
+ .describe("Codex --ignore-rules: skip project rule files for this run."),
2239
+ }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
2240
+ const startTime = Date.now();
2241
+ const prep = prepareCodexRequest({
2242
+ prompt,
2243
+ model,
2244
+ fullAuto,
2245
+ sandboxMode,
2246
+ askForApproval,
2247
+ useLegacyFullAutoFlag,
2248
+ dangerouslyBypassApprovalsAndSandbox,
2249
+ approvalStrategy,
2250
+ approvalPolicy,
2251
+ mcpServers,
2252
+ sessionId,
2253
+ resumeLatest,
2254
+ createNewSession,
2255
+ correlationId,
2256
+ optimizePrompt,
2257
+ operation: "codex_request",
2258
+ outputFormat,
2259
+ outputSchema,
2260
+ search,
2261
+ profile,
2262
+ configOverrides,
2263
+ ephemeral,
2264
+ images,
2265
+ ignoreUserConfig,
2266
+ ignoreRules,
2267
+ }, runtime);
2268
+ if (!("args" in prep))
2269
+ return prep;
2270
+ const { corrId, args } = prep;
2271
+ let durationMs = 0;
2272
+ let wasSuccessful = false;
2273
+ safeFlightStart({
2274
+ correlationId: corrId,
2275
+ cli: "codex",
2276
+ model: prep.resolvedModel || "default",
2277
+ prompt,
2278
+ sessionId,
2279
+ }, runtime);
2280
+ logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
2281
+ // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
2282
+ // guarantees the cleanup runs exactly once — inline for direct
2283
+ // execution, on terminal status for the job-backed path (sync
2284
+ // completion or deferred). The outer finally MUST NOT clean again.
2285
+ const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
2286
+ try {
2287
+ const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup);
2288
+ // Deferred — job still running, return async reference. Cleanup
2289
+ // ownership belongs to AsyncJobManager via onComplete.
2290
+ if (isDeferredResponse(result)) {
2291
+ return buildDeferredToolResponse(result, sessionId);
2292
+ }
2293
+ const { stdout, stderr, code } = result;
2294
+ durationMs = Math.max(0, Date.now() - startTime);
2295
+ if (code !== 0) {
2296
+ logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
2297
+ safeFlightComplete(corrId, {
2298
+ response: stderr || "",
2299
+ durationMs,
2300
+ retryCount: 0,
2301
+ circuitBreakerState: "closed",
2302
+ optimizationApplied: optimizePrompt || optimizeResponse,
2303
+ exitCode: code,
2304
+ errorMessage: stderr || `Exit code ${code}`,
2305
+ status: "failed",
2306
+ }, runtime);
2307
+ return createErrorResponse("codex", code, stderr, corrId);
2308
+ }
2309
+ wasSuccessful = true;
2310
+ // Track session usage
2311
+ let effectiveSessionId = sessionId;
2312
+ if (!createNewSession && !sessionId) {
2313
+ const activeSession = await sessionManager.getActiveSession("codex");
2314
+ if (activeSession) {
2315
+ effectiveSessionId = activeSession.id;
2316
+ }
2317
+ else {
2318
+ const newSession = await sessionManager.createSession("codex", "Codex Session");
2319
+ effectiveSessionId = newSession.id;
2320
+ }
2321
+ }
2322
+ else if (sessionId) {
2323
+ await sessionManager.updateSessionUsage(sessionId);
2324
+ }
2325
+ else if (createNewSession) {
2326
+ const newSession = await sessionManager.createSession("codex", "Codex Session");
2327
+ effectiveSessionId = newSession.id;
2328
+ }
2329
+ logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
2330
+ const codexUsage = extractUsageAndCost("codex", stdout, outputFormat);
2331
+ safeFlightComplete(corrId, {
2332
+ response: stdout,
2333
+ durationMs,
2334
+ retryCount: 0,
2335
+ circuitBreakerState: "closed",
2336
+ optimizationApplied: optimizePrompt || optimizeResponse,
2337
+ exitCode: 0,
2338
+ status: "completed",
2339
+ inputTokens: codexUsage.inputTokens,
2340
+ outputTokens: codexUsage.outputTokens,
2341
+ cacheReadTokens: codexUsage.cacheReadTokens,
2342
+ cacheCreationTokens: codexUsage.cacheCreationTokens,
2343
+ costUsd: codexUsage.costUsd,
2344
+ }, runtime);
2345
+ return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2346
+ }
2347
+ catch (error) {
2348
+ const elapsedMs = Math.max(0, Date.now() - startTime);
2349
+ logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
2350
+ safeFlightComplete(corrId, {
2351
+ response: "",
2352
+ durationMs: elapsedMs,
2353
+ retryCount: 0,
2354
+ circuitBreakerState: "closed",
2355
+ optimizationApplied: optimizePrompt || optimizeResponse,
2356
+ exitCode: 1,
2357
+ errorMessage: error.message,
2358
+ status: "failed",
2359
+ }, runtime);
2360
+ return createErrorResponse("codex", 1, "", corrId, error);
2361
+ }
2362
+ finally {
2363
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2364
+ performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
2365
+ // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
2366
+ }
1502
2367
  });
1503
- });
1504
- server.tool("gemini_request_async", {
1505
- prompt: z
1506
- .string()
1507
- .min(1, "Prompt cannot be empty")
1508
- .max(100000, "Prompt too long (max 100k chars)")
1509
- .describe("Prompt text for Gemini"),
1510
- model: z
1511
- .string()
1512
- .optional()
1513
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1514
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1515
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1516
- createNewSession: z.boolean().default(false).describe("Force new session"),
1517
- approvalMode: z
1518
- .enum(["default", "auto_edit", "yolo"])
1519
- .optional()
1520
- .describe("Approval: default|auto_edit|yolo"),
1521
- approvalStrategy: z
1522
- .enum(["legacy", "mcp_managed"])
1523
- .default("legacy")
1524
- .describe("Approval strategy"),
1525
- approvalPolicy: z
1526
- .enum(["strict", "balanced", "permissive"])
1527
- .optional()
1528
- .describe("Approval policy override"),
1529
- mcpServers: z
1530
- .array(MCP_SERVER_ENUM)
1531
- .default(["sqry"])
1532
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1533
- allowedTools: z
1534
- .array(z.string())
1535
- .optional()
1536
- .describe("Allowed tools (['Write','Edit','Bash'])"),
1537
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1538
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1539
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1540
- idleTimeoutMs: z
1541
- .number()
1542
- .int()
1543
- .min(30_000)
1544
- .max(3_600_000)
1545
- .optional()
1546
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1547
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, }) => {
1548
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
1549
- prompt,
1550
- model,
1551
- sessionId,
1552
- resumeLatest,
1553
- createNewSession,
1554
- approvalMode,
1555
- approvalStrategy,
1556
- approvalPolicy,
1557
- mcpServers,
1558
- allowedTools,
1559
- includeDirs,
1560
- correlationId,
1561
- optimizePrompt,
1562
- idleTimeoutMs,
2368
+ //──────────────────────────────────────────────────────────────────────────────
2369
+ // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
2370
+ //──────────────────────────────────────────────────────────────────────────────
2371
+ server.tool("codex_fork_session", {
2372
+ prompt: z
2373
+ .string()
2374
+ .min(1, "Prompt cannot be empty")
2375
+ .max(100000, "Prompt too long (max 100k chars)")
2376
+ .describe("Prompt text for the forked Codex session"),
2377
+ sessionId: z
2378
+ .string()
2379
+ .optional()
2380
+ .describe("Codex session UUID to fork from. Mutually exclusive with `forkLast`."),
2381
+ forkLast: z
2382
+ .boolean()
2383
+ .optional()
2384
+ .describe("Fork from the most recent Codex session. Mutually exclusive with `sessionId`."),
2385
+ model: z.string().optional().describe("Model name or alias (e.g. gpt-5.5, latest)"),
2386
+ sandboxMode: z
2387
+ .enum(CODEX_SANDBOX_MODES)
2388
+ .optional()
2389
+ .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2390
+ askForApproval: z
2391
+ .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2392
+ .optional()
2393
+ .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2394
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2395
+ idleTimeoutMs: z
2396
+ .number()
2397
+ .int()
2398
+ .min(30_000)
2399
+ .max(3_600_000)
2400
+ .optional()
2401
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2402
+ }, async ({ prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs, }) => {
2403
+ const corrId = correlationId || randomUUID();
2404
+ const startTime = Date.now();
2405
+ let durationMs = 0;
2406
+ let wasSuccessful = false;
2407
+ // Enforce mutual exclusion at tool boundary (Zod records the params but
2408
+ // the SDK's `.tool(...)` does not accept top-level refines).
2409
+ if (sessionId && forkLast) {
2410
+ return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
2411
+ }
2412
+ if (!sessionId && !forkLast) {
2413
+ return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("one of sessionId or forkLast is required"));
2414
+ }
2415
+ let forkArgs;
2416
+ try {
2417
+ forkArgs = prepareCodexForkRequest({ prompt, sessionId, forkLast }).args;
2418
+ }
2419
+ catch (err) {
2420
+ return createErrorResponse("codex_fork_session", 1, "", corrId, err);
2421
+ }
2422
+ const cliInfo = getCliInfo();
2423
+ const resolvedModel = resolveModelAlias("codex", model, cliInfo);
2424
+ // Compose argv: forkArgs already starts with `fork`. Inject model and
2425
+ // sandbox/approval flags BEFORE the positional <sessionId|--last> +
2426
+ // prompt to keep them as flags rather than positionals. forkArgs layout
2427
+ // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
2428
+ // we splice flags right after "fork".
2429
+ const flagSegment = [];
2430
+ if (resolvedModel)
2431
+ flagSegment.push("--model", resolvedModel);
2432
+ const sandboxFlags = resolveCodexSandboxFlags({
2433
+ sandboxMode,
2434
+ askForApproval,
2435
+ });
2436
+ if (sandboxFlags.warning) {
2437
+ logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
2438
+ }
2439
+ flagSegment.push(...sandboxFlags.args);
2440
+ const finalArgs = [forkArgs[0], ...flagSegment, ...forkArgs.slice(1)];
2441
+ logger.info(`[${corrId}] codex_fork_session invoked (forkLast=${Boolean(forkLast)}, sessionId=${sessionId ? "set" : "unset"})`);
2442
+ try {
2443
+ const result = await awaitJobOrDefer("codex", finalArgs, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, false, runtime);
2444
+ if (isDeferredResponse(result)) {
2445
+ return buildDeferredToolResponse(result, sessionId);
2446
+ }
2447
+ const { stdout, stderr, code } = result;
2448
+ durationMs = Math.max(0, Date.now() - startTime);
2449
+ if (code !== 0) {
2450
+ return createErrorResponse("codex", code, stderr, corrId);
2451
+ }
2452
+ wasSuccessful = true;
2453
+ return {
2454
+ content: [{ type: "text", text: stdout }],
2455
+ };
2456
+ }
2457
+ catch (error) {
2458
+ return createErrorResponse("codex_fork_session", 1, "", corrId, error);
2459
+ }
2460
+ finally {
2461
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2462
+ performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
2463
+ }
1563
2464
  });
1564
- });
1565
- server.tool("llm_job_status", {
1566
- jobId: z.string().describe("Async job ID from *_request_async"),
1567
- }, async ({ jobId }) => {
1568
- const job = asyncJobManager.getJobSnapshot(jobId);
1569
- if (!job) {
2465
+ //──────────────────────────────────────────────────────────────────────────────
2466
+ // Gemini Tool
2467
+ //──────────────────────────────────────────────────────────────────────────────
2468
// Registers the `gemini_request` tool on `server`.
// The first object literal is the parameter schema (built with `z`); the
// handler copies every declared parameter into a request object and hands it
// to handleGeminiRequest together with the shared sessionManager, logger and
// runtime. The handler returns whatever handleGeminiRequest resolves to.
server.tool("gemini_request", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Gemini"),
    model: z.string().optional()
        .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional().describe("Session ID or 'latest'"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional()
        .describe("Approval: default|auto_edit|yolo|plan"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` makes the CLI emit `-o json` so token usage can be extracted
    // via parseGeminiJson. The default stays `text`, so existing callers see
    // no behavior change.
    outputFormat: z.enum(["text", "json"]).default("text")
        .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
    sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox
        .describe("Run Gemini in sandbox mode (-s)"),
    policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles
        .describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
    adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles
        .describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
    attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments
        .describe("Absolute file paths prepended as @<path> tokens to the prompt"),
}, async (params) => {
    const {
        prompt, model, sessionId, resumeLatest, createNewSession,
        approvalMode, approvalStrategy, approvalPolicy,
        mcpServers, allowedTools, includeDirs,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh, outputFormat,
        sandbox, policyFiles, adminPolicyFiles, attachments,
    } = params;
    const deps = { sessionManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model, sessionId, resumeLatest, createNewSession,
        approvalMode, approvalStrategy, approvalPolicy,
        mcpServers, allowedTools, includeDirs,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh, outputFormat,
        sandbox, policyFiles, adminPolicyFiles, attachments,
    };
    return handleGeminiRequest(deps, request);
});
2551
+ //──────────────────────────────────────────────────────────────────────────────
2552
+ // Grok Tool
2553
+ //──────────────────────────────────────────────────────────────────────────────
2554
// Registers the `grok_request` tool on `server`.
// The schema object lists every accepted parameter; the handler forwards
// exactly those parameters to handleGrokRequest along with the shared
// sessionManager, logger and runtime, and returns its result.
server.tool("grok_request", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Grok"),
    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
    outputFormat: z.enum(["plain", "json", "streaming-json"]).optional()
        .describe("Output format (plain|json|streaming-json). Grok default is plain."),
    sessionId: z.string().optional()
        .describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false)
        .describe("Resume most recent Grok session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    alwaysApprove: z.boolean().default(false)
        .describe("Auto-approve all tool executions (--always-approve)"),
    permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional()
        .describe("Grok permission mode"),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional()
        .describe("Grok effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowed built-in tools (passed as --tools comma list)"),
    disallowedTools: z.array(z.string()).optional()
        .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (params) => {
    const {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        alwaysApprove, permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh,
    } = params;
    const deps = { sessionManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        alwaysApprove, permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh,
    };
    return handleGrokRequest(deps, request);
});
2645
+ //──────────────────────────────────────────────────────────────────────────────
2646
+ // Mistral Vibe Tool
2647
+ //──────────────────────────────────────────────────────────────────────────────
2648
// Registers the `mistral_request` tool (Mistral Vibe) on `server`.
// The schema object lists every accepted parameter; the handler forwards
// exactly those parameters to handleMistralRequest along with the shared
// sessionManager, logger and runtime, and returns its result.
server.tool("mistral_request", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Mistral Vibe"),
    model: z.string().optional()
        .describe("Model alias (e.g. devstral-medium, devstral-large, latest). Resolved alias is injected via VIBE_ACTIVE_MODEL env var — Vibe has no --model flag."),
    outputFormat: z.enum(["plain", "json", "stream-json"]).optional()
        .describe("Output format (plain|json|stream-json). Vibe default is plain."),
    sessionId: z.string().optional()
        .describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
    resumeLatest: z.boolean().default(false)
        .describe("Resume most recent Vibe session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    permissionMode: z.enum(MISTRAL_AGENT_MODES).optional()
        .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional()
        .describe("Vibe effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
    disallowedTools: z.array(z.string()).optional()
        .describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (params) => {
    const {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh,
    } = params;
    const deps = { sessionManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt, optimizeResponse,
        idleTimeoutMs, forceRefresh,
    };
    return handleMistralRequest(deps, request);
});
2737
+ //──────────────────────────────────────────────────────────────────────────────
2738
+ // Async Long-Running Job Tools (No Time-Bound LLM Execution)
2739
+ //──────────────────────────────────────────────────────────────────────────────
2740
// Registers the `claude_request_async` tool on `server`.
//
// Handler flow:
//   1. Reject requests that set both systemPrompt and appendSystemPrompt.
//   2. Build CLI args via prepareClaudeRequest; if the result carries no
//      `args`, it is returned to the caller as-is.
//   3. Decide between `--continue` and `--session-id <id>` from the supplied
//      session parameters and the active Claude session, and make sure a
//      session record exists for the chosen ID.
//   4. Start the job through asyncJobManager.startJob and return a JSON text
//      payload containing the job, session ID, approval decision and
//      MCP-server summary (plus reviewIntegrity when it lists violations).
// Anything thrown in steps 3-4 is converted with createErrorResponse.
server.tool("claude_request_async", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Claude"),
    model: z.string().optional()
        .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
    outputFormat: z.enum(["text", "json", "stream-json"]).default("text")
        .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
    continueSession: z.boolean().default(false).describe("Continue active session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
    dangerouslySkipPermissions: z.boolean().default(false)
        .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
    permissionMode: z.enum(CLAUDE_PERMISSION_MODES).optional()
        .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
    // U25 — Claude high-impact features
    agent: z.string().optional()
        .describe("Claude --agent: dispatch to a named single sub-agent."),
    agents: z.record(z.record(z.unknown())).optional()
        .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
    forkSession: z.boolean().optional()
        .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
    systemPrompt: z.string().optional()
        .describe("Claude --system-prompt: replace the system prompt entirely."),
    appendSystemPrompt: z.string().optional()
        .describe("Claude --append-system-prompt: append to the existing system prompt."),
    maxBudgetUsd: z.number().positive().optional()
        .describe("Claude --max-budget-usd: spend cap for this request in USD."),
    maxTurns: z.number().int().positive().optional()
        .describe("Claude --max-turns: cap on agent loop iterations."),
    effort: z.enum(CLAUDE_EFFORT_LEVELS).optional()
        .describe("Claude --effort: low|medium|high|xhigh|max."),
    excludeDynamicSystemPromptSections: z.boolean().optional()
        .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP servers exposed to Claude"),
    strictMcpConfig: z.boolean().default(false)
        .describe("Restrict Claude to provided MCP config only"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (params) => {
    const {
        prompt, model, outputFormat,
        sessionId, continueSession, createNewSession,
        allowedTools, disallowedTools,
        dangerouslySkipPermissions, permissionMode,
        agent, agents, forkSession,
        systemPrompt, appendSystemPrompt,
        maxBudgetUsd, maxTurns, effort,
        excludeDynamicSystemPromptSections,
        approvalStrategy, approvalPolicy,
        mcpServers, strictMcpConfig,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh,
    } = params;

    // The two system-prompt options cannot be combined.
    const bothPromptsGiven = systemPrompt !== undefined && appendSystemPrompt !== undefined;
    if (bothPromptsGiven) {
        const conflict = new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both).");
        return createErrorResponse("claude", 1, "", correlationId, conflict);
    }

    const prepInput = {
        prompt,
        model,
        outputFormat,
        allowedTools,
        disallowedTools,
        dangerouslySkipPermissions,
        permissionMode,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        strictMcpConfig,
        correlationId,
        optimizePrompt,
        operation: "claude_request_async",
        agent,
        agents,
        forkSession,
        systemPrompt,
        appendSystemPrompt,
        maxBudgetUsd,
        maxTurns,
        effort,
        excludeDynamicSystemPromptSections,
    };
    const prep = prepareClaudeRequest(prepInput, runtime);
    // A result without `args` is handed straight back to the caller.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;

    try {
        // Session management (before job start for async)
        const activeSession = await sessionManager.getActiveSession("claude");

        // With no explicit session instruction, fall back to the active session.
        const adoptActive = !createNewSession && !continueSession && !sessionId && Boolean(activeSession);
        const effectiveSessionId = adoptActive ? activeSession.id : sessionId;

        // Continue when asked to, when adopting the active session, or when the
        // requested ID is the active session's ID.
        const targetsActive = Boolean(effectiveSessionId) && activeSession?.id === effectiveSessionId;
        const useContinue = adoptActive || continueSession || targetsActive;

        if (useContinue) {
            args.push("--continue");
        }
        else if (effectiveSessionId) {
            args.push("--session-id", effectiveSessionId);
            await sessionManager.updateSessionUsage(effectiveSessionId);
        }

        // Create a session record for the chosen ID if none exists yet.
        if (effectiveSessionId) {
            const knownSession = await sessionManager.getSession(effectiveSessionId);
            if (!knownSession) {
                await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
            }
        }

        // Idle timeout only for stream-json (text/json produce no output until done)
        let effectiveIdleTimeout;
        if (outputFormat === "stream-json") {
            effectiveIdleTimeout = resolveIdleTimeout("claude", idleTimeoutMs);
        }

        const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
        logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);

        const hasIntegrityViolations = prep.reviewIntegrity?.violations.length > 0;
        const asyncResponse = {
            success: true,
            job,
            sessionId: effectiveSessionId || activeSession?.id || null,
            approval: approvalDecision,
            mcpServers: {
                requested: requestedMcpServers,
                enabled: mcpConfig?.enabled,
                missing: mcpConfig?.missing,
            },
            // reviewIntegrity is only included when it lists at least one violation.
            ...(hasIntegrityViolations ? { reviewIntegrity: prep.reviewIntegrity } : {}),
        };
        const payloadText = JSON.stringify(asyncResponse, null, 2);
        return {
            content: [{ type: "text", text: payloadText }],
        };
    }
    catch (error) {
        return createErrorResponse("claude_request_async", 1, "", corrId, error);
    }
});
2927
// Registers the `codex_request_async` tool on `server`.
// The schema object lists every accepted parameter; the handler forwards
// exactly those parameters to handleCodexRequestAsync along with the shared
// sessionManager, asyncJobManager, logger and runtime, and returns its result.
server.tool("codex_request_async", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Codex"),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
    fullAuto: z.boolean().default(false)
        .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
    sandboxMode: z.enum(CODEX_SANDBOX_MODES).optional()
        .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
    askForApproval: z.enum(CODEX_ASK_FOR_APPROVAL_MODES).optional()
        .describe("Codex --ask-for-approval: untrusted|on-request|never."),
    useLegacyFullAutoFlag: z.boolean().default(false)
        .describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
    dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false)
        .describe("Run Codex without approvals/sandbox"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
    sessionId: z.string().optional()
        .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
    resumeLatest: z.boolean().default(false)
        .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
    createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
    outputFormat: z.enum(["text", "json"]).default("text")
        .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
    // U26: high-impact feature flags. All optional.
    outputSchema: z.union([z.string(), z.record(z.unknown())]).optional()
        .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
    search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
    profile: z.string().optional().describe("Codex --profile <name>."),
    configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA
        .describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
    ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
    images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
    ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
    ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
}, async (params) => {
    const {
        prompt, model,
        fullAuto, sandboxMode, askForApproval,
        useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy, approvalPolicy, mcpServers,
        sessionId, resumeLatest, createNewSession,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh, outputFormat,
        outputSchema, search, profile, configOverrides,
        ephemeral, images, ignoreUserConfig, ignoreRules,
    } = params;
    const deps = { sessionManager, asyncJobManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model,
        fullAuto, sandboxMode, askForApproval,
        useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy, approvalPolicy, mcpServers,
        sessionId, resumeLatest, createNewSession,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh, outputFormat,
        outputSchema, search, profile, configOverrides,
        ephemeral, images, ignoreUserConfig, ignoreRules,
    };
    return handleCodexRequestAsync(deps, request);
});
3035
// Registers the `gemini_request_async` tool on `server`.
// The schema object lists every accepted parameter; the handler forwards
// exactly those parameters to handleGeminiRequestAsync along with the shared
// sessionManager, asyncJobManager, logger and runtime, and returns its result.
server.tool("gemini_request_async", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Gemini"),
    model: z.string().optional()
        .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional()
        .describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional()
        .describe("Approval: default|auto_edit|yolo|plan"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` makes the CLI emit `-o json` so token usage can be extracted
    // via parseGeminiJson. The default stays `text`, so existing callers see
    // no behavior change.
    outputFormat: z.enum(["text", "json"]).default("text")
        .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
    sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox
        .describe("Run Gemini in sandbox mode (-s)"),
    policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles
        .describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
    adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles
        .describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
    attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments
        .describe("Absolute file paths prepended as @<path> tokens to the prompt"),
}, async (params) => {
    const {
        prompt, model, sessionId, resumeLatest, createNewSession,
        approvalMode, approvalStrategy, approvalPolicy,
        mcpServers, allowedTools, includeDirs,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh, outputFormat,
        sandbox, policyFiles, adminPolicyFiles, attachments,
    } = params;
    const deps = { sessionManager, asyncJobManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model, sessionId, resumeLatest, createNewSession,
        approvalMode, approvalStrategy, approvalPolicy,
        mcpServers, allowedTools, includeDirs,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh, outputFormat,
        sandbox, policyFiles, adminPolicyFiles, attachments,
    };
    return handleGeminiRequestAsync(deps, request);
});
3119
// Registers the `grok_request_async` tool on `server`.
// The schema object lists every accepted parameter; the handler forwards
// exactly those parameters to handleGrokRequestAsync along with the shared
// sessionManager, asyncJobManager, logger and runtime, and returns its result.
server.tool("grok_request_async", {
    prompt: z.string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Grok"),
    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
    outputFormat: z.enum(["plain", "json", "streaming-json"]).optional()
        .describe("Output format (plain|json|streaming-json). Grok default is plain."),
    sessionId: z.string().optional()
        .describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false)
        .describe("Resume most recent Grok session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    alwaysApprove: z.boolean().default(false)
        .describe("Auto-approve all tool executions (--always-approve)"),
    permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional()
        .describe("Grok permission mode"),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional()
        .describe("Grok effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional()
        .describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"])
        .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
    allowedTools: z.array(z.string()).optional()
        .describe("Allowed built-in tools (passed as --tools comma list)"),
    disallowedTools: z.array(z.string()).optional()
        .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (params) => {
    const {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        alwaysApprove, permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh,
    } = params;
    const deps = { sessionManager, asyncJobManager, logger, runtime };
    // Only the parameters declared in the schema are forwarded, in schema order.
    const request = {
        prompt, model, outputFormat,
        sessionId, resumeLatest, createNewSession,
        alwaysApprove, permissionMode, effort, reasoningEffort,
        approvalStrategy, approvalPolicy, mcpServers,
        allowedTools, disallowedTools,
        correlationId, optimizePrompt,
        idleTimeoutMs, forceRefresh,
    };
    return handleGrokRequestAsync(deps, request);
});
3208
// Registers the "mistral_request_async" tool. The schema mirrors the other
// *_request_async tools; the handler forwards the validated arguments
// unchanged to handleMistralRequestAsync.
server.tool("mistral_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Mistral Vibe"),
    model: z.string().optional().describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
    outputFormat: z.enum(["plain", "json", "stream-json"]).optional().describe("Output format (plain|json|stream-json). Vibe default is plain."),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Vibe session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    permissionMode: z.enum(MISTRAL_AGENT_MODES).optional().describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Vibe effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
    disallowedTools: z.array(z.string()).optional().describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (args) => {
    const handlerDeps = { sessionManager, asyncJobManager, logger, runtime };
    // Spell the fields out so exactly these keys are forwarded, in this order.
    const request = {
        prompt: args.prompt,
        model: args.model,
        outputFormat: args.outputFormat,
        sessionId: args.sessionId,
        resumeLatest: args.resumeLatest,
        createNewSession: args.createNewSession,
        permissionMode: args.permissionMode,
        effort: args.effort,
        reasoningEffort: args.reasoningEffort,
        approvalStrategy: args.approvalStrategy,
        approvalPolicy: args.approvalPolicy,
        mcpServers: args.mcpServers,
        allowedTools: args.allowedTools,
        disallowedTools: args.disallowedTools,
        correlationId: args.correlationId,
        optimizePrompt: args.optimizePrompt,
        idleTimeoutMs: args.idleTimeoutMs,
        forceRefresh: args.forceRefresh,
    };
    return handleMistralRequestAsync(handlerDeps, request);
});
3295
// Registers "llm_job_status": returns the snapshot of an async job, or a
// "Job not found" error payload when the job manager has no such job.
server.tool("llm_job_status", {
    jobId: z.string().describe("Async job ID from *_request_async"),
}, async ({ jobId }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    const job = asyncJobManager.getJobSnapshot(jobId);
    return job
        ? { content: [asText({ success: true, job })] }
        : { content: [asText({ success: false, error: "Job not found", jobId })], isError: true };
});
3326
// Registers "llm_job_result": returns the captured result of an async job.
// When the job's output format is stream-json and stdout is non-empty, a
// parsed summary (text, cost, usage, model, turns) is attached as well.
server.tool("llm_job_result", {
    jobId: z.string().describe("Async job ID from *_request_async"),
    maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream"),
}, async ({ jobId, maxChars }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    const result = asyncJobManager.getJobResult(jobId, maxChars);
    if (!result) {
        return {
            content: [asText({ success: false, error: "Job not found", jobId })],
            isError: true,
        };
    }
    const payload = { success: true, result };
    // Parse stream-json output so callers do not have to decode the raw stream.
    if (asyncJobManager.getJobOutputFormat(jobId) === "stream-json" && result.stdout) {
        const parsed = parseStreamJson(result.stdout);
        if (parsed) {
            const { text, costUsd, usage, model, numTurns } = parsed;
            payload.parsed = { text, costUsd, usage, model, numTurns };
        }
    }
    return { content: [asText(payload)] };
});
3381
// Registers "llm_job_cancel": asks the job manager to cancel a job and
// reports either success or the reason the cancellation was refused.
server.tool("llm_job_cancel", {
    jobId: z.string().describe("Async job ID from *_request_async"),
}, async ({ jobId }) => {
    const outcome = asyncJobManager.cancelJob(jobId);
    const wasCanceled = Boolean(outcome.canceled);
    const body = wasCanceled
        ? { success: true, jobId }
        : { success: false, jobId, reason: outcome.reason || "Unable to cancel" };
    const response = {
        content: [{ type: "text", text: JSON.stringify(body, null, 2) }],
    };
    if (!wasCanceled) {
        response.isError = true;
    }
    return response;
});
3412
// Registers "llm_process_health": exposes the async job manager's health
// report, flattened next to a success flag.
server.tool("llm_process_health", {}, async () => {
    const report = { success: true, ...asyncJobManager.getJobHealth() };
    return {
        content: [{ type: "text", text: JSON.stringify(report, null, 2) }],
    };
});
3423
+ //──────────────────────────────────────────────────────────────────────────────
3424
+ // Approval Audit Tools
3425
+ //──────────────────────────────────────────────────────────────────────────────
3426
// Registers "approval_list": returns up to `limit` approval records from the
// approval manager, optionally filtered by CLI.
server.tool("approval_list", {
    limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
    cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional().describe("Optional CLI filter"),
}, async ({ limit, cli }) => {
    const approvals = approvalManager.list(limit, cli);
    const body = { success: true, count: approvals.length, approvals };
    return {
        content: [{ type: "text", text: JSON.stringify(body, null, 2) }],
    };
});
3453
+ //──────────────────────────────────────────────────────────────────────────────
3454
+ // List Models Tool
3455
+ //──────────────────────────────────────────────────────────────────────────────
3456
// Registers "list_models": returns getCliInfo() for every CLI, or only the
// entry for the requested CLI. Empty-string and null filters count as "no filter".
server.tool("list_models", {
    cli: z
        .preprocess((raw) => {
            if (raw === "" || raw === null) {
                return undefined;
            }
            return raw;
        }, z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
        .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    const everything = getCliInfo();
    let selection = everything;
    if (cli) {
        selection = { [cli]: everything[cli] };
    }
    return { content: [{ type: "text", text: JSON.stringify(selection, null, 2) }] };
});
3465
// Registers "cli_versions": reports the versions returned by getCliVersions
// for the requested CLI (or for all CLIs when no filter is given).
// Failures are returned as a structured { success: false, error } payload
// with isError set, matching the error shape used by "cli_upgrade".
server.tool("cli_versions", {
    cli: z
        .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
        .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    try {
        const versions = await getCliVersions(cli);
        return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        success: false,
                        error: message,
                    }, null, 2),
                },
            ],
            isError: true,
        };
    }
});
3473
// Registers "cli_upgrade": delegates to runCliUpgrade and returns its result.
// dryRun defaults to true, so by default only the upgrade plan is requested.
// Any thrown error is reported as { success: false, error } with isError set.
server.tool("cli_upgrade", {
    cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
    target: z.string().min(1).default("latest").describe("Package tag/version/target to install (default: latest)"),
    dryRun: z.boolean().default(true).describe("When true, return the upgrade plan without running it"),
    timeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Upgrade timeout in ms when dryRun=false"),
}, async ({ cli, target, dryRun, timeoutMs }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    try {
        const upgrade = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
        return { content: [asText({ success: true, ...upgrade })] };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return {
            content: [asText({ success: false, error: message })],
            isError: true,
        };
    }
});
3522
+ //──────────────────────────────────────────────────────────────────────────────
3523
+ // Session Management Tools
3524
+ //──────────────────────────────────────────────────────────────────────────────
3525
// Registers "session_create": creates a session for the given CLI and, when
// setAsActive is true, makes it the active session for that CLI.
// The reported isActive flag reflects the outcome of setActiveSession (which
// returns a falsy value on failure) instead of merely echoing the request.
server.tool("session_create", {
    cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
    description: z.string().optional().describe("Session description"),
    setAsActive: z.boolean().default(true).describe("Set as active session"),
}, async ({ cli, description, setAsActive }) => {
    try {
        const session = await sessionManager.createSession(cli, description);
        // Only claim the session is active if activation was requested AND succeeded.
        const isActive = setAsActive
            ? Boolean(await sessionManager.setActiveSession(cli, session.id))
            : false;
        logger.info(`Created new ${cli} session: ${session.id}`);
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        success: true,
                        session: {
                            id: session.id,
                            cli: session.cli,
                            description: session.description,
                            createdAt: session.createdAt,
                            isActive,
                        },
                    }, null, 2),
                },
            ],
        };
    }
    catch (error) {
        return createErrorResponse("session_create", 1, "", undefined, error);
    }
});
3558
// Registers "session_list": lists sessions (optionally filtered by CLI) and
// reports the active session ID per provider.
// The session listing and the per-provider active-session lookups are
// independent, so they are issued together rather than awaited one by one;
// the provider list is declared once so the lookup and the response agree.
server.tool("session_list", {
    cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    try {
        const providers = ["claude", "codex", "gemini", "grok", "mistral"];
        const [sessions, activeList] = await Promise.all([
            sessionManager.listSessions(cli),
            Promise.all(providers.map(provider => sessionManager.getActiveSession(provider))),
        ]);
        const activeSessions = Object.fromEntries(providers.map((provider, index) => [provider, activeList[index]]));
        const sessionList = sessions.map(s => ({
            id: s.id,
            cli: s.cli,
            description: s.description,
            createdAt: s.createdAt,
            lastUsedAt: s.lastUsedAt,
            isActive: activeSessions[s.cli]?.id === s.id,
        }));
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        total: sessionList.length,
                        sessions: sessionList,
                        // Same key order as the provider list; null when no session is active.
                        activeSessions: Object.fromEntries(providers.map(provider => [provider, activeSessions[provider]?.id || null])),
                    }, null, 2),
                },
            ],
        };
    }
    catch (error) {
        return createErrorResponse("session_list", 1, "", undefined, error);
    }
});
3601
// Registers "session_set_active": sets (or, with a null/empty ID, clears) the
// active session for a CLI. A falsy result from the session manager is
// reported as "not found / wrong CLI".
server.tool("session_set_active", {
    cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
    sessionId: z.string().nullable().describe("Session ID (null to clear)"),
}, async ({ cli, sessionId }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    try {
        const activated = await sessionManager.setActiveSession(cli, sessionId || null);
        if (activated) {
            logger.info(`Set active ${cli} session to: ${sessionId}`);
            return {
                content: [asText({ success: true, cli, activeSessionId: sessionId })],
            };
        }
        return {
            content: [
                asText({
                    success: false,
                    error: "Session not found or does not belong to the specified CLI",
                }),
            ],
            isError: true,
        };
    }
    catch (error) {
        return createErrorResponse("session_set_active", 1, "", undefined, error);
    }
});
3639
// Registers "session_delete": looks the session up first so the response can
// echo what was deleted, then removes it via the session manager.
server.tool("session_delete", {
    sessionId: z.string().describe("Session ID"),
}, async ({ sessionId }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    try {
        const existing = await sessionManager.getSession(sessionId);
        if (!existing) {
            return {
                content: [asText({ success: false, error: "Session not found" })],
                isError: true,
            };
        }
        const success = await sessionManager.deleteSession(sessionId);
        logger.info(`Deleted session: ${sessionId}`);
        const { id, cli, description } = existing;
        return {
            content: [asText({ success, deletedSession: { id, cli, description } })],
        };
    }
    catch (error) {
        return createErrorResponse("session_delete", 1, "", undefined, error);
    }
});
3680
// Registers "session_get": returns the stored session plus an isActive flag
// computed by comparing it with the active session of the same CLI.
server.tool("session_get", {
    sessionId: z.string().describe("Session ID"),
}, async ({ sessionId }) => {
    const asText = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    try {
        const found = await sessionManager.getSession(sessionId);
        if (!found) {
            return {
                content: [asText({ success: false, error: "Session not found" })],
                isError: true,
            };
        }
        const current = await sessionManager.getActiveSession(found.cli);
        const details = { ...found, isActive: current?.id === found.id };
        return {
            content: [asText({ success: true, session: details })],
        };
    }
    catch (error) {
        return createErrorResponse("session_get", 1, "", undefined, error);
    }
});
3719
// Registers "session_clear_all": clears every session, or only those of one
// CLI, and reports how many were removed.
server.tool("session_clear_all", {
    cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
}, async ({ cli }) => {
    try {
        const removed = await sessionManager.clearAllSessions(cli);
        const scopeSuffix = cli ? ` for ${cli}` : "";
        logger.info(`Cleared ${removed} sessions${scopeSuffix}`);
        const body = {
            success: true,
            deletedCount: removed,
            cli: cli || "all",
        };
        return {
            content: [{ type: "text", text: JSON.stringify(body, null, 2) }],
        };
    }
    catch (error) {
        return createErrorResponse("session_clear_all", 1, "", undefined, error);
    }
});
3742
+ return server;
3743
+ }
1948
3744
  //──────────────────────────────────────────────────────────────────────────────
1949
3745
  // Async Initialization
1950
3746
  //──────────────────────────────────────────────────────────────────────────────
@@ -1967,7 +3763,7 @@ async function initializeSessionManager() {
1967
3763
  //──────────────────────────────────────────────────────────────────────────────
1968
3764
  // Health Check Resource (only if using PostgreSQL)
1969
3765
  //──────────────────────────────────────────────────────────────────────────────
1970
- function registerHealthResource() {
3766
+ function registerHealthResource(server) {
1971
3767
  if (db) {
1972
3768
  server.registerResource("health", "health://status", {
1973
3769
  title: "🏥 Health Status",
@@ -2015,8 +3811,16 @@ async function shutdown(signal) {
2015
3811
  // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
2016
3812
  await killAllProcessGroups();
2017
3813
  logger.info("All process groups terminated");
2018
- await server.close();
2019
- logger.info("MCP server closed");
3814
+ if (activeHttpGateway) {
3815
+ await activeHttpGateway.close();
3816
+ logger.info("HTTP MCP transport closed");
3817
+ activeHttpGateway = null;
3818
+ }
3819
+ if (activeServer) {
3820
+ await activeServer.close();
3821
+ logger.info("MCP server closed");
3822
+ activeServer = null;
3823
+ }
2020
3824
  if (db) {
2021
3825
  await db.disconnect();
2022
3826
  logger.info("Database connections closed");
@@ -2036,13 +3840,52 @@ process.on("SIGINT", () => shutdown("SIGINT"));
2036
3840
  // Server Startup
2037
3841
  //──────────────────────────────────────────────────────────────────────────────
2038
3842
  async function main() {
2039
- logger.info("Starting llm-cli-gateway MCP server");
3843
+ const args = process.argv.slice(2);
3844
+ if (args[0] === "doctor") {
3845
+ if (args.includes("--json")) {
3846
+ printDoctorJson();
3847
+ return;
3848
+ }
3849
+ process.stderr.write("Only doctor --json is supported in this layer.\n");
3850
+ process.exit(2);
3851
+ }
3852
+ const transportArg = args.find(arg => arg.startsWith("--transport="));
3853
+ const transportMode = transportArg?.split("=")[1] ||
3854
+ process.env.LLM_GATEWAY_TRANSPORT ||
3855
+ process.env.MCP_TRANSPORT ||
3856
+ "stdio";
3857
+ logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
2040
3858
  // Initialize session manager first
2041
3859
  await initializeSessionManager();
3860
+ const serverDeps = {
3861
+ sessionManager,
3862
+ resourceProvider,
3863
+ db,
3864
+ performanceMetrics,
3865
+ asyncJobManager,
3866
+ approvalManager,
3867
+ flightRecorder,
3868
+ logger,
3869
+ };
3870
+ if (transportMode === "http") {
3871
+ activeHttpGateway = await startHttpGateway({
3872
+ deps: serverDeps,
3873
+ createGatewayServer,
3874
+ logger,
3875
+ });
3876
+ logger.info(`llm-cli-gateway HTTP MCP server connected and ready at ${activeHttpGateway.url}`);
3877
+ return;
3878
+ }
3879
+ if (transportMode !== "stdio") {
3880
+ throw new Error(`Unsupported transport: ${transportMode}`);
3881
+ }
3882
+ activeServer = createGatewayServer({
3883
+ ...serverDeps,
3884
+ });
2042
3885
  // Register health check resource if using PostgreSQL
2043
- registerHealthResource();
3886
+ registerHealthResource(activeServer);
2044
3887
  const transport = new StdioServerTransport();
2045
- await server.connect(transport);
3888
+ await activeServer.connect(transport);
2046
3889
  logger.info("llm-cli-gateway MCP server connected and ready");
2047
3890
  }
2048
3891
  // Guard: only auto-start when run directly (not imported for testing)