llm-cli-gateway 1.4.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +67 -1
  2. package/README.md +111 -8
  3. package/dist/approval-manager.d.ts +1 -1
  4. package/dist/async-job-manager.d.ts +24 -2
  5. package/dist/async-job-manager.js +71 -7
  6. package/dist/auth.d.ts +15 -0
  7. package/dist/auth.js +46 -0
  8. package/dist/cli-updater.d.ts +19 -2
  9. package/dist/cli-updater.js +110 -7
  10. package/dist/codex-json-parser.d.ts +34 -0
  11. package/dist/codex-json-parser.js +105 -0
  12. package/dist/doctor.d.ts +110 -0
  13. package/dist/doctor.js +280 -0
  14. package/dist/endpoint-exposure.d.ts +22 -0
  15. package/dist/endpoint-exposure.js +231 -0
  16. package/dist/executor.d.ts +2 -0
  17. package/dist/executor.js +2 -2
  18. package/dist/flight-recorder.d.ts +3 -1
  19. package/dist/flight-recorder.js +31 -2
  20. package/dist/gateway-server.d.ts +2 -0
  21. package/dist/gateway-server.js +1 -0
  22. package/dist/gemini-json-parser.d.ts +21 -0
  23. package/dist/gemini-json-parser.js +47 -0
  24. package/dist/health.d.ts +7 -0
  25. package/dist/health.js +22 -0
  26. package/dist/http-transport.d.ts +22 -0
  27. package/dist/http-transport.js +164 -0
  28. package/dist/index.d.ts +183 -2
  29. package/dist/index.js +2629 -1411
  30. package/dist/logger.d.ts +9 -0
  31. package/dist/logger.js +14 -0
  32. package/dist/model-registry.js +40 -6
  33. package/dist/provider-login-guidance.d.ts +21 -0
  34. package/dist/provider-login-guidance.js +98 -0
  35. package/dist/provider-status.d.ts +41 -0
  36. package/dist/provider-status.js +203 -0
  37. package/dist/request-helpers.d.ts +484 -4
  38. package/dist/request-helpers.js +613 -0
  39. package/dist/resources.js +44 -0
  40. package/dist/session-manager-pg.js +1 -0
  41. package/dist/session-manager.d.ts +1 -1
  42. package/dist/session-manager.js +2 -1
  43. package/dist/validation-normalizer.d.ts +23 -0
  44. package/dist/validation-normalizer.js +79 -0
  45. package/dist/validation-orchestrator.d.ts +47 -0
  46. package/dist/validation-orchestrator.js +145 -0
  47. package/dist/validation-prompts.d.ts +15 -0
  48. package/dist/validation-prompts.js +52 -0
  49. package/dist/validation-report.d.ts +57 -0
  50. package/dist/validation-report.js +129 -0
  51. package/dist/validation-tools.d.ts +7 -0
  52. package/dist/validation-tools.js +198 -0
  53. package/package.json +15 -5
  54. package/setup/status.schema.json +271 -0
package/dist/index.js CHANGED
@@ -8,6 +8,8 @@ import { fileURLToPath } from "url";
8
8
  import { z } from "zod";
9
9
  import { executeCli, killAllProcessGroups } from "./executor.js";
10
10
  import { parseStreamJson } from "./stream-json-parser.js";
11
+ import { parseCodexJsonStream } from "./codex-json-parser.js";
12
+ import { parseGeminiJson } from "./gemini-json-parser.js";
11
13
  import { createSessionManager } from "./session-manager.js";
12
14
  import { ResourceProvider } from "./resources.js";
13
15
  import { PerformanceMetrics } from "./metrics.js";
@@ -20,14 +22,20 @@ import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
20
22
  import { ApprovalManager } from "./approval-manager.js";
21
23
  import { checkReviewIntegrity } from "./review-integrity.js";
22
24
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
23
- import { resolveSessionResumeArgs, resolveGrokSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
25
+ import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
24
26
  import { createFlightRecorder } from "./flight-recorder.js";
25
27
  import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
28
+ import { startHttpGateway } from "./http-transport.js";
29
+ import { printDoctorJson } from "./doctor.js";
30
+ import { registerValidationTools } from "./validation-tools.js";
26
31
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
27
32
  const logger = {
28
33
  info: (message, ...args) => {
29
34
  console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
30
35
  },
36
+ warn: (message, ...args) => {
37
+ console.error(`[WARN] ${new Date().toISOString()} - ${message}`, ...args);
38
+ },
31
39
  error: (message, ...args) => {
32
40
  console.error(`[ERROR] ${new Date().toISOString()} - ${message}`, ...args);
33
41
  },
@@ -90,20 +98,23 @@ const loadedSkills = loadSkills();
90
98
  // system prompt at connection time. Covers key patterns + pointers to L2 resources.
91
99
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
92
100
 
93
- Tools: claude_request, codex_request, gemini_request, grok_request (sync) | *_request_async (async)
101
+ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
102
+ Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
94
103
  Jobs: llm_job_status, llm_job_result, llm_job_cancel
95
104
  Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
96
105
  Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
97
106
 
98
107
  Key behaviors:
99
108
  - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
100
- - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
109
+ - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Mistral --resume/--continue (requires session_logging.enabled=true in ~/.vibe/config.toml), Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
101
110
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
102
111
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
103
112
 
104
113
  Skills (full docs via MCP resources):
105
114
  ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}`;
106
- const server = new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
115
+ function newGatewayMcpServer() {
116
+ return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
117
+ }
107
118
  // Global state (initialized asynchronously)
108
119
  let sessionManager;
109
120
  let db = null;
@@ -127,11 +138,48 @@ const jobStore = (() => {
127
138
  return null;
128
139
  }
129
140
  })();
130
- const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
131
- performanceMetrics.recordRequest(cli, durationMs, success);
132
- }, jobStore);
141
+ function newAsyncJobManager(metrics, runtimeLogger, store = jobStore) {
142
+ return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
143
+ metrics.recordRequest(cli, durationMs, success);
144
+ }, store);
145
+ }
146
+ const asyncJobManager = newAsyncJobManager(performanceMetrics, logger);
133
147
  const approvalManager = new ApprovalManager(undefined, logger);
134
148
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
149
+ // U22: Session-provider enum extended to five providers. The storage layer's
150
+ // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
151
+ // session_create / session_list / session_clear_all accept the fifth provider.
152
+ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
153
+ export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
154
+ let activeServer = null;
155
+ let activeHttpGateway = null;
156
+ function resolveGatewayServerRuntime(deps = {}, options = {}) {
157
+ const runtimeLogger = deps.logger ?? logger;
158
+ const runtimeSessionManager = deps.sessionManager ?? sessionManager;
159
+ const runtimePerformanceMetrics = deps.performanceMetrics ??
160
+ (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
161
+ const runtimeAsyncJobManager = deps.asyncJobManager ??
162
+ (options.isolateState
163
+ ? // Factory-created test/HTTP session servers must not mark another instance's
164
+ // durable jobs orphaned. Stdio startup injects the process-global manager.
165
+ newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
166
+ : asyncJobManager);
167
+ const runtimeApprovalManager = deps.approvalManager ??
168
+ (options.isolateState ? new ApprovalManager(undefined, runtimeLogger) : approvalManager);
169
+ return {
170
+ sessionManager: runtimeSessionManager,
171
+ resourceProvider: deps.resourceProvider ??
172
+ (options.isolateState
173
+ ? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
174
+ : resourceProvider),
175
+ db: "db" in deps ? (deps.db ?? null) : db,
176
+ performanceMetrics: runtimePerformanceMetrics,
177
+ asyncJobManager: runtimeAsyncJobManager,
178
+ approvalManager: runtimeApprovalManager,
179
+ flightRecorder: deps.flightRecorder ?? flightRecorder,
180
+ logger: runtimeLogger,
181
+ };
182
+ }
135
183
  // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
136
184
  // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
137
185
  // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
@@ -140,6 +188,7 @@ const CLI_IDLE_TIMEOUTS = {
140
188
  codex: 600_000, // 10 minutes — Codex streams stderr progress
141
189
  gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
142
190
  grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
191
+ mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
143
192
  };
144
193
  function resolveIdleTimeout(cli, override) {
145
194
  if (override !== undefined)
@@ -151,27 +200,70 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
151
200
  * Start an async job and poll until completion or deadline.
152
201
  * Returns the job result if it finishes in time, or a deferral marker.
153
202
  */
154
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh) {
203
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete) {
204
+ // U26 fix: ownership of onComplete is a contract. Once this function returns
205
+ // OR throws, the caller MUST consider onComplete consumed — i.e. it has
206
+ // either been run, or the AsyncJobManager has taken ownership of it. The
207
+ // caller never needs to reclaim.
208
+ let onCompleteOwnedByCaller = onComplete !== undefined;
209
+ const consumeOnComplete = () => {
210
+ if (!onCompleteOwnedByCaller || !onComplete)
211
+ return;
212
+ onCompleteOwnedByCaller = false;
213
+ try {
214
+ onComplete();
215
+ }
216
+ catch (err) {
217
+ runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
218
+ }
219
+ };
155
220
  if (SYNC_DEADLINE_MS === 0) {
156
221
  // Disabled — fall through to direct execution.
157
222
  // Note: direct execution bypasses dedup. forceRefresh is implied.
158
- return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
223
+ const command = cli === "mistral" ? "vibe" : cli;
224
+ try {
225
+ return await executeCli(command, args, {
226
+ idleTimeout: idleTimeoutMs,
227
+ logger: runtime.logger,
228
+ env: env ? { ...process.env, ...env } : undefined,
229
+ });
230
+ }
231
+ finally {
232
+ // Direct-execution path completes inline; release per-request resources
233
+ // (e.g. outputSchema temp files) here.
234
+ consumeOnComplete();
235
+ }
236
+ }
237
+ let outcome;
238
+ try {
239
+ outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
240
+ idleTimeoutMs,
241
+ outputFormat,
242
+ forceRefresh,
243
+ env,
244
+ onComplete,
245
+ });
246
+ // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
247
+ // fireOnComplete on terminal status, or run inline immediately for dedup).
248
+ onCompleteOwnedByCaller = false;
249
+ }
250
+ catch (err) {
251
+ // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
252
+ // registered, so onComplete will never be called by the manager. Reclaim
253
+ // here so the temp file is not leaked.
254
+ consumeOnComplete();
255
+ throw err;
159
256
  }
160
- const outcome = asyncJobManager.startJobWithDedup(cli, args, corrId, {
161
- idleTimeoutMs,
162
- outputFormat,
163
- forceRefresh,
164
- });
165
257
  const job = outcome.snapshot;
166
258
  if (outcome.deduped) {
167
- logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
259
+ runtime.logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
168
260
  }
169
261
  const deadline = Date.now() + SYNC_DEADLINE_MS;
170
262
  while (Date.now() < deadline) {
171
- const snapshot = asyncJobManager.getJobSnapshot(job.id);
263
+ const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
172
264
  if (snapshot && snapshot.status !== "running") {
173
265
  // Job finished within deadline — extract result
174
- const result = asyncJobManager.getJobResult(job.id);
266
+ const result = runtime.asyncJobManager.getJobResult(job.id);
175
267
  if (!result) {
176
268
  return { stdout: "", stderr: "Job result unavailable", code: 1 };
177
269
  }
@@ -184,7 +276,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
184
276
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
185
277
  }
186
278
  // Deadline exceeded — return deferral
187
- logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
279
+ runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
188
280
  return {
189
281
  deferred: true,
190
282
  jobId: job.id,
@@ -262,28 +354,60 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
262
354
  function extractUsageAndCost(cli, output, outputFormat) {
263
355
  if (cli === "claude" && outputFormat === "stream-json") {
264
356
  const parsed = parseStreamJson(output);
357
+ if (!parsed.usage) {
358
+ return { costUsd: parsed.costUsd ?? undefined };
359
+ }
265
360
  return {
266
- inputTokens: parsed.usage?.inputTokens,
267
- outputTokens: parsed.usage?.outputTokens,
361
+ inputTokens: parsed.usage.inputTokens,
362
+ outputTokens: parsed.usage.outputTokens,
363
+ cacheReadTokens: parsed.usage.cacheReadInputTokens || undefined,
364
+ cacheCreationTokens: parsed.usage.cacheCreationInputTokens || undefined,
268
365
  costUsd: parsed.costUsd ?? undefined,
269
366
  };
270
367
  }
368
+ if (cli === "codex" && outputFormat === "json") {
369
+ const parsed = parseCodexJsonStream(output);
370
+ if (!parsed.usage) {
371
+ return {};
372
+ }
373
+ return {
374
+ inputTokens: parsed.usage.input_tokens,
375
+ outputTokens: parsed.usage.output_tokens,
376
+ cacheReadTokens: parsed.usage.cache_read_tokens,
377
+ cacheCreationTokens: parsed.usage.cache_creation_tokens,
378
+ costUsd: parsed.usage.cost_usd,
379
+ };
380
+ }
381
+ if (cli === "gemini" && outputFormat === "json") {
382
+ const parsed = parseGeminiJson(output);
383
+ if (!parsed || !parsed.usage) {
384
+ return {};
385
+ }
386
+ return {
387
+ inputTokens: parsed.usage.input_tokens,
388
+ outputTokens: parsed.usage.output_tokens,
389
+ cacheReadTokens: parsed.usage.cache_read_tokens,
390
+ };
391
+ }
392
+ // Mistral/Vibe: does not surface usage in its stdout/stream-json output. A
393
+ // future unit can read it from `~/.vibe/logs/session/<id>/metadata.json`
394
+ // once we resolve the session id post-run.
271
395
  return {};
272
396
  }
273
- function safeFlightStart(entry) {
397
+ function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
274
398
  try {
275
- flightRecorder.logStart(entry);
399
+ runtime.flightRecorder.logStart(entry);
276
400
  }
277
401
  catch (error) {
278
- logger.error("Flight recorder logStart failed", error);
402
+ runtime.logger.error("Flight recorder logStart failed", error);
279
403
  }
280
404
  }
281
- function safeFlightComplete(correlationId, result) {
405
+ function safeFlightComplete(correlationId, result, runtime = resolveGatewayServerRuntime()) {
282
406
  try {
283
- flightRecorder.logComplete(correlationId, result);
407
+ runtime.flightRecorder.logComplete(correlationId, result);
284
408
  }
285
409
  catch (error) {
286
- logger.error("Flight recorder logComplete failed", error);
410
+ runtime.logger.error("Flight recorder logComplete failed", error);
287
411
  }
288
412
  }
289
413
  function createApprovalDeniedResponse(operation, decision) {
@@ -350,124 +474,146 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
350
474
  //──────────────────────────────────────────────────────────────────────────────
351
475
  // MCP Resources
352
476
  //──────────────────────────────────────────────────────────────────────────────
353
- // Register skill resources (L2: full docs, read on demand)
354
- for (const skill of loadedSkills) {
355
- server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
356
- title: skill.name,
357
- description: skill.description,
358
- mimeType: "text/markdown",
359
- }, async () => ({
360
- contents: [
361
- {
362
- uri: `skills://${skill.name}`,
363
- mimeType: "text/markdown",
364
- text: skill.content,
365
- },
366
- ],
367
- }));
477
+ function registerBaseResources(server, runtime) {
478
+ // Register skill resources (L2: full docs, read on demand)
479
+ for (const skill of loadedSkills) {
480
+ server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
481
+ title: skill.name,
482
+ description: skill.description,
483
+ mimeType: "text/markdown",
484
+ }, async () => ({
485
+ contents: [
486
+ {
487
+ uri: `skills://${skill.name}`,
488
+ mimeType: "text/markdown",
489
+ text: skill.content,
490
+ },
491
+ ],
492
+ }));
493
+ }
494
+ runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
495
+ // Register all sessions resource
496
+ server.registerResource("all-sessions", "sessions://all", {
497
+ title: "📋 All Sessions",
498
+ description: "All conversation sessions across CLIs",
499
+ mimeType: "application/json",
500
+ }, async (uri) => {
501
+ runtime.logger.debug("Reading all sessions resource");
502
+ const contents = await runtime.resourceProvider.readResource(uri.href);
503
+ return { contents: contents ? [contents] : [] };
504
+ });
505
+ // Register Claude sessions resource
506
+ server.registerResource("claude-sessions", "sessions://claude", {
507
+ title: "🤖 Claude Sessions",
508
+ description: "Claude conversation sessions",
509
+ mimeType: "application/json",
510
+ }, async (uri) => {
511
+ runtime.logger.debug("Reading Claude sessions resource");
512
+ const contents = await runtime.resourceProvider.readResource(uri.href);
513
+ return { contents: contents ? [contents] : [] };
514
+ });
515
+ // Register Codex sessions resource
516
+ server.registerResource("codex-sessions", "sessions://codex", {
517
+ title: "💻 Codex Sessions",
518
+ description: "Codex conversation sessions",
519
+ mimeType: "application/json",
520
+ }, async (uri) => {
521
+ runtime.logger.debug("Reading Codex sessions resource");
522
+ const contents = await runtime.resourceProvider.readResource(uri.href);
523
+ return { contents: contents ? [contents] : [] };
524
+ });
525
+ // Register Gemini sessions resource
526
+ server.registerResource("gemini-sessions", "sessions://gemini", {
527
+ title: "✨ Gemini Sessions",
528
+ description: "Gemini conversation sessions",
529
+ mimeType: "application/json",
530
+ }, async (uri) => {
531
+ runtime.logger.debug("Reading Gemini sessions resource");
532
+ const contents = await runtime.resourceProvider.readResource(uri.href);
533
+ return { contents: contents ? [contents] : [] };
534
+ });
535
+ // Register Grok sessions resource
536
+ server.registerResource("grok-sessions", "sessions://grok", {
537
+ title: "⚡ Grok Sessions",
538
+ description: "Grok conversation sessions",
539
+ mimeType: "application/json",
540
+ }, async (uri) => {
541
+ runtime.logger.debug("Reading Grok sessions resource");
542
+ const contents = await runtime.resourceProvider.readResource(uri.href);
543
+ return { contents: contents ? [contents] : [] };
544
+ });
545
+ // Register Mistral sessions resource
546
+ server.registerResource("mistral-sessions", "sessions://mistral", {
547
+ title: "🌬 Mistral Sessions",
548
+ description: "Mistral Vibe conversation sessions",
549
+ mimeType: "application/json",
550
+ }, async (uri) => {
551
+ runtime.logger.debug("Reading Mistral sessions resource");
552
+ const contents = await runtime.resourceProvider.readResource(uri.href);
553
+ return { contents: contents ? [contents] : [] };
554
+ });
555
+ // Register Claude models resource
556
+ server.registerResource("claude-models", "models://claude", {
557
+ title: "🧠 Claude Models",
558
+ description: "Claude models and capabilities",
559
+ mimeType: "application/json",
560
+ }, async (uri) => {
561
+ runtime.logger.debug("Reading Claude models resource");
562
+ const contents = await runtime.resourceProvider.readResource(uri.href);
563
+ return { contents: contents ? [contents] : [] };
564
+ });
565
+ // Register Codex models resource
566
+ server.registerResource("codex-models", "models://codex", {
567
+ title: "🔧 Codex Models",
568
+ description: "Codex models and capabilities",
569
+ mimeType: "application/json",
570
+ }, async (uri) => {
571
+ runtime.logger.debug("Reading Codex models resource");
572
+ const contents = await runtime.resourceProvider.readResource(uri.href);
573
+ return { contents: contents ? [contents] : [] };
574
+ });
575
+ // Register Gemini models resource
576
+ server.registerResource("gemini-models", "models://gemini", {
577
+ title: "🌟 Gemini Models",
578
+ description: "Gemini models and capabilities",
579
+ mimeType: "application/json",
580
+ }, async (uri) => {
581
+ runtime.logger.debug("Reading Gemini models resource");
582
+ const contents = await runtime.resourceProvider.readResource(uri.href);
583
+ return { contents: contents ? [contents] : [] };
584
+ });
585
+ // Register Grok models resource
586
+ server.registerResource("grok-models", "models://grok", {
587
+ title: "⚡ Grok Models",
588
+ description: "Grok models and capabilities",
589
+ mimeType: "application/json",
590
+ }, async (uri) => {
591
+ runtime.logger.debug("Reading Grok models resource");
592
+ const contents = await runtime.resourceProvider.readResource(uri.href);
593
+ return { contents: contents ? [contents] : [] };
594
+ });
595
+ // Register Mistral models resource
596
+ server.registerResource("mistral-models", "models://mistral", {
597
+ title: "🌬 Mistral Models",
598
+ description: "Mistral Vibe models and capabilities",
599
+ mimeType: "application/json",
600
+ }, async (uri) => {
601
+ runtime.logger.debug("Reading Mistral models resource");
602
+ const contents = await runtime.resourceProvider.readResource(uri.href);
603
+ return { contents: contents ? [contents] : [] };
604
+ });
605
+ // Register performance metrics resource
606
+ server.registerResource("performance-metrics", "metrics://performance", {
607
+ title: "📈 Performance Metrics",
608
+ description: "Request counts, latency, success/failure rates",
609
+ mimeType: "application/json",
610
+ }, async (uri) => {
611
+ runtime.logger.debug("Reading performance metrics resource");
612
+ const contents = await runtime.resourceProvider.readResource(uri.href);
613
+ return { contents: contents ? [contents] : [] };
614
+ });
368
615
  }
369
- logger.info(`Registered ${loadedSkills.length} skill resources`);
370
- // Register all sessions resource
371
- server.registerResource("all-sessions", "sessions://all", {
372
- title: "📋 All Sessions",
373
- description: "All conversation sessions across CLIs",
374
- mimeType: "application/json",
375
- }, async (uri) => {
376
- logger.debug("Reading all sessions resource");
377
- const contents = await resourceProvider.readResource(uri.href);
378
- return { contents: contents ? [contents] : [] };
379
- });
380
- // Register Claude sessions resource
381
- server.registerResource("claude-sessions", "sessions://claude", {
382
- title: "🤖 Claude Sessions",
383
- description: "Claude conversation sessions",
384
- mimeType: "application/json",
385
- }, async (uri) => {
386
- logger.debug("Reading Claude sessions resource");
387
- const contents = await resourceProvider.readResource(uri.href);
388
- return { contents: contents ? [contents] : [] };
389
- });
390
- // Register Codex sessions resource
391
- server.registerResource("codex-sessions", "sessions://codex", {
392
- title: "💻 Codex Sessions",
393
- description: "Codex conversation sessions",
394
- mimeType: "application/json",
395
- }, async (uri) => {
396
- logger.debug("Reading Codex sessions resource");
397
- const contents = await resourceProvider.readResource(uri.href);
398
- return { contents: contents ? [contents] : [] };
399
- });
400
- // Register Gemini sessions resource
401
- server.registerResource("gemini-sessions", "sessions://gemini", {
402
- title: "✨ Gemini Sessions",
403
- description: "Gemini conversation sessions",
404
- mimeType: "application/json",
405
- }, async (uri) => {
406
- logger.debug("Reading Gemini sessions resource");
407
- const contents = await resourceProvider.readResource(uri.href);
408
- return { contents: contents ? [contents] : [] };
409
- });
410
- // Register Grok sessions resource
411
- server.registerResource("grok-sessions", "sessions://grok", {
412
- title: "⚡ Grok Sessions",
413
- description: "Grok conversation sessions",
414
- mimeType: "application/json",
415
- }, async (uri) => {
416
- logger.debug("Reading Grok sessions resource");
417
- const contents = await resourceProvider.readResource(uri.href);
418
- return { contents: contents ? [contents] : [] };
419
- });
420
- // Register Claude models resource
421
- server.registerResource("claude-models", "models://claude", {
422
- title: "🧠 Claude Models",
423
- description: "Claude models and capabilities",
424
- mimeType: "application/json",
425
- }, async (uri) => {
426
- logger.debug("Reading Claude models resource");
427
- const contents = await resourceProvider.readResource(uri.href);
428
- return { contents: contents ? [contents] : [] };
429
- });
430
- // Register Codex models resource
431
- server.registerResource("codex-models", "models://codex", {
432
- title: "🔧 Codex Models",
433
- description: "Codex models and capabilities",
434
- mimeType: "application/json",
435
- }, async (uri) => {
436
- logger.debug("Reading Codex models resource");
437
- const contents = await resourceProvider.readResource(uri.href);
438
- return { contents: contents ? [contents] : [] };
439
- });
440
- // Register Gemini models resource
441
- server.registerResource("gemini-models", "models://gemini", {
442
- title: "🌟 Gemini Models",
443
- description: "Gemini models and capabilities",
444
- mimeType: "application/json",
445
- }, async (uri) => {
446
- logger.debug("Reading Gemini models resource");
447
- const contents = await resourceProvider.readResource(uri.href);
448
- return { contents: contents ? [contents] : [] };
449
- });
450
- // Register Grok models resource
451
- server.registerResource("grok-models", "models://grok", {
452
- title: "⚡ Grok Models",
453
- description: "Grok models and capabilities",
454
- mimeType: "application/json",
455
- }, async (uri) => {
456
- logger.debug("Reading Grok models resource");
457
- const contents = await resourceProvider.readResource(uri.href);
458
- return { contents: contents ? [contents] : [] };
459
- });
460
- // Register performance metrics resource
461
- server.registerResource("performance-metrics", "metrics://performance", {
462
- title: "📈 Performance Metrics",
463
- description: "Request counts, latency, success/failure rates",
464
- mimeType: "application/json",
465
- }, async (uri) => {
466
- logger.debug("Reading performance metrics resource");
467
- const contents = await resourceProvider.readResource(uri.href);
468
- return { contents: contents ? [contents] : [] };
469
- });
470
- function prepareClaudeRequest(params) {
616
+ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
471
617
  const corrId = params.correlationId || randomUUID();
472
618
  const cliInfo = getCliInfo();
473
619
  const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
@@ -478,7 +624,7 @@ function prepareClaudeRequest(params) {
478
624
  disallowedTools: params.disallowedTools,
479
625
  });
480
626
  if (reviewIntegrity.violations.length > 0) {
481
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
627
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
482
628
  cli: "claude",
483
629
  operation: params.operation,
484
630
  score: reviewIntegrity.totalScore,
@@ -498,7 +644,7 @@ function prepareClaudeRequest(params) {
498
644
  const mcpConfig = mcpConfigResolution.config;
499
645
  let approvalDecision = null;
500
646
  if (params.approvalStrategy === "mcp_managed") {
501
- approvalDecision = approvalManager.decide({
647
+ approvalDecision = runtime.approvalManager.decide({
502
648
  cli: "claude",
503
649
  operation: params.operation,
504
650
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -535,8 +681,15 @@ function prepareClaudeRequest(params) {
535
681
  if (params.approvalStrategy === "mcp_managed") {
536
682
  args.push("--permission-mode", "bypassPermissions");
537
683
  }
538
- else if (params.dangerouslySkipPermissions) {
539
- args.push("--permission-mode", "bypassPermissions");
684
+ else {
685
+ const permFlags = resolveClaudePermissionFlags({
686
+ permissionMode: params.permissionMode,
687
+ dangerouslySkipPermissions: params.dangerouslySkipPermissions,
688
+ });
689
+ if (permFlags.warning) {
690
+ runtime.logger.warn(`[${corrId}] ${permFlags.warning}`);
691
+ }
692
+ args.push(...permFlags.args);
540
693
  }
541
694
  if (params.strictMcpConfig || mcpConfig.enabled.length > 0) {
542
695
  args.push("--mcp-config", mcpConfig.path);
@@ -544,6 +697,26 @@ function prepareClaudeRequest(params) {
544
697
  args.push("--strict-mcp-config");
545
698
  }
546
699
  }
700
+ // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
701
+ let validatedAgents;
702
+ if (params.agents && Object.keys(params.agents).length > 0) {
703
+ const result = validateClaudeAgentsMap(params.agents);
704
+ if (!result.ok) {
705
+ return createErrorResponse("claude", 1, "", corrId, new Error(result.message));
706
+ }
707
+ validatedAgents = result.value;
708
+ }
709
+ args.push(...prepareClaudeHighImpactFlags({
710
+ agent: params.agent,
711
+ agents: validatedAgents,
712
+ forkSession: params.forkSession,
713
+ systemPrompt: params.systemPrompt,
714
+ appendSystemPrompt: params.appendSystemPrompt,
715
+ maxBudgetUsd: params.maxBudgetUsd,
716
+ maxTurns: params.maxTurns,
717
+ effort: params.effort,
718
+ excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
719
+ }));
547
720
  return {
548
721
  corrId,
549
722
  effectivePrompt,
@@ -555,14 +728,14 @@ function prepareClaudeRequest(params) {
555
728
  args,
556
729
  };
557
730
  }
558
- function prepareCodexRequest(params) {
731
+ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
559
732
  const corrId = params.correlationId || randomUUID();
560
733
  const cliInfo = getCliInfo();
561
734
  const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
562
735
  // Review integrity check on raw prompt (before optimization)
563
736
  const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
564
737
  if (reviewIntegrity.violations.length > 0) {
565
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
738
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
566
739
  cli: "codex",
567
740
  operation: params.operation,
568
741
  score: reviewIntegrity.totalScore,
@@ -577,7 +750,7 @@ function prepareCodexRequest(params) {
577
750
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
578
751
  let approvalDecision = null;
579
752
  if (params.approvalStrategy === "mcp_managed") {
580
- approvalDecision = approvalManager.decide({
753
+ approvalDecision = runtime.approvalManager.decide({
581
754
  cli: "codex",
582
755
  operation: params.operation,
583
756
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -615,13 +788,73 @@ function prepareCodexRequest(params) {
615
788
  }
616
789
  if (resolvedModel)
617
790
  args.push("--model", resolvedModel);
618
- if (sessionPlan.mode === "new" && params.fullAuto) {
619
- args.push("--full-auto");
791
+ // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
792
+ // `codex exec resume` rejects all of these (the original session's policy is
793
+ // inherited), so we only emit them when starting a NEW session.
794
+ if (sessionPlan.mode === "new") {
795
+ const sandboxFlags = resolveCodexSandboxFlags({
796
+ sandboxMode: params.sandboxMode,
797
+ askForApproval: params.askForApproval,
798
+ fullAuto: params.fullAuto,
799
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
800
+ });
801
+ if (sandboxFlags.warning) {
802
+ runtime.logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
803
+ }
804
+ args.push(...sandboxFlags.args);
620
805
  }
621
806
  if (params.dangerouslyBypassApprovalsAndSandbox) {
622
807
  args.push("--dangerously-bypass-approvals-and-sandbox");
623
808
  }
809
+ // U23 fix: emit `--json` when the caller asked for JSON output so the
810
+ // codex-json-parser actually receives JSONL events. This is what makes
811
+ // extractUsageAndCost() reachable from the tool surface; without it, the
812
+ // U23 parser is dead code.
813
+ if (params.outputFormat === "json") {
814
+ args.push("--json");
815
+ }
624
816
  args.push("--skip-git-repo-check");
817
+ // U26: High-impact feature flags. Some of these (`--output-schema`,
818
+ // `--search`, `-C`, `--add-dir`) are rejected by `codex exec resume`, so we
819
+ // only emit them on a NEW session. Images / ephemeral / profile /
820
+ // ignore-rules / ignore-user-config are allowed on resume per the audited
821
+ // CLI help; we emit them in both branches.
822
+ let highImpactCleanup;
823
+ if (sessionPlan.mode === "new") {
824
+ const high = prepareCodexHighImpactFlags({
825
+ outputSchema: params.outputSchema,
826
+ search: params.search,
827
+ profile: params.profile,
828
+ configOverrides: params.configOverrides,
829
+ ephemeral: params.ephemeral,
830
+ images: params.images,
831
+ ignoreUserConfig: params.ignoreUserConfig,
832
+ ignoreRules: params.ignoreRules,
833
+ });
834
+ if (high.missingImagePath) {
835
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
836
+ }
837
+ args.push(...high.args);
838
+ highImpactCleanup = high.cleanup;
839
+ }
840
+ else {
841
+ // On resume, emit only the resume-safe subset (profile, ephemeral,
842
+ // images, ignoreUserConfig, ignoreRules). outputSchema, search, and
843
+ // configOverrides are dropped silently to mirror existing behavior for
844
+ // sandbox/ask-for-approval on resume.
845
+ const high = prepareCodexHighImpactFlags({
846
+ profile: params.profile,
847
+ ephemeral: params.ephemeral,
848
+ images: params.images,
849
+ ignoreUserConfig: params.ignoreUserConfig,
850
+ ignoreRules: params.ignoreRules,
851
+ });
852
+ if (high.missingImagePath) {
853
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`images: path does not exist: ${high.missingImagePath}`));
854
+ }
855
+ args.push(...high.args);
856
+ highImpactCleanup = high.cleanup;
857
+ }
625
858
  if (sessionPlan.mode === "resume-by-id" && sessionPlan.sessionId) {
626
859
  args.push(sessionPlan.sessionId);
627
860
  }
@@ -634,9 +867,10 @@ function prepareCodexRequest(params) {
634
867
  approvalDecision,
635
868
  reviewIntegrity,
636
869
  args,
870
+ cleanup: highImpactCleanup,
637
871
  };
638
872
  }
639
- function prepareGeminiRequest(params) {
873
+ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
640
874
  const corrId = params.correlationId || randomUUID();
641
875
  const cliInfo = getCliInfo();
642
876
  const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
@@ -646,7 +880,7 @@ function prepareGeminiRequest(params) {
646
880
  allowedTools: params.allowedTools,
647
881
  });
648
882
  if (reviewIntegrity.violations.length > 0) {
649
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
883
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
650
884
  cli: "gemini",
651
885
  operation: params.operation,
652
886
  score: reviewIntegrity.totalScore,
@@ -661,7 +895,7 @@ function prepareGeminiRequest(params) {
661
895
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
662
896
  let approvalDecision = null;
663
897
  if (params.approvalStrategy === "mcp_managed") {
664
- approvalDecision = approvalManager.decide({
898
+ approvalDecision = runtime.approvalManager.decide({
665
899
  cli: "gemini",
666
900
  operation: params.operation,
667
901
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -678,7 +912,29 @@ function prepareGeminiRequest(params) {
678
912
  }
679
913
  }
680
914
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
681
- const args = [effectivePrompt];
915
+ // U27: Validate high-impact policy paths and prepend attachment tokens
916
+ // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
917
+ const highImpact = prepareGeminiHighImpactFlags({
918
+ sandbox: params.sandbox,
919
+ policyFiles: params.policyFiles,
920
+ adminPolicyFiles: params.adminPolicyFiles,
921
+ });
922
+ if (highImpact.missingPolicyPath) {
923
+ return createErrorResponse(params.operation, 1, "", corrId, new Error(`${highImpact.missingPolicyField}: path does not exist: ${highImpact.missingPolicyPath}`));
924
+ }
925
+ if (params.attachments && params.attachments.length > 0) {
926
+ try {
927
+ effectivePrompt = prependGeminiAttachments(effectivePrompt, params.attachments);
928
+ }
929
+ catch (err) {
930
+ return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
931
+ }
932
+ }
933
+ // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
934
+ // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
935
+ // the documented non-interactive flag and is robust against future CLI mode
936
+ // changes.
937
+ const args = ["-p", effectivePrompt];
682
938
  if (resolvedModel)
683
939
  args.push("--model", resolvedModel);
684
940
  if (effectiveApprovalMode)
@@ -695,6 +951,15 @@ function prepareGeminiRequest(params) {
695
951
  sanitizeCliArgValues(params.includeDirs, "includeDirs");
696
952
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
697
953
  }
954
+ // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
955
+ // existing flag set so positional ordering relative to `-p` is preserved.
956
+ args.push(...highImpact.args);
957
+ // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
958
+ // JSON parser is otherwise unreachable from the tool surface and the
959
+ // structured usageMetadata is silently dropped.
960
+ if (params.outputFormat === "json") {
961
+ args.push("-o", "json");
962
+ }
698
963
  return {
699
964
  corrId,
700
965
  effectivePrompt,
@@ -705,7 +970,7 @@ function prepareGeminiRequest(params) {
705
970
  args,
706
971
  };
707
972
  }
708
- function prepareGrokRequest(params) {
973
+ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
709
974
  const corrId = params.correlationId || randomUUID();
710
975
  const cliInfo = getCliInfo();
711
976
  const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
@@ -716,7 +981,7 @@ function prepareGrokRequest(params) {
716
981
  disallowedTools: params.disallowedTools,
717
982
  });
718
983
  if (reviewIntegrity.violations.length > 0) {
719
- logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
984
+ runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
720
985
  cli: "grok",
721
986
  operation: params.operation,
722
987
  score: reviewIntegrity.totalScore,
@@ -731,7 +996,7 @@ function prepareGrokRequest(params) {
731
996
  const requestedMcpServers = normalizeMcpServers(params.mcpServers);
732
997
  let approvalDecision = null;
733
998
  if (params.approvalStrategy === "mcp_managed") {
734
- approvalDecision = approvalManager.decide({
999
+ approvalDecision = runtime.approvalManager.decide({
735
1000
  cli: "grok",
736
1001
  operation: params.operation,
737
1002
  prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
@@ -780,6 +1045,78 @@ function prepareGrokRequest(params) {
780
1045
  args,
781
1046
  };
782
1047
  }
1048
/**
 * Assemble everything needed to launch a single Mistral CLI invocation.
 *
 * Steps, in order: resolve the correlation id and model, run the review
 * integrity check on the raw prompt, optionally optimize the prompt, consult
 * the approval manager when the caller selected the "mcp_managed" strategy,
 * and finally delegate to buildMistralCliInvocation for the args and env.
 *
 * @param {object} params - Caller-supplied request options (prompt, model,
 *   permissionMode, approvalStrategy, allowedTools, disallowedTools, ...).
 * @param {object} [runtime] - Gateway runtime providing `logger` and
 *   `approvalManager`; defaults to resolveGatewayServerRuntime().
 * @returns {object} Either the approval-denied response produced by
 *   createApprovalDeniedResponse, or a prep object with `corrId`,
 *   `effectivePrompt`, `resolvedModel`, `requestedMcpServers`,
 *   `approvalDecision`, `reviewIntegrity`, `args` and `mistralEnv`.
 */
function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
    const { operation, allowedTools, disallowedTools } = params;
    const usesManagedApproval = params.approvalStrategy === "mcp_managed";
    const corrId = params.correlationId || randomUUID();
    // Fall back to a fixed model name when alias resolution yields nothing.
    const resolvedModel = resolveModelAlias("mistral", params.model, getCliInfo()) || "devstral-medium";
    // The integrity check always sees the raw prompt, never the optimized one.
    const reviewIntegrity = checkReviewIntegrity({
        prompt: params.prompt,
        allowedTools,
        disallowedTools,
    });
    if (reviewIntegrity.violations.length > 0) {
        const violationTypes = reviewIntegrity.violations.map((violation) => violation.type).join(", ");
        runtime.logger.info(`[${corrId}] Review integrity violations detected: ${violationTypes}`, {
            cli: "mistral",
            operation,
            score: reviewIntegrity.totalScore,
        });
    }
    let effectivePrompt = params.prompt;
    if (params.optimizePrompt) {
        const rawPrompt = effectivePrompt;
        effectivePrompt = optimizePromptText(rawPrompt);
        logOptimizationTokens("prompt", corrId, rawPrompt, effectivePrompt);
    }
    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
    let approvalDecision = null;
    if (usesManagedApproval) {
        approvalDecision = runtime.approvalManager.decide({
            cli: "mistral",
            operation,
            prompt: params.prompt,
            bypassRequested: params.permissionMode === "auto-approve",
            fullAuto: false,
            requestedMcpServers,
            allowedTools,
            disallowedTools,
            policy: params.approvalPolicy,
            metadata: { model: resolvedModel, vibeActiveModelEnv: true },
            reviewIntegrity,
        });
        if (approvalDecision.status !== "approved") {
            return createApprovalDeniedResponse(operation, approvalDecision);
        }
    }
    // Under mcp_managed the approval gate has already ruled, so the CLI is
    // always run with auto-approve; otherwise the caller's mode wins and
    // auto-approve is only the fallback for a missing value.
    let effectivePermissionMode;
    if (usesManagedApproval) {
        effectivePermissionMode = "auto-approve";
    }
    else {
        effectivePermissionMode = params.permissionMode ?? "auto-approve";
    }
    const invocation = buildMistralCliInvocation({
        prompt: effectivePrompt,
        resolvedModel,
        outputFormat: params.outputFormat,
        permissionMode: effectivePermissionMode,
        effort: params.effort,
        reasoningEffort: params.reasoningEffort,
        allowedTools,
        disallowedTools,
    });
    if (invocation.ignoredDisallowedTools) {
        runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
    }
    const { args, env: mistralEnv } = invocation;
    return {
        corrId,
        effectivePrompt,
        resolvedModel,
        requestedMcpServers,
        approvalDecision,
        reviewIntegrity,
        args,
        mistralEnv,
    };
}
783
1120
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
784
1121
  let finalStdout = stdout;
785
1122
  // Skip response optimization for JSON output to prevent corrupting structured data
@@ -831,7 +1168,26 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
831
1168
  }
832
1169
  return response;
833
1170
  }
1171
/**
 * Resolve the gateway runtime a request handler should use.
 *
 * When the caller already supplies `deps.runtime` it is returned untouched.
 * Otherwise a runtime is built via resolveGatewayServerRuntime from the
 * handler deps, with the logger normalized so that `warn` is always present.
 *
 * @param {object} deps - Handler dependencies: optional `runtime`, plus
 *   `logger`, `sessionManager` and (for async handlers) `asyncJobManager`.
 * @returns {object} The provided runtime, or the one produced by
 *   resolveGatewayServerRuntime.
 */
function resolveHandlerRuntime(deps) {
    if (deps.runtime)
        return deps.runtime;
    const depLogger = deps.logger;
    // Bind each method to its logger: copying a bare method reference onto a
    // new object drops `this`, which breaks class-based loggers that keep
    // state on the instance. Non-function values pass through unchanged.
    const bindLoggerMethod = (name) => {
        const method = depLogger[name];
        return typeof method === "function" ? method.bind(depLogger) : method;
    };
    const normalizedLogger = {
        info: bindLoggerMethod("info"),
        // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
        warn: bindLoggerMethod("warn") ?? ((msg, ...rest) => depLogger.info(`[WARN] ${msg}`, ...rest)),
        error: bindLoggerMethod("error"),
        debug: bindLoggerMethod("debug"),
    };
    return resolveGatewayServerRuntime({
        sessionManager: deps.sessionManager,
        logger: normalizedLogger,
        asyncJobManager: deps.asyncJobManager,
    });
}
834
1189
  export async function handleGeminiRequest(deps, params) {
1190
+ const runtime = resolveHandlerRuntime(deps);
835
1191
  const startTime = Date.now();
836
1192
  const prep = prepareGeminiRequest({
837
1193
  prompt: params.prompt,
@@ -845,7 +1201,12 @@ export async function handleGeminiRequest(deps, params) {
845
1201
  correlationId: params.correlationId,
846
1202
  optimizePrompt: params.optimizePrompt,
847
1203
  operation: "gemini_request",
848
- });
1204
+ outputFormat: params.outputFormat,
1205
+ sandbox: params.sandbox,
1206
+ policyFiles: params.policyFiles,
1207
+ adminPolicyFiles: params.adminPolicyFiles,
1208
+ attachments: params.attachments,
1209
+ }, runtime);
849
1210
  if (!("args" in prep))
850
1211
  return prep;
851
1212
  const { corrId, args } = prep;
@@ -857,20 +1218,24 @@ export async function handleGeminiRequest(deps, params) {
857
1218
  model: prep.resolvedModel || "default",
858
1219
  prompt: params.prompt,
859
1220
  sessionId: params.sessionId,
860
- });
1221
+ }, runtime);
861
1222
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
862
1223
  try {
863
- // Session arg planning (pure, no I/O)
864
- const sessionResult = resolveSessionResumeArgs({
1224
+ // U27: Session arg planning. For fresh sessions, emit `--session-id <uuid>`
1225
+ // so the gateway and Gemini agree on the session identifier from turn 1.
1226
+ // For resume flows, fall back to `--resume <id>` (existing behavior).
1227
+ const sessionPlan = resolveGeminiSessionPlan({
865
1228
  sessionId: params.sessionId,
866
1229
  resumeLatest: params.resumeLatest,
867
1230
  createNewSession: params.createNewSession,
868
1231
  });
869
- args.push(...sessionResult.resumeArgs);
870
- const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
1232
+ args.push(...sessionPlan.args);
1233
+ const userProvidedSession = sessionPlan.resumed;
1234
+ const effectiveSessionIdHint = sessionPlan.emittedSessionId ?? params.sessionId;
1235
+ const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
871
1236
  // Deferred — job still running, return async reference
872
1237
  if (isDeferredResponse(result)) {
873
- return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1238
+ return buildDeferredToolResponse(result, effectiveSessionIdHint);
874
1239
  }
875
1240
  const { stdout, stderr, code } = result;
876
1241
  durationMs = Math.max(0, Date.now() - startTime);
@@ -885,13 +1250,15 @@ export async function handleGeminiRequest(deps, params) {
885
1250
  exitCode: code,
886
1251
  errorMessage: stderr || `Exit code ${code}`,
887
1252
  status: "failed",
888
- });
1253
+ }, runtime);
889
1254
  return createErrorResponse("gemini", code, stderr, corrId);
890
1255
  }
891
1256
  wasSuccessful = true;
892
- // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
893
- let effectiveSessionId = sessionResult.effectiveSessionId;
894
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1257
+ // U27 Post-success session I/O. Mirror the gateway store 1:1 to whatever
1258
+ // session id Gemini is using (either the user-supplied resume id or the
1259
+ // deterministic --session-id we emitted).
1260
+ let effectiveSessionId = effectiveSessionIdHint;
1261
+ if (effectiveSessionId) {
895
1262
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
896
1263
  if (!existing) {
897
1264
  try {
@@ -905,12 +1272,9 @@ export async function handleGeminiRequest(deps, params) {
905
1272
  }
906
1273
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
907
1274
  }
908
- else if (!params.createNewSession && !effectiveSessionId) {
909
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
910
- effectiveSessionId = newSession.id;
911
- }
912
1275
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
913
- const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
1276
+ const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
1277
+ const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
914
1278
  safeFlightComplete(corrId, {
915
1279
  response: stdout,
916
1280
  durationMs,
@@ -920,7 +1284,12 @@ export async function handleGeminiRequest(deps, params) {
920
1284
  optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
921
1285
  exitCode: 0,
922
1286
  status: "completed",
923
- });
1287
+ inputTokens: geminiUsage.inputTokens,
1288
+ outputTokens: geminiUsage.outputTokens,
1289
+ cacheReadTokens: geminiUsage.cacheReadTokens,
1290
+ cacheCreationTokens: geminiUsage.cacheCreationTokens,
1291
+ costUsd: geminiUsage.costUsd,
1292
+ }, runtime);
924
1293
  return response;
925
1294
  }
926
1295
  catch (error) {
@@ -935,15 +1304,16 @@ export async function handleGeminiRequest(deps, params) {
935
1304
  exitCode: 1,
936
1305
  errorMessage: error.message,
937
1306
  status: "failed",
938
- });
1307
+ }, runtime);
939
1308
  return createErrorResponse("gemini", 1, "", corrId, error);
940
1309
  }
941
1310
  finally {
942
1311
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
943
- performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
1312
+ runtime.performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
944
1313
  }
945
1314
  }
946
1315
  export async function handleGeminiRequestAsync(deps, params) {
1316
+ const runtime = resolveHandlerRuntime(deps);
947
1317
  const prep = prepareGeminiRequest({
948
1318
  prompt: params.prompt,
949
1319
  model: params.model,
@@ -956,21 +1326,26 @@ export async function handleGeminiRequestAsync(deps, params) {
956
1326
  correlationId: params.correlationId,
957
1327
  optimizePrompt: params.optimizePrompt,
958
1328
  operation: "gemini_request_async",
959
- });
1329
+ outputFormat: params.outputFormat,
1330
+ sandbox: params.sandbox,
1331
+ policyFiles: params.policyFiles,
1332
+ adminPolicyFiles: params.adminPolicyFiles,
1333
+ attachments: params.attachments,
1334
+ }, runtime);
960
1335
  if (!("args" in prep))
961
1336
  return prep;
962
1337
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
963
1338
  try {
964
- // Session arg planning (pure, no I/O)
965
- const sessionResult = resolveSessionResumeArgs({
1339
+ // U27: Session arg planning with deterministic --session-id for fresh sessions.
1340
+ const sessionPlan = resolveGeminiSessionPlan({
966
1341
  sessionId: params.sessionId,
967
1342
  resumeLatest: params.resumeLatest,
968
1343
  createNewSession: params.createNewSession,
969
1344
  });
970
- args.push(...sessionResult.resumeArgs);
1345
+ args.push(...sessionPlan.args);
971
1346
  // Pre-start session I/O (async handlers: prevent orphaned jobs)
972
- let effectiveSessionId = sessionResult.effectiveSessionId;
973
- if (sessionResult.userProvidedSession && effectiveSessionId) {
1347
+ let effectiveSessionId = sessionPlan.emittedSessionId ?? params.sessionId;
1348
+ if (effectiveSessionId) {
974
1349
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
975
1350
  if (!existing) {
976
1351
  try {
@@ -984,18 +1359,16 @@ export async function handleGeminiRequestAsync(deps, params) {
984
1359
  }
985
1360
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
986
1361
  }
987
- else if (!params.createNewSession && !effectiveSessionId) {
988
- const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
989
- effectiveSessionId = newSession.id;
990
- }
991
- // Start job only after all session I/O succeeds
992
- const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
1362
+ // Start job only after all session I/O succeeds. U23: forward outputFormat
1363
+ // so AsyncJobManager records it in the durable store (the manager also
1364
+ // surfaces it in the snapshot).
1365
+ const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
993
1366
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
994
1367
  const asyncResponse = {
995
1368
  success: true,
996
1369
  job,
997
1370
  sessionId: effectiveSessionId || null,
998
- resumable: sessionResult.userProvidedSession,
1371
+ resumable: sessionPlan.resumed,
999
1372
  approval: approvalDecision,
1000
1373
  mcpServers: { requested: requestedMcpServers },
1001
1374
  };
@@ -1016,6 +1389,7 @@ export async function handleGeminiRequestAsync(deps, params) {
1016
1389
  }
1017
1390
  }
1018
1391
  export async function handleGrokRequest(deps, params) {
1392
+ const runtime = resolveHandlerRuntime(deps);
1019
1393
  const startTime = Date.now();
1020
1394
  const prep = prepareGrokRequest({
1021
1395
  prompt: params.prompt,
@@ -1033,7 +1407,7 @@ export async function handleGrokRequest(deps, params) {
1033
1407
  correlationId: params.correlationId,
1034
1408
  optimizePrompt: params.optimizePrompt,
1035
1409
  operation: "grok_request",
1036
- });
1410
+ }, runtime);
1037
1411
  if (!("args" in prep))
1038
1412
  return prep;
1039
1413
  const { corrId, args } = prep;
@@ -1045,7 +1419,7 @@ export async function handleGrokRequest(deps, params) {
1045
1419
  model: prep.resolvedModel || "default",
1046
1420
  prompt: params.prompt,
1047
1421
  sessionId: params.sessionId,
1048
- });
1422
+ }, runtime);
1049
1423
  deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${params.prompt.length}`);
1050
1424
  try {
1051
1425
  // Session arg planning (pure, no I/O)
@@ -1055,7 +1429,7 @@ export async function handleGrokRequest(deps, params) {
1055
1429
  createNewSession: params.createNewSession,
1056
1430
  });
1057
1431
  args.push(...sessionResult.resumeArgs);
1058
- const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1432
+ const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime);
1059
1433
  // Deferred — job still running, return async reference
1060
1434
  if (isDeferredResponse(result)) {
1061
1435
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -1073,7 +1447,7 @@ export async function handleGrokRequest(deps, params) {
1073
1447
  exitCode: code,
1074
1448
  errorMessage: stderr || `Exit code ${code}`,
1075
1449
  status: "failed",
1076
- });
1450
+ }, runtime);
1077
1451
  return createErrorResponse("grok", code, stderr, corrId);
1078
1452
  }
1079
1453
  wasSuccessful = true;
@@ -1108,7 +1482,7 @@ export async function handleGrokRequest(deps, params) {
1108
1482
  optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1109
1483
  exitCode: 0,
1110
1484
  status: "completed",
1111
- });
1485
+ }, runtime);
1112
1486
  return response;
1113
1487
  }
1114
1488
  catch (error) {
@@ -1123,15 +1497,16 @@ export async function handleGrokRequest(deps, params) {
1123
1497
  exitCode: 1,
1124
1498
  errorMessage: error.message,
1125
1499
  status: "failed",
1126
- });
1500
+ }, runtime);
1127
1501
  return createErrorResponse("grok", 1, "", corrId, error);
1128
1502
  }
1129
1503
  finally {
1130
1504
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1131
- performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1505
+ runtime.performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1132
1506
  }
1133
1507
  }
1134
1508
  export async function handleGrokRequestAsync(deps, params) {
1509
+ const runtime = resolveHandlerRuntime(deps);
1135
1510
  const prep = prepareGrokRequest({
1136
1511
  prompt: params.prompt,
1137
1512
  model: params.model,
@@ -1148,7 +1523,7 @@ export async function handleGrokRequestAsync(deps, params) {
1148
1523
  correlationId: params.correlationId,
1149
1524
  optimizePrompt: params.optimizePrompt,
1150
1525
  operation: "grok_request_async",
1151
- });
1526
+ }, runtime);
1152
1527
  if (!("args" in prep))
1153
1528
  return prep;
1154
1529
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
@@ -1207,694 +1582,284 @@ export async function handleGrokRequestAsync(deps, params) {
1207
1582
  return createErrorResponse("grok_request_async", 1, "", corrId, error);
1208
1583
  }
1209
1584
  }
1210
- export async function handleCodexRequestAsync(deps, params) {
1211
- const prep = prepareCodexRequest({
1585
+ export async function handleMistralRequest(deps, params) {
1586
+ const runtime = resolveHandlerRuntime(deps);
1587
+ const startTime = Date.now();
1588
+ const prep = prepareMistralRequest({
1212
1589
  prompt: params.prompt,
1213
1590
  model: params.model,
1214
- fullAuto: params.fullAuto,
1215
- dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1591
+ outputFormat: params.outputFormat,
1592
+ permissionMode: params.permissionMode,
1593
+ effort: params.effort,
1594
+ reasoningEffort: params.reasoningEffort,
1595
+ allowedTools: params.allowedTools,
1596
+ disallowedTools: params.disallowedTools,
1216
1597
  approvalStrategy: params.approvalStrategy,
1217
1598
  approvalPolicy: params.approvalPolicy,
1218
1599
  mcpServers: params.mcpServers,
1219
- sessionId: params.sessionId,
1220
- resumeLatest: params.resumeLatest,
1221
- createNewSession: params.createNewSession,
1222
1600
  correlationId: params.correlationId,
1223
1601
  optimizePrompt: params.optimizePrompt,
1224
- operation: "codex_request_async",
1225
- });
1602
+ operation: "mistral_request",
1603
+ }, runtime);
1226
1604
  if (!("args" in prep))
1227
1605
  return prep;
1228
- const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1606
+ const { corrId, args, mistralEnv } = prep;
1607
+ let durationMs = 0;
1608
+ let wasSuccessful = false;
1609
+ safeFlightStart({
1610
+ correlationId: corrId,
1611
+ cli: "mistral",
1612
+ model: prep.resolvedModel || "default",
1613
+ prompt: params.prompt,
1614
+ sessionId: params.sessionId,
1615
+ }, runtime);
1616
+ deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${params.prompt.length}`);
1229
1617
  try {
1230
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
1231
- let effectiveSessionId = params.sessionId;
1232
- if (!params.createNewSession && !params.sessionId) {
1233
- const activeSession = await deps.sessionManager.getActiveSession("codex");
1234
- if (activeSession) {
1235
- effectiveSessionId = activeSession.id;
1236
- }
1237
- else {
1238
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1239
- effectiveSessionId = newSession.id;
1240
- }
1241
- }
1242
- else if (params.sessionId) {
1243
- await deps.sessionManager.updateSessionUsage(params.sessionId);
1618
+ const sessionResult = resolveMistralSessionArgs({
1619
+ sessionId: params.sessionId,
1620
+ resumeLatest: params.resumeLatest,
1621
+ createNewSession: params.createNewSession,
1622
+ });
1623
+ args.push(...sessionResult.resumeArgs);
1624
+ const result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv);
1625
+ if (isDeferredResponse(result)) {
1626
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1244
1627
  }
1245
- else if (params.createNewSession) {
1246
- const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1247
- effectiveSessionId = newSession.id;
1248
- }
1249
- // Start job only after all session I/O succeeds
1250
- const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), undefined, params.forceRefresh);
1251
- deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
1252
- const asyncResponse = {
1253
- success: true,
1254
- job,
1255
- sessionId: effectiveSessionId || null,
1256
- approval: approvalDecision,
1257
- mcpServers: { requested: requestedMcpServers },
1258
- };
1259
- if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1260
- asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1261
- }
1262
- return {
1263
- content: [
1264
- {
1265
- type: "text",
1266
- text: JSON.stringify(asyncResponse, null, 2),
1267
- },
1268
- ],
1269
- };
1270
- }
1271
- catch (error) {
1272
- return createErrorResponse("codex_request_async", 1, "", corrId, error);
1273
- }
1274
- }
1275
- //──────────────────────────────────────────────────────────────────────────────
1276
- // Claude Code Tool
1277
- //──────────────────────────────────────────────────────────────────────────────
1278
- server.tool("claude_request", {
1279
- prompt: z
1280
- .string()
1281
- .min(1, "Prompt cannot be empty")
1282
- .max(100000, "Prompt too long (max 100k chars)")
1283
- .describe("Prompt text for Claude"),
1284
- model: z
1285
- .string()
1286
- .optional()
1287
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1288
- outputFormat: z
1289
- .enum(["text", "json", "stream-json"])
1290
- .default("text")
1291
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1292
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1293
- continueSession: z.boolean().default(false).describe("Continue active session"),
1294
- createNewSession: z.boolean().default(false).describe("Force new session"),
1295
- allowedTools: z
1296
- .array(z.string())
1297
- .optional()
1298
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1299
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1300
- dangerouslySkipPermissions: z
1301
- .boolean()
1302
- .default(false)
1303
- .describe("Bypass permissions (sandbox only)"),
1304
- approvalStrategy: z
1305
- .enum(["legacy", "mcp_managed"])
1306
- .default("legacy")
1307
- .describe("Approval strategy"),
1308
- approvalPolicy: z
1309
- .enum(["strict", "balanced", "permissive"])
1310
- .optional()
1311
- .describe("Approval policy override"),
1312
- mcpServers: z
1313
- .array(MCP_SERVER_ENUM)
1314
- .default(["sqry"])
1315
- .describe("MCP servers exposed to Claude"),
1316
- strictMcpConfig: z
1317
- .boolean()
1318
- .default(false)
1319
- .describe("Restrict Claude to provided MCP config only"),
1320
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1321
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1322
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1323
- idleTimeoutMs: z
1324
- .number()
1325
- .int()
1326
- .min(30_000)
1327
- .max(3_600_000)
1328
- .optional()
1329
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1330
- forceRefresh: z
1331
- .boolean()
1332
- .default(false)
1333
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1334
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1335
- const startTime = Date.now();
1336
- const prep = prepareClaudeRequest({
1337
- prompt,
1338
- model,
1339
- outputFormat,
1340
- allowedTools,
1341
- disallowedTools,
1342
- dangerouslySkipPermissions,
1343
- approvalStrategy,
1344
- approvalPolicy,
1345
- mcpServers,
1346
- strictMcpConfig,
1347
- correlationId,
1348
- optimizePrompt,
1349
- operation: "claude_request",
1350
- });
1351
- if (!("args" in prep))
1352
- return prep;
1353
- const { corrId, args } = prep;
1354
- let durationMs = 0;
1355
- let wasSuccessful = false;
1356
- safeFlightStart({
1357
- correlationId: corrId,
1358
- cli: "claude",
1359
- model: prep.resolvedModel || "default",
1360
- prompt,
1361
- sessionId,
1362
- });
1363
- logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
1364
- try {
1365
- // Session management
1366
- let effectiveSessionId = sessionId;
1367
- let useContinue = continueSession;
1368
- const activeSession = await sessionManager.getActiveSession("claude");
1369
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1370
- effectiveSessionId = activeSession.id;
1371
- useContinue = true;
1372
- }
1373
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1374
- useContinue = true;
1375
- }
1376
- if (useContinue) {
1377
- args.push("--continue");
1378
- }
1379
- else if (effectiveSessionId) {
1380
- args.push("--session-id", effectiveSessionId);
1381
- await sessionManager.updateSessionUsage(effectiveSessionId);
1382
- }
1383
- // Idle timeout only for stream-json (text/json produce no output until done)
1384
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1385
- const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh);
1386
- // Deferred — job still running, return async reference
1387
- if (isDeferredResponse(result)) {
1388
- return buildDeferredToolResponse(result, effectiveSessionId);
1389
- }
1390
- const { stdout, stderr, code } = result;
1391
- durationMs = Math.max(0, Date.now() - startTime);
1392
- if (code !== 0) {
1393
- logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
1394
- safeFlightComplete(corrId, {
1395
- response: stderr || "",
1396
- durationMs,
1397
- retryCount: 0,
1398
- circuitBreakerState: "closed",
1399
- optimizationApplied: optimizePrompt || optimizeResponse,
1400
- exitCode: code,
1401
- errorMessage: stderr || `Exit code ${code}`,
1402
- status: "failed",
1403
- });
1404
- return createErrorResponse("claude", code, stderr, corrId);
1628
+ const { stdout, stderr, code } = result;
1629
+ durationMs = Math.max(0, Date.now() - startTime);
1630
+ if (code !== 0) {
1631
+ deps.logger.info(`[${corrId}] mistral_request failed in ${durationMs}ms`);
1632
+ safeFlightComplete(corrId, {
1633
+ response: stderr || "",
1634
+ durationMs,
1635
+ retryCount: 0,
1636
+ circuitBreakerState: "closed",
1637
+ optimizationApplied: false,
1638
+ exitCode: code,
1639
+ errorMessage: stderr || `Exit code ${code}`,
1640
+ status: "failed",
1641
+ }, runtime);
1642
+ return createErrorResponse("mistral", code, stderr, corrId);
1405
1643
  }
1406
1644
  wasSuccessful = true;
1407
- // If we used a session ID and it's not tracked yet, create a session record
1408
- if (effectiveSessionId) {
1409
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1410
- if (!existingSession) {
1411
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1645
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1646
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1647
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1648
+ if (!existing) {
1649
+ try {
1650
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1651
+ }
1652
+ catch {
1653
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1654
+ if (!rechecked)
1655
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1656
+ }
1412
1657
  }
1658
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1413
1659
  }
1414
- logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
1415
- // Parse stream-json NDJSON output to extract result text
1416
- if (outputFormat === "stream-json") {
1417
- const parsed = parseStreamJson(stdout);
1418
- if (parsed.costUsd !== null) {
1419
- logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
1420
- }
1421
- safeFlightComplete(corrId, {
1422
- response: parsed.text,
1423
- inputTokens: parsed.usage?.inputTokens,
1424
- outputTokens: parsed.usage?.outputTokens,
1425
- durationMs,
1426
- retryCount: 0,
1427
- circuitBreakerState: "closed",
1428
- costUsd: parsed.costUsd ?? undefined,
1429
- optimizationApplied: optimizePrompt || optimizeResponse,
1430
- exitCode: 0,
1431
- status: "completed",
1432
- });
1433
- return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1660
+ else if (!params.createNewSession && !effectiveSessionId) {
1661
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1662
+ effectiveSessionId = newSession.id;
1434
1663
  }
1664
+ deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
1665
+ const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
1435
1666
  safeFlightComplete(corrId, {
1436
1667
  response: stdout,
1437
1668
  durationMs,
1438
1669
  retryCount: 0,
1439
1670
  circuitBreakerState: "closed",
1440
- optimizationApplied: optimizePrompt || optimizeResponse,
1671
+ approvalDecision: prep.approvalDecision?.status,
1672
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1441
1673
  exitCode: 0,
1442
1674
  status: "completed",
1443
- });
1444
- return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
1675
+ }, runtime);
1676
+ return response;
1445
1677
  }
1446
1678
  catch (error) {
1447
1679
  const elapsedMs = Math.max(0, Date.now() - startTime);
1448
- logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
1680
+ deps.logger.info(`[${corrId}] mistral_request threw exception after ${elapsedMs}ms`);
1449
1681
  safeFlightComplete(corrId, {
1450
1682
  response: "",
1451
1683
  durationMs: elapsedMs,
1452
1684
  retryCount: 0,
1453
1685
  circuitBreakerState: "closed",
1454
- optimizationApplied: optimizePrompt || optimizeResponse,
1686
+ optimizationApplied: false,
1455
1687
  exitCode: 1,
1456
1688
  errorMessage: error.message,
1457
1689
  status: "failed",
1458
- });
1459
- return createErrorResponse("claude", 1, "", corrId, error);
1690
+ }, runtime);
1691
+ return createErrorResponse("mistral", 1, "", corrId, error);
1460
1692
  }
1461
1693
  finally {
1462
1694
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1463
- performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
1695
+ runtime.performanceMetrics.recordRequest("mistral", finalizedDurationMs, wasSuccessful);
1464
1696
  }
1465
- });
1466
- //──────────────────────────────────────────────────────────────────────────────
1467
- // Codex Tool
1468
- //──────────────────────────────────────────────────────────────────────────────
1469
- server.tool("codex_request", {
1470
- prompt: z
1471
- .string()
1472
- .min(1, "Prompt cannot be empty")
1473
- .max(100000, "Prompt too long (max 100k chars)")
1474
- .describe("Prompt text for Codex"),
1475
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1476
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1477
- dangerouslyBypassApprovalsAndSandbox: z
1478
- .boolean()
1479
- .default(false)
1480
- .describe("Run Codex without approvals/sandbox"),
1481
- approvalStrategy: z
1482
- .enum(["legacy", "mcp_managed"])
1483
- .default("legacy")
1484
- .describe("Approval strategy"),
1485
- approvalPolicy: z
1486
- .enum(["strict", "balanced", "permissive"])
1487
- .optional()
1488
- .describe("Approval policy override"),
1489
- mcpServers: z
1490
- .array(MCP_SERVER_ENUM)
1491
- .default(["sqry"])
1492
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1493
- sessionId: z
1494
- .string()
1495
- .optional()
1496
- .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1497
- resumeLatest: z
1498
- .boolean()
1499
- .default(false)
1500
- .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1501
- createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
1502
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1503
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1504
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1505
- idleTimeoutMs: z
1506
- .number()
1507
- .int()
1508
- .min(30_000)
1509
- .max(3_600_000)
1510
- .optional()
1511
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1512
- forceRefresh: z
1513
- .boolean()
1514
- .default(false)
1515
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1516
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1517
- const startTime = Date.now();
1518
- const prep = prepareCodexRequest({
1519
- prompt,
1520
- model,
1521
- fullAuto,
1522
- dangerouslyBypassApprovalsAndSandbox,
1523
- approvalStrategy,
1524
- approvalPolicy,
1525
- mcpServers,
1526
- sessionId,
1527
- resumeLatest,
1528
- createNewSession,
1529
- correlationId,
1530
- optimizePrompt,
1531
- operation: "codex_request",
1532
- });
1697
+ }
1698
+ export async function handleMistralRequestAsync(deps, params) {
1699
+ const runtime = resolveHandlerRuntime(deps);
1700
+ const prep = prepareMistralRequest({
1701
+ prompt: params.prompt,
1702
+ model: params.model,
1703
+ outputFormat: params.outputFormat,
1704
+ permissionMode: params.permissionMode,
1705
+ effort: params.effort,
1706
+ reasoningEffort: params.reasoningEffort,
1707
+ allowedTools: params.allowedTools,
1708
+ disallowedTools: params.disallowedTools,
1709
+ approvalStrategy: params.approvalStrategy,
1710
+ approvalPolicy: params.approvalPolicy,
1711
+ mcpServers: params.mcpServers,
1712
+ correlationId: params.correlationId,
1713
+ optimizePrompt: params.optimizePrompt,
1714
+ operation: "mistral_request_async",
1715
+ }, runtime);
1533
1716
  if (!("args" in prep))
1534
1717
  return prep;
1535
- const { corrId, args } = prep;
1536
- let durationMs = 0;
1537
- let wasSuccessful = false;
1538
- safeFlightStart({
1539
- correlationId: corrId,
1540
- cli: "codex",
1541
- model: prep.resolvedModel || "default",
1542
- prompt,
1543
- sessionId,
1544
- });
1545
- logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
1718
+ const { corrId, args, requestedMcpServers, approvalDecision, mistralEnv } = prep;
1546
1719
  try {
1547
- const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, forceRefresh);
1548
- // Deferred — job still running, return async reference
1549
- if (isDeferredResponse(result)) {
1550
- return buildDeferredToolResponse(result, sessionId);
1551
- }
1552
- const { stdout, stderr, code } = result;
1553
- durationMs = Math.max(0, Date.now() - startTime);
1554
- if (code !== 0) {
1555
- logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
1556
- safeFlightComplete(corrId, {
1557
- response: stderr || "",
1558
- durationMs,
1559
- retryCount: 0,
1560
- circuitBreakerState: "closed",
1561
- optimizationApplied: optimizePrompt || optimizeResponse,
1562
- exitCode: code,
1563
- errorMessage: stderr || `Exit code ${code}`,
1564
- status: "failed",
1565
- });
1566
- return createErrorResponse("codex", code, stderr, corrId);
1567
- }
1568
- wasSuccessful = true;
1569
- // Track session usage
1570
- let effectiveSessionId = sessionId;
1571
- if (!createNewSession && !sessionId) {
1572
- const activeSession = await sessionManager.getActiveSession("codex");
1573
- if (activeSession) {
1574
- effectiveSessionId = activeSession.id;
1575
- }
1576
- else {
1577
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1578
- effectiveSessionId = newSession.id;
1720
+ const sessionResult = resolveMistralSessionArgs({
1721
+ sessionId: params.sessionId,
1722
+ resumeLatest: params.resumeLatest,
1723
+ createNewSession: params.createNewSession,
1724
+ });
1725
+ args.push(...sessionResult.resumeArgs);
1726
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1727
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1728
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1729
+ if (!existing) {
1730
+ try {
1731
+ await deps.sessionManager.createSession("mistral", "Mistral Session", effectiveSessionId);
1732
+ }
1733
+ catch {
1734
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1735
+ if (!rechecked)
1736
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1737
+ }
1579
1738
  }
1739
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1580
1740
  }
1581
- else if (sessionId) {
1582
- await sessionManager.updateSessionUsage(sessionId);
1583
- }
1584
- else if (createNewSession) {
1585
- const newSession = await sessionManager.createSession("codex", "Codex Session");
1741
+ else if (!params.createNewSession && !effectiveSessionId) {
1742
+ const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1586
1743
  effectiveSessionId = newSession.id;
1587
1744
  }
1588
- logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
1589
- safeFlightComplete(corrId, {
1590
- response: stdout,
1591
- durationMs,
1592
- retryCount: 0,
1593
- circuitBreakerState: "closed",
1594
- optimizationApplied: optimizePrompt || optimizeResponse,
1595
- exitCode: 0,
1596
- status: "completed",
1597
- });
1598
- return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
1745
+ const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
1746
+ deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
1747
+ const asyncResponse = {
1748
+ success: true,
1749
+ job,
1750
+ sessionId: effectiveSessionId || null,
1751
+ resumable: sessionResult.userProvidedSession,
1752
+ approval: approvalDecision,
1753
+ mcpServers: { requested: requestedMcpServers },
1754
+ };
1755
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1756
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1757
+ }
1758
+ return {
1759
+ content: [
1760
+ {
1761
+ type: "text",
1762
+ text: JSON.stringify(asyncResponse, null, 2),
1763
+ },
1764
+ ],
1765
+ };
1599
1766
  }
1600
1767
  catch (error) {
1601
- const elapsedMs = Math.max(0, Date.now() - startTime);
1602
- logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
1603
- safeFlightComplete(corrId, {
1604
- response: "",
1605
- durationMs: elapsedMs,
1606
- retryCount: 0,
1607
- circuitBreakerState: "closed",
1608
- optimizationApplied: optimizePrompt || optimizeResponse,
1609
- exitCode: 1,
1610
- errorMessage: error.message,
1611
- status: "failed",
1612
- });
1613
- return createErrorResponse("codex", 1, "", corrId, error);
1768
+ return createErrorResponse("mistral_request_async", 1, "", corrId, error);
1614
1769
  }
1615
- finally {
1616
- const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1617
- performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
1618
- }
1619
- });
1620
- //──────────────────────────────────────────────────────────────────────────────
1621
- // Gemini Tool
1622
- //──────────────────────────────────────────────────────────────────────────────
1623
- server.tool("gemini_request", {
1624
- prompt: z
1625
- .string()
1626
- .min(1, "Prompt cannot be empty")
1627
- .max(100000, "Prompt too long (max 100k chars)")
1628
- .describe("Prompt text for Gemini"),
1629
- model: z
1630
- .string()
1631
- .optional()
1632
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1633
- sessionId: z.string().optional().describe("Session ID or 'latest'"),
1634
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1635
- createNewSession: z.boolean().default(false).describe("Force new session"),
1636
- approvalMode: z
1637
- .enum(["default", "auto_edit", "yolo"])
1638
- .optional()
1639
- .describe("Approval: default|auto_edit|yolo"),
1640
- approvalStrategy: z
1641
- .enum(["legacy", "mcp_managed"])
1642
- .default("legacy")
1643
- .describe("Approval strategy"),
1644
- approvalPolicy: z
1645
- .enum(["strict", "balanced", "permissive"])
1646
- .optional()
1647
- .describe("Approval policy override"),
1648
- mcpServers: z
1649
- .array(MCP_SERVER_ENUM)
1650
- .default(["sqry"])
1651
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1652
- allowedTools: z
1653
- .array(z.string())
1654
- .optional()
1655
- .describe("Allowed tools (['Write','Edit','Bash'])"),
1656
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1657
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1658
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1659
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1660
- idleTimeoutMs: z
1661
- .number()
1662
- .int()
1663
- .min(30_000)
1664
- .max(3_600_000)
1665
- .optional()
1666
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1667
- forceRefresh: z
1668
- .boolean()
1669
- .default(false)
1670
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1671
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1672
- return handleGeminiRequest({ sessionManager, logger }, {
1673
- prompt,
1674
- model,
1675
- sessionId,
1676
- resumeLatest,
1677
- createNewSession,
1678
- approvalMode,
1679
- approvalStrategy,
1680
- approvalPolicy,
1681
- mcpServers,
1682
- allowedTools,
1683
- includeDirs,
1684
- correlationId,
1685
- optimizePrompt,
1686
- optimizeResponse,
1687
- idleTimeoutMs,
1688
- forceRefresh,
1689
- });
1690
- });
1691
- //──────────────────────────────────────────────────────────────────────────────
1692
- // Grok Tool
1693
- //──────────────────────────────────────────────────────────────────────────────
1694
- server.tool("grok_request", {
1695
- prompt: z
1696
- .string()
1697
- .min(1, "Prompt cannot be empty")
1698
- .max(100000, "Prompt too long (max 100k chars)")
1699
- .describe("Prompt text for Grok"),
1700
- model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
1701
- outputFormat: z
1702
- .enum(["plain", "json", "streaming-json"])
1703
- .optional()
1704
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
1705
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1706
- resumeLatest: z
1707
- .boolean()
1708
- .default(false)
1709
- .describe("Resume most recent Grok session in cwd (--continue)"),
1710
- createNewSession: z.boolean().default(false).describe("Force new session"),
1711
- alwaysApprove: z
1712
- .boolean()
1713
- .default(false)
1714
- .describe("Auto-approve all tool executions (--always-approve)"),
1715
- permissionMode: z
1716
- .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
1717
- .optional()
1718
- .describe("Grok permission mode"),
1719
- effort: z
1720
- .enum(["low", "medium", "high", "xhigh", "max"])
1721
- .optional()
1722
- .describe("Grok effort level"),
1723
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
1724
- approvalStrategy: z
1725
- .enum(["legacy", "mcp_managed"])
1726
- .default("legacy")
1727
- .describe("Approval strategy"),
1728
- approvalPolicy: z
1729
- .enum(["strict", "balanced", "permissive"])
1730
- .optional()
1731
- .describe("Approval policy override"),
1732
- mcpServers: z
1733
- .array(MCP_SERVER_ENUM)
1734
- .default(["sqry"])
1735
- .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
1736
- allowedTools: z
1737
- .array(z.string())
1738
- .optional()
1739
- .describe("Allowed built-in tools (passed as --tools comma list)"),
1740
- disallowedTools: z
1741
- .array(z.string())
1742
- .optional()
1743
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
1744
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1745
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1746
- optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1747
- idleTimeoutMs: z
1748
- .number()
1749
- .int()
1750
- .min(30_000)
1751
- .max(3_600_000)
1752
- .optional()
1753
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1754
- forceRefresh: z
1755
- .boolean()
1756
- .default(false)
1757
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1758
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1759
- return handleGrokRequest({ sessionManager, logger }, {
1760
- prompt,
1761
- model,
1762
- outputFormat,
1763
- sessionId,
1764
- resumeLatest,
1765
- createNewSession,
1766
- alwaysApprove,
1767
- permissionMode,
1768
- effort,
1769
- reasoningEffort,
1770
- approvalStrategy,
1771
- approvalPolicy,
1772
- mcpServers,
1773
- allowedTools,
1774
- disallowedTools,
1775
- correlationId,
1776
- optimizePrompt,
1777
- optimizeResponse,
1778
- idleTimeoutMs,
1779
- forceRefresh,
1780
- });
1781
- });
1782
- //──────────────────────────────────────────────────────────────────────────────
1783
- // Async Long-Running Job Tools (No Time-Bound LLM Execution)
1784
- //──────────────────────────────────────────────────────────────────────────────
1785
- server.tool("claude_request_async", {
1786
- prompt: z
1787
- .string()
1788
- .min(1, "Prompt cannot be empty")
1789
- .max(100000, "Prompt too long (max 100k chars)")
1790
- .describe("Prompt text for Claude"),
1791
- model: z
1792
- .string()
1793
- .optional()
1794
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1795
- outputFormat: z
1796
- .enum(["text", "json", "stream-json"])
1797
- .default("text")
1798
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1799
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1800
- continueSession: z.boolean().default(false).describe("Continue active session"),
1801
- createNewSession: z.boolean().default(false).describe("Force new session"),
1802
- allowedTools: z
1803
- .array(z.string())
1804
- .optional()
1805
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1806
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1807
- dangerouslySkipPermissions: z
1808
- .boolean()
1809
- .default(false)
1810
- .describe("Bypass permissions (sandbox only)"),
1811
- approvalStrategy: z
1812
- .enum(["legacy", "mcp_managed"])
1813
- .default("legacy")
1814
- .describe("Approval strategy"),
1815
- approvalPolicy: z
1816
- .enum(["strict", "balanced", "permissive"])
1817
- .optional()
1818
- .describe("Approval policy override"),
1819
- mcpServers: z
1820
- .array(MCP_SERVER_ENUM)
1821
- .default(["sqry"])
1822
- .describe("MCP servers exposed to Claude"),
1823
- strictMcpConfig: z
1824
- .boolean()
1825
- .default(false)
1826
- .describe("Restrict Claude to provided MCP config only"),
1827
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1828
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1829
- idleTimeoutMs: z
1830
- .number()
1831
- .int()
1832
- .min(30_000)
1833
- .max(3_600_000)
1834
- .optional()
1835
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1836
- forceRefresh: z
1837
- .boolean()
1838
- .default(false)
1839
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1840
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
1841
- const prep = prepareClaudeRequest({
1842
- prompt,
1843
- model,
1844
- outputFormat,
1845
- allowedTools,
1846
- disallowedTools,
1847
- dangerouslySkipPermissions,
1848
- approvalStrategy,
1849
- approvalPolicy,
1850
- mcpServers,
1851
- strictMcpConfig,
1852
- correlationId,
1853
- optimizePrompt,
1854
- operation: "claude_request_async",
1855
- });
1770
+ }
1771
+ export async function handleCodexRequestAsync(deps, params) {
1772
+ const runtime = resolveHandlerRuntime(deps);
1773
+ const prep = prepareCodexRequest({
1774
+ prompt: params.prompt,
1775
+ model: params.model,
1776
+ fullAuto: params.fullAuto,
1777
+ sandboxMode: params.sandboxMode,
1778
+ askForApproval: params.askForApproval,
1779
+ useLegacyFullAutoFlag: params.useLegacyFullAutoFlag,
1780
+ dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
1781
+ approvalStrategy: params.approvalStrategy,
1782
+ approvalPolicy: params.approvalPolicy,
1783
+ mcpServers: params.mcpServers,
1784
+ sessionId: params.sessionId,
1785
+ resumeLatest: params.resumeLatest,
1786
+ createNewSession: params.createNewSession,
1787
+ correlationId: params.correlationId,
1788
+ optimizePrompt: params.optimizePrompt,
1789
+ operation: "codex_request_async",
1790
+ outputFormat: params.outputFormat,
1791
+ outputSchema: params.outputSchema,
1792
+ search: params.search,
1793
+ profile: params.profile,
1794
+ configOverrides: params.configOverrides,
1795
+ ephemeral: params.ephemeral,
1796
+ images: params.images,
1797
+ ignoreUserConfig: params.ignoreUserConfig,
1798
+ ignoreRules: params.ignoreRules,
1799
+ }, runtime);
1856
1800
  if (!("args" in prep))
1857
1801
  return prep;
1858
- const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
1802
+ const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1803
+ // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
1804
+ // exactly one place at a time: this scope until startJob succeeds, then
1805
+ // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
1806
+ // the job is registered. Any code path that fails to hand it off MUST run
1807
+ // it locally.
1808
+ const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
1809
+ let prepCleanupOwnedHere = prepCleanup !== undefined;
1810
+ const runPrepCleanupLocally = () => {
1811
+ if (!prepCleanupOwnedHere || !prepCleanup)
1812
+ return;
1813
+ prepCleanupOwnedHere = false;
1814
+ try {
1815
+ prepCleanup();
1816
+ }
1817
+ catch (err) {
1818
+ deps.logger.error(`[${corrId}] codex_request_async outputSchema cleanup threw`, err);
1819
+ }
1820
+ };
1859
1821
  try {
1860
- // Session management (before job start for async)
1861
- let effectiveSessionId = sessionId;
1862
- let useContinue = continueSession;
1863
- const activeSession = await sessionManager.getActiveSession("claude");
1864
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
1865
- effectiveSessionId = activeSession.id;
1866
- useContinue = true;
1822
+ // Pre-start session I/O (async handlers: prevent orphaned jobs)
1823
+ let effectiveSessionId = params.sessionId;
1824
+ if (!params.createNewSession && !params.sessionId) {
1825
+ const activeSession = await deps.sessionManager.getActiveSession("codex");
1826
+ if (activeSession) {
1827
+ effectiveSessionId = activeSession.id;
1828
+ }
1829
+ else {
1830
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1831
+ effectiveSessionId = newSession.id;
1832
+ }
1867
1833
  }
1868
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
1869
- useContinue = true;
1834
+ else if (params.sessionId) {
1835
+ await deps.sessionManager.updateSessionUsage(params.sessionId);
1870
1836
  }
1871
- if (useContinue) {
1872
- args.push("--continue");
1837
+ else if (params.createNewSession) {
1838
+ const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
1839
+ effectiveSessionId = newSession.id;
1873
1840
  }
1874
- else if (effectiveSessionId) {
1875
- args.push("--session-id", effectiveSessionId);
1876
- await sessionManager.updateSessionUsage(effectiveSessionId);
1841
+ // Start job only after all session I/O succeeds. If startJob throws before
1842
+ // registering the record, ownership stays here and we run it in the catch.
1843
+ let job;
1844
+ try {
1845
+ job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
1846
+ // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
1847
+ // status. Release our local ownership claim so the catch path doesn't
1848
+ // double-fire.
1849
+ prepCleanupOwnedHere = false;
1877
1850
  }
1878
- if (effectiveSessionId) {
1879
- const existingSession = await sessionManager.getSession(effectiveSessionId);
1880
- if (!existingSession) {
1881
- await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
1882
- }
1851
+ catch (startErr) {
1852
+ // startJob never stored the record → manager won't call onComplete. We
1853
+ // still own the cleanup; let the outer catch run it.
1854
+ throw startErr;
1883
1855
  }
1884
- // Idle timeout only for stream-json (text/json produce no output until done)
1885
- const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1886
- const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
1887
- logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
1856
+ deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
1888
1857
  const asyncResponse = {
1889
1858
  success: true,
1890
1859
  job,
1891
- sessionId: effectiveSessionId || activeSession?.id || null,
1860
+ sessionId: effectiveSessionId || null,
1892
1861
  approval: approvalDecision,
1893
- mcpServers: {
1894
- requested: requestedMcpServers,
1895
- enabled: mcpConfig?.enabled,
1896
- missing: mcpConfig?.missing,
1897
- },
1862
+ mcpServers: { requested: requestedMcpServers },
1898
1863
  };
1899
1864
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1900
1865
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
@@ -1909,667 +1874,1873 @@ server.tool("claude_request_async", {
1909
1874
  };
1910
1875
  }
1911
1876
  catch (error) {
1912
- return createErrorResponse("claude_request_async", 1, "", corrId, error);
1913
- }
1914
- });
1915
- server.tool("codex_request_async", {
1916
- prompt: z
1917
- .string()
1918
- .min(1, "Prompt cannot be empty")
1919
- .max(100000, "Prompt too long (max 100k chars)")
1920
- .describe("Prompt text for Codex"),
1921
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1922
- fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1923
- dangerouslyBypassApprovalsAndSandbox: z
1924
- .boolean()
1925
- .default(false)
1926
- .describe("Run Codex without approvals/sandbox"),
1927
- approvalStrategy: z
1928
- .enum(["legacy", "mcp_managed"])
1929
- .default("legacy")
1930
- .describe("Approval strategy"),
1931
- approvalPolicy: z
1932
- .enum(["strict", "balanced", "permissive"])
1933
- .optional()
1934
- .describe("Approval policy override"),
1935
- mcpServers: z
1936
- .array(MCP_SERVER_ENUM)
1937
- .default(["sqry"])
1938
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1939
- sessionId: z
1940
- .string()
1941
- .optional()
1942
- .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1943
- resumeLatest: z
1944
- .boolean()
1945
- .default(false)
1946
- .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1947
- createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
1948
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1949
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1950
- idleTimeoutMs: z
1951
- .number()
1952
- .int()
1953
- .min(30_000)
1954
- .max(3_600_000)
1955
- .optional()
1956
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1957
- forceRefresh: z
1958
- .boolean()
1959
- .default(false)
1960
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1961
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
1962
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
1963
- prompt,
1964
- model,
1965
- fullAuto,
1966
- dangerouslyBypassApprovalsAndSandbox,
1967
- approvalStrategy,
1968
- approvalPolicy,
1969
- mcpServers,
1970
- sessionId,
1971
- resumeLatest,
1972
- createNewSession,
1973
- correlationId,
1974
- optimizePrompt,
1975
- idleTimeoutMs,
1976
- forceRefresh,
1877
+ // Pre-start failure: either session I/O threw, or startJob threw before
1878
+ // registering the record. In either case the manager will NOT fire
1879
+ // prepCleanup, so we must run it here.
1880
+ runPrepCleanupLocally();
1881
+ return createErrorResponse("codex_request_async", 1, "", corrId, error);
1882
+ }
1883
+ }
1884
+ //──────────────────────────────────────────────────────────────────────────────
1885
+ // Claude Code Tool
1886
+ //──────────────────────────────────────────────────────────────────────────────
1887
+ export function createGatewayServer(deps = {}) {
1888
+ const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
1889
+ const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger } = runtime;
1890
+ const server = newGatewayMcpServer();
1891
+ registerBaseResources(server, runtime);
1892
+ registerValidationTools(server, { asyncJobManager });
1893
+ server.tool("claude_request", {
1894
+ prompt: z
1895
+ .string()
1896
+ .min(1, "Prompt cannot be empty")
1897
+ .max(100000, "Prompt too long (max 100k chars)")
1898
+ .describe("Prompt text for Claude"),
1899
+ model: z
1900
+ .string()
1901
+ .optional()
1902
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1903
+ outputFormat: z
1904
+ .enum(["text", "json", "stream-json"])
1905
+ .default("text")
1906
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1907
+ sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
1908
+ continueSession: z.boolean().default(false).describe("Continue active session"),
1909
+ createNewSession: z.boolean().default(false).describe("Force new session"),
1910
+ allowedTools: z
1911
+ .array(z.string())
1912
+ .optional()
1913
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1914
+ disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
1915
+ dangerouslySkipPermissions: z
1916
+ .boolean()
1917
+ .default(false)
1918
+ .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
1919
+ permissionMode: z
1920
+ .enum(CLAUDE_PERMISSION_MODES)
1921
+ .optional()
1922
+ .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
1923
+ // U25 — Claude high-impact features
1924
+ agent: z
1925
+ .string()
1926
+ .optional()
1927
+ .describe("Claude --agent: dispatch to a named single sub-agent."),
1928
+ agents: z
1929
+ .record(z.record(z.unknown()))
1930
+ .optional()
1931
+ .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
1932
+ forkSession: z
1933
+ .boolean()
1934
+ .optional()
1935
+ .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
1936
+ systemPrompt: z
1937
+ .string()
1938
+ .optional()
1939
+ .describe("Claude --system-prompt: replace the system prompt entirely."),
1940
+ appendSystemPrompt: z
1941
+ .string()
1942
+ .optional()
1943
+ .describe("Claude --append-system-prompt: append to the existing system prompt."),
1944
+ maxBudgetUsd: z
1945
+ .number()
1946
+ .positive()
1947
+ .optional()
1948
+ .describe("Claude --max-budget-usd: spend cap for this request in USD."),
1949
+ maxTurns: z
1950
+ .number()
1951
+ .int()
1952
+ .positive()
1953
+ .optional()
1954
+ .describe("Claude --max-turns: cap on agent loop iterations."),
1955
+ effort: z
1956
+ .enum(CLAUDE_EFFORT_LEVELS)
1957
+ .optional()
1958
+ .describe("Claude --effort: low|medium|high|xhigh|max."),
1959
+ excludeDynamicSystemPromptSections: z
1960
+ .boolean()
1961
+ .optional()
1962
+ .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
1963
+ approvalStrategy: z
1964
+ .enum(["legacy", "mcp_managed"])
1965
+ .default("legacy")
1966
+ .describe("Approval strategy"),
1967
+ approvalPolicy: z
1968
+ .enum(["strict", "balanced", "permissive"])
1969
+ .optional()
1970
+ .describe("Approval policy override"),
1971
+ mcpServers: z
1972
+ .array(MCP_SERVER_ENUM)
1973
+ .default(["sqry"])
1974
+ .describe("MCP servers exposed to Claude"),
1975
+ strictMcpConfig: z
1976
+ .boolean()
1977
+ .default(false)
1978
+ .describe("Restrict Claude to provided MCP config only"),
1979
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1980
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1981
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1982
+ idleTimeoutMs: z
1983
+ .number()
1984
+ .int()
1985
+ .min(30_000)
1986
+ .max(3_600_000)
1987
+ .optional()
1988
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1989
+ forceRefresh: z
1990
+ .boolean()
1991
+ .default(false)
1992
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1993
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1994
+ const startTime = Date.now();
1995
+ if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
1996
+ return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
1997
+ }
1998
+ const prep = prepareClaudeRequest({
1999
+ prompt,
2000
+ model,
2001
+ outputFormat,
2002
+ allowedTools,
2003
+ disallowedTools,
2004
+ dangerouslySkipPermissions,
2005
+ permissionMode,
2006
+ approvalStrategy,
2007
+ approvalPolicy,
2008
+ mcpServers,
2009
+ strictMcpConfig,
2010
+ correlationId,
2011
+ optimizePrompt,
2012
+ operation: "claude_request",
2013
+ agent,
2014
+ agents,
2015
+ forkSession,
2016
+ systemPrompt,
2017
+ appendSystemPrompt,
2018
+ maxBudgetUsd,
2019
+ maxTurns,
2020
+ effort,
2021
+ excludeDynamicSystemPromptSections,
2022
+ }, runtime);
2023
+ if (!("args" in prep))
2024
+ return prep;
2025
+ const { corrId, args } = prep;
2026
+ let durationMs = 0;
2027
+ let wasSuccessful = false;
2028
+ safeFlightStart({
2029
+ correlationId: corrId,
2030
+ cli: "claude",
2031
+ model: prep.resolvedModel || "default",
2032
+ prompt,
2033
+ sessionId,
2034
+ }, runtime);
2035
+ logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
2036
+ try {
2037
+ // Session management
2038
+ let effectiveSessionId = sessionId;
2039
+ let useContinue = continueSession;
2040
+ const activeSession = await sessionManager.getActiveSession("claude");
2041
+ if (!createNewSession && !continueSession && !sessionId && activeSession) {
2042
+ effectiveSessionId = activeSession.id;
2043
+ useContinue = true;
2044
+ }
2045
+ if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2046
+ useContinue = true;
2047
+ }
2048
+ if (useContinue) {
2049
+ args.push("--continue");
2050
+ }
2051
+ else if (effectiveSessionId) {
2052
+ args.push("--session-id", effectiveSessionId);
2053
+ await sessionManager.updateSessionUsage(effectiveSessionId);
2054
+ }
2055
+ // Idle timeout only for stream-json (text/json produce no output until done)
2056
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
2057
+ const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime);
2058
+ // Deferred — job still running, return async reference
2059
+ if (isDeferredResponse(result)) {
2060
+ return buildDeferredToolResponse(result, effectiveSessionId);
2061
+ }
2062
+ const { stdout, stderr, code } = result;
2063
+ durationMs = Math.max(0, Date.now() - startTime);
2064
+ if (code !== 0) {
2065
+ logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
2066
+ safeFlightComplete(corrId, {
2067
+ response: stderr || "",
2068
+ durationMs,
2069
+ retryCount: 0,
2070
+ circuitBreakerState: "closed",
2071
+ optimizationApplied: optimizePrompt || optimizeResponse,
2072
+ exitCode: code,
2073
+ errorMessage: stderr || `Exit code ${code}`,
2074
+ status: "failed",
2075
+ }, runtime);
2076
+ return createErrorResponse("claude", code, stderr, corrId);
2077
+ }
2078
+ wasSuccessful = true;
2079
+ // If we used a session ID and it's not tracked yet, create a session record
2080
+ if (effectiveSessionId) {
2081
+ const existingSession = await sessionManager.getSession(effectiveSessionId);
2082
+ if (!existingSession) {
2083
+ await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
2084
+ }
2085
+ }
2086
+ logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
2087
+ // Parse stream-json NDJSON output to extract result text
2088
+ if (outputFormat === "stream-json") {
2089
+ const parsed = parseStreamJson(stdout);
2090
+ if (parsed.costUsd !== null) {
2091
+ logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
2092
+ }
2093
+ safeFlightComplete(corrId, {
2094
+ response: parsed.text,
2095
+ inputTokens: parsed.usage?.inputTokens,
2096
+ outputTokens: parsed.usage?.outputTokens,
2097
+ cacheReadTokens: parsed.usage?.cacheReadInputTokens || undefined,
2098
+ cacheCreationTokens: parsed.usage?.cacheCreationInputTokens || undefined,
2099
+ durationMs,
2100
+ retryCount: 0,
2101
+ circuitBreakerState: "closed",
2102
+ costUsd: parsed.costUsd ?? undefined,
2103
+ optimizationApplied: optimizePrompt || optimizeResponse,
2104
+ exitCode: 0,
2105
+ status: "completed",
2106
+ }, runtime);
2107
+ return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2108
+ }
2109
+ safeFlightComplete(corrId, {
2110
+ response: stdout,
2111
+ durationMs,
2112
+ retryCount: 0,
2113
+ circuitBreakerState: "closed",
2114
+ optimizationApplied: optimizePrompt || optimizeResponse,
2115
+ exitCode: 0,
2116
+ status: "completed",
2117
+ }, runtime);
2118
+ return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2119
+ }
2120
+ catch (error) {
2121
+ const elapsedMs = Math.max(0, Date.now() - startTime);
2122
+ logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
2123
+ safeFlightComplete(corrId, {
2124
+ response: "",
2125
+ durationMs: elapsedMs,
2126
+ retryCount: 0,
2127
+ circuitBreakerState: "closed",
2128
+ optimizationApplied: optimizePrompt || optimizeResponse,
2129
+ exitCode: 1,
2130
+ errorMessage: error.message,
2131
+ status: "failed",
2132
+ }, runtime);
2133
+ return createErrorResponse("claude", 1, "", corrId, error);
2134
+ }
2135
+ finally {
2136
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2137
+ performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
2138
+ }
1977
2139
  });
1978
- });
1979
- server.tool("gemini_request_async", {
1980
- prompt: z
1981
- .string()
1982
- .min(1, "Prompt cannot be empty")
1983
- .max(100000, "Prompt too long (max 100k chars)")
1984
- .describe("Prompt text for Gemini"),
1985
- model: z
1986
- .string()
1987
- .optional()
1988
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1989
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1990
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1991
- createNewSession: z.boolean().default(false).describe("Force new session"),
1992
- approvalMode: z
1993
- .enum(["default", "auto_edit", "yolo"])
1994
- .optional()
1995
- .describe("Approval: default|auto_edit|yolo"),
1996
- approvalStrategy: z
1997
- .enum(["legacy", "mcp_managed"])
1998
- .default("legacy")
1999
- .describe("Approval strategy"),
2000
- approvalPolicy: z
2001
- .enum(["strict", "balanced", "permissive"])
2002
- .optional()
2003
- .describe("Approval policy override"),
2004
- mcpServers: z
2005
- .array(MCP_SERVER_ENUM)
2006
- .default(["sqry"])
2007
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
2008
- allowedTools: z
2009
- .array(z.string())
2010
- .optional()
2011
- .describe("Allowed tools (['Write','Edit','Bash'])"),
2012
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
2013
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2014
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2015
- idleTimeoutMs: z
2016
- .number()
2017
- .int()
2018
- .min(30_000)
2019
- .max(3_600_000)
2020
- .optional()
2021
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2022
- forceRefresh: z
2023
- .boolean()
2024
- .default(false)
2025
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2026
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2027
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
2028
- prompt,
2029
- model,
2030
- sessionId,
2031
- resumeLatest,
2032
- createNewSession,
2033
- approvalMode,
2034
- approvalStrategy,
2035
- approvalPolicy,
2036
- mcpServers,
2037
- allowedTools,
2038
- includeDirs,
2039
- correlationId,
2040
- optimizePrompt,
2041
- idleTimeoutMs,
2042
- forceRefresh,
2140
+ //──────────────────────────────────────────────────────────────────────────────
2141
+ // Codex Tool
2142
+ //──────────────────────────────────────────────────────────────────────────────
2143
+ server.tool("codex_request", {
2144
+ prompt: z
2145
+ .string()
2146
+ .min(1, "Prompt cannot be empty")
2147
+ .max(100000, "Prompt too long (max 100k chars)")
2148
+ .describe("Prompt text for Codex"),
2149
+ model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
2150
+ fullAuto: z
2151
+ .boolean()
2152
+ .default(false)
2153
+ .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
2154
+ sandboxMode: z
2155
+ .enum(CODEX_SANDBOX_MODES)
2156
+ .optional()
2157
+ .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2158
+ askForApproval: z
2159
+ .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2160
+ .optional()
2161
+ .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2162
+ useLegacyFullAutoFlag: z
2163
+ .boolean()
2164
+ .default(false)
2165
+ .describe("Escape hatch: emit `--full-auto` directly instead of expanding (deprecated)."),
2166
+ dangerouslyBypassApprovalsAndSandbox: z
2167
+ .boolean()
2168
+ .default(false)
2169
+ .describe("Run Codex without approvals/sandbox"),
2170
+ approvalStrategy: z
2171
+ .enum(["legacy", "mcp_managed"])
2172
+ .default("legacy")
2173
+ .describe("Approval strategy"),
2174
+ approvalPolicy: z
2175
+ .enum(["strict", "balanced", "permissive"])
2176
+ .optional()
2177
+ .describe("Approval policy override"),
2178
+ mcpServers: z
2179
+ .array(MCP_SERVER_ENUM)
2180
+ .default(["sqry"])
2181
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
2182
+ sessionId: z
2183
+ .string()
2184
+ .optional()
2185
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
2186
+ resumeLatest: z
2187
+ .boolean()
2188
+ .default(false)
2189
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
2190
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
2191
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2192
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2193
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
2194
+ idleTimeoutMs: z
2195
+ .number()
2196
+ .int()
2197
+ .min(30_000)
2198
+ .max(3_600_000)
2199
+ .optional()
2200
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2201
+ forceRefresh: z
2202
+ .boolean()
2203
+ .default(false)
2204
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2205
+ // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
2206
+ // tokens (and any cost) through extractUsageAndCost. Without "json", the
2207
+ // parser is unreachable and Codex usage is never reported.
2208
+ outputFormat: z
2209
+ .enum(["text", "json"])
2210
+ .default("text")
2211
+ .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
2212
+ // U26: high-impact feature flags. All optional.
2213
+ outputSchema: z
2214
+ .union([z.string(), z.record(z.unknown())])
2215
+ .optional()
2216
+ .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object; object is materialised to a 0o600 temp file under os.tmpdir() and deleted after the run."),
2217
+ search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
2218
+ profile: z
2219
+ .string()
2220
+ .optional()
2221
+ .describe("Codex --profile <name>: select a profile from ~/.codex/config.toml."),
2222
+ configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
2223
+ ephemeral: z
2224
+ .boolean()
2225
+ .optional()
2226
+ .describe("Codex --ephemeral: do not persist the session to disk."),
2227
+ images: z
2228
+ .array(z.string())
2229
+ .optional()
2230
+ .describe("Codex -i <path>: image attachments. Each path must exist; missing paths fail fast."),
2231
+ ignoreUserConfig: z
2232
+ .boolean()
2233
+ .optional()
2234
+ .describe("Codex --ignore-user-config: ignore ~/.codex/config.toml for this run."),
2235
+ ignoreRules: z
2236
+ .boolean()
2237
+ .optional()
2238
+ .describe("Codex --ignore-rules: skip project rule files for this run."),
2239
+ }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
2240
+ const startTime = Date.now();
2241
+ const prep = prepareCodexRequest({
2242
+ prompt,
2243
+ model,
2244
+ fullAuto,
2245
+ sandboxMode,
2246
+ askForApproval,
2247
+ useLegacyFullAutoFlag,
2248
+ dangerouslyBypassApprovalsAndSandbox,
2249
+ approvalStrategy,
2250
+ approvalPolicy,
2251
+ mcpServers,
2252
+ sessionId,
2253
+ resumeLatest,
2254
+ createNewSession,
2255
+ correlationId,
2256
+ optimizePrompt,
2257
+ operation: "codex_request",
2258
+ outputFormat,
2259
+ outputSchema,
2260
+ search,
2261
+ profile,
2262
+ configOverrides,
2263
+ ephemeral,
2264
+ images,
2265
+ ignoreUserConfig,
2266
+ ignoreRules,
2267
+ }, runtime);
2268
+ if (!("args" in prep))
2269
+ return prep;
2270
+ const { corrId, args } = prep;
2271
+ let durationMs = 0;
2272
+ let wasSuccessful = false;
2273
+ safeFlightStart({
2274
+ correlationId: corrId,
2275
+ cli: "codex",
2276
+ model: prep.resolvedModel || "default",
2277
+ prompt,
2278
+ sessionId,
2279
+ }, runtime);
2280
+ logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
2281
+ // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
2282
+ // guarantees the cleanup runs exactly once — inline for direct
2283
+ // execution, on terminal status for the job-backed path (sync
2284
+ // completion or deferred). The outer finally MUST NOT clean again.
2285
+ const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
2286
+ try {
2287
+ const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup);
2288
+ // Deferred — job still running, return async reference. Cleanup
2289
+ // ownership belongs to AsyncJobManager via onComplete.
2290
+ if (isDeferredResponse(result)) {
2291
+ return buildDeferredToolResponse(result, sessionId);
2292
+ }
2293
+ const { stdout, stderr, code } = result;
2294
+ durationMs = Math.max(0, Date.now() - startTime);
2295
+ if (code !== 0) {
2296
+ logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
2297
+ safeFlightComplete(corrId, {
2298
+ response: stderr || "",
2299
+ durationMs,
2300
+ retryCount: 0,
2301
+ circuitBreakerState: "closed",
2302
+ optimizationApplied: optimizePrompt || optimizeResponse,
2303
+ exitCode: code,
2304
+ errorMessage: stderr || `Exit code ${code}`,
2305
+ status: "failed",
2306
+ }, runtime);
2307
+ return createErrorResponse("codex", code, stderr, corrId);
2308
+ }
2309
+ wasSuccessful = true;
2310
+ // Track session usage
2311
+ let effectiveSessionId = sessionId;
2312
+ if (!createNewSession && !sessionId) {
2313
+ const activeSession = await sessionManager.getActiveSession("codex");
2314
+ if (activeSession) {
2315
+ effectiveSessionId = activeSession.id;
2316
+ }
2317
+ else {
2318
+ const newSession = await sessionManager.createSession("codex", "Codex Session");
2319
+ effectiveSessionId = newSession.id;
2320
+ }
2321
+ }
2322
+ else if (sessionId) {
2323
+ await sessionManager.updateSessionUsage(sessionId);
2324
+ }
2325
+ else if (createNewSession) {
2326
+ const newSession = await sessionManager.createSession("codex", "Codex Session");
2327
+ effectiveSessionId = newSession.id;
2328
+ }
2329
+ logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
2330
+ const codexUsage = extractUsageAndCost("codex", stdout, outputFormat);
2331
+ safeFlightComplete(corrId, {
2332
+ response: stdout,
2333
+ durationMs,
2334
+ retryCount: 0,
2335
+ circuitBreakerState: "closed",
2336
+ optimizationApplied: optimizePrompt || optimizeResponse,
2337
+ exitCode: 0,
2338
+ status: "completed",
2339
+ inputTokens: codexUsage.inputTokens,
2340
+ outputTokens: codexUsage.outputTokens,
2341
+ cacheReadTokens: codexUsage.cacheReadTokens,
2342
+ cacheCreationTokens: codexUsage.cacheCreationTokens,
2343
+ costUsd: codexUsage.costUsd,
2344
+ }, runtime);
2345
+ return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
2346
+ }
2347
+ catch (error) {
2348
+ const elapsedMs = Math.max(0, Date.now() - startTime);
2349
+ logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
2350
+ safeFlightComplete(corrId, {
2351
+ response: "",
2352
+ durationMs: elapsedMs,
2353
+ retryCount: 0,
2354
+ circuitBreakerState: "closed",
2355
+ optimizationApplied: optimizePrompt || optimizeResponse,
2356
+ exitCode: 1,
2357
+ errorMessage: error.message,
2358
+ status: "failed",
2359
+ }, runtime);
2360
+ return createErrorResponse("codex", 1, "", corrId, error);
2361
+ }
2362
+ finally {
2363
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
2364
+ performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
2365
+ // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
2366
+ }
2043
2367
  });
2044
- });
2045
- server.tool("grok_request_async", {
2046
- prompt: z
2047
- .string()
2048
- .min(1, "Prompt cannot be empty")
2049
- .max(100000, "Prompt too long (max 100k chars)")
2050
- .describe("Prompt text for Grok"),
2051
- model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
2052
- outputFormat: z
2053
- .enum(["plain", "json", "streaming-json"])
2054
- .optional()
2055
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
2056
- sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
2057
- resumeLatest: z
2058
- .boolean()
2059
- .default(false)
2060
- .describe("Resume most recent Grok session in cwd (--continue)"),
2061
- createNewSession: z.boolean().default(false).describe("Force new session"),
2062
- alwaysApprove: z
2063
- .boolean()
2064
- .default(false)
2065
- .describe("Auto-approve all tool executions (--always-approve)"),
2066
- permissionMode: z
2067
- .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
2068
- .optional()
2069
- .describe("Grok permission mode"),
2070
- effort: z
2071
- .enum(["low", "medium", "high", "xhigh", "max"])
2072
- .optional()
2073
- .describe("Grok effort level"),
2074
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
2075
- approvalStrategy: z
2076
- .enum(["legacy", "mcp_managed"])
2077
- .default("legacy")
2078
- .describe("Approval strategy"),
2079
- approvalPolicy: z
2080
- .enum(["strict", "balanced", "permissive"])
2081
- .optional()
2082
- .describe("Approval policy override"),
2083
- mcpServers: z
2084
- .array(MCP_SERVER_ENUM)
2085
- .default(["sqry"])
2086
- .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
2087
- allowedTools: z
2088
- .array(z.string())
2089
- .optional()
2090
- .describe("Allowed built-in tools (passed as --tools comma list)"),
2091
- disallowedTools: z
2092
- .array(z.string())
2093
- .optional()
2094
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
2095
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2096
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2097
- idleTimeoutMs: z
2098
- .number()
2099
- .int()
2100
- .min(30_000)
2101
- .max(3_600_000)
2102
- .optional()
2103
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2104
- forceRefresh: z
2105
- .boolean()
2106
- .default(false)
2107
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2108
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2109
- return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger }, {
2110
- prompt,
2111
- model,
2112
- outputFormat,
2113
- sessionId,
2114
- resumeLatest,
2115
- createNewSession,
2116
- alwaysApprove,
2117
- permissionMode,
2118
- effort,
2119
- reasoningEffort,
2120
- approvalStrategy,
2121
- approvalPolicy,
2122
- mcpServers,
2123
- allowedTools,
2124
- disallowedTools,
2125
- correlationId,
2126
- optimizePrompt,
2127
- idleTimeoutMs,
2128
- forceRefresh,
2368
+ //──────────────────────────────────────────────────────────────────────────────
2369
+ // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
2370
+ //──────────────────────────────────────────────────────────────────────────────
2371
// Registers the `codex_fork_session` tool, which runs
// `codex fork <SESSION_ID|--last> <prompt>` through awaitJobOrDefer.
//
// Inputs: `prompt` (required) plus exactly one of `sessionId` / `forkLast`;
// optional `model`, `sandboxMode`, `askForApproval`, `correlationId` and
// `idleTimeoutMs`.
// Result: the CLI stdout as a single text content item on exit code 0, a
// deferred job reference when awaitJobOrDefer defers, or an error response
// for validation failures, non-zero exits and thrown exceptions.
// Invocations that reach the CLI call are recorded in performanceMetrics
// under "codex"; validation failures return before that point.
server.tool("codex_fork_session", {
    prompt: z
        .string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for the forked Codex session"),
    sessionId: z
        .string()
        .optional()
        .describe("Codex session UUID to fork from. Mutually exclusive with `forkLast`."),
    forkLast: z
        .boolean()
        .optional()
        .describe("Fork from the most recent Codex session. Mutually exclusive with `sessionId`."),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.5, latest)"),
    sandboxMode: z
        .enum(CODEX_SANDBOX_MODES)
        .optional()
        .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
    askForApproval: z
        .enum(CODEX_ASK_FOR_APPROVAL_MODES)
        .optional()
        .describe("Codex --ask-for-approval: untrusted|on-request|never."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    idleTimeoutMs: z
        .number()
        .int()
        .min(30_000)
        .max(3_600_000)
        .optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
}, async ({ prompt, sessionId, forkLast, model, sandboxMode, askForApproval, correlationId, idleTimeoutMs, }) => {
    const corrId = correlationId || randomUUID();
    const startTime = Date.now();
    let durationMs = 0;
    let wasSuccessful = false;
    // Enforce mutual exclusion at tool boundary (Zod records the params but
    // the SDK's `.tool(...)` does not accept top-level refines).
    if (sessionId && forkLast) {
        return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
    }
    if (!sessionId && !forkLast) {
        return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("one of sessionId or forkLast is required"));
    }
    let forkArgs;
    try {
        forkArgs = prepareCodexForkRequest({ prompt, sessionId, forkLast }).args;
    }
    catch (err) {
        // Argument preparation failed; surface it as a tool error response.
        return createErrorResponse("codex_fork_session", 1, "", corrId, err);
    }
    const cliInfo = getCliInfo();
    const resolvedModel = resolveModelAlias("codex", model, cliInfo);
    // Compose argv: forkArgs already starts with `fork`. Inject model and
    // sandbox/approval flags BEFORE the positional <sessionId|--last> +
    // prompt to keep them as flags rather than positionals. forkArgs layout
    // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
    // we splice flags right after "fork".
    const flagSegment = [];
    if (resolvedModel)
        flagSegment.push("--model", resolvedModel);
    const sandboxFlags = resolveCodexSandboxFlags({
        sandboxMode,
        askForApproval,
    });
    if (sandboxFlags.warning) {
        logger.warn(`[${corrId}] ${sandboxFlags.warning}`);
    }
    flagSegment.push(...sandboxFlags.args);
    const finalArgs = [forkArgs[0], ...flagSegment, ...forkArgs.slice(1)];
    logger.info(`[${corrId}] codex_fork_session invoked (forkLast=${Boolean(forkLast)}, sessionId=${sessionId ? "set" : "unset"})`);
    try {
        const result = await awaitJobOrDefer("codex", finalArgs, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, false, runtime);
        // Deferred: the job is still running, so hand back the async reference.
        if (isDeferredResponse(result)) {
            return buildDeferredToolResponse(result, sessionId);
        }
        const { stdout, stderr, code } = result;
        durationMs = Math.max(0, Date.now() - startTime);
        if (code !== 0) {
            // Log the failure so it is traceable by correlation ID, matching
            // the logging done by the codex_request handler.
            logger.info(`[${corrId}] codex_fork_session failed in ${durationMs}ms`);
            return createErrorResponse("codex", code, stderr, corrId);
        }
        wasSuccessful = true;
        logger.info(`[${corrId}] codex_fork_session completed successfully in ${durationMs}ms`);
        return {
            content: [{ type: "text", text: stdout }],
        };
    }
    catch (error) {
        const elapsedMs = Math.max(0, Date.now() - startTime);
        logger.info(`[${corrId}] codex_fork_session threw exception after ${elapsedMs}ms`);
        return createErrorResponse("codex_fork_session", 1, "", corrId, error);
    }
    finally {
        // durationMs stays 0 on the deferred and exception paths; fall back
        // to the elapsed wall-clock time in that case.
        const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
        performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
    }
});
2130
- });
2131
- server.tool("llm_job_status", {
2132
- jobId: z.string().describe("Async job ID from *_request_async"),
2133
- }, async ({ jobId }) => {
2134
- const job = asyncJobManager.getJobSnapshot(jobId);
2135
- if (!job) {
2136
- return {
2137
- content: [
2138
- {
2139
- type: "text",
2140
- text: JSON.stringify({
2141
- success: false,
2142
- error: "Job not found",
2143
- jobId,
2144
- }, null, 2),
2465
+ //──────────────────────────────────────────────────────────────────────────────
2466
+ // Gemini Tool
2467
+ //──────────────────────────────────────────────────────────────────────────────
2468
// `gemini_request` tool registration. The schema below validates the call
// arguments; the handler forwards them unchanged to handleGeminiRequest
// together with the session manager, logger and runtime.
server.tool("gemini_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
    model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional().describe("Session ID or 'latest'"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional().describe("Approval: default|auto_edit|yolo|plan"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` makes the CLI emit `-o json` so token usage can be parsed.
    // The default stays `text`, leaving existing callers unaffected.
    outputFormat: z.enum(["text", "json"]).default("text").describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
    sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
    policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
    adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
    attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
}, async (request) => {
    const { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, } = request;
    const deps = { sessionManager, logger, runtime };
    // Rebuild the payload key by key so the handler receives exactly the
    // declared parameters.
    const payload = {
        prompt,
        model,
        sessionId,
        resumeLatest,
        createNewSession,
        approvalMode,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        allowedTools,
        includeDirs,
        correlationId,
        optimizePrompt,
        optimizeResponse,
        idleTimeoutMs,
        forceRefresh,
        outputFormat,
        sandbox,
        policyFiles,
        adminPolicyFiles,
        attachments,
    };
    return handleGeminiRequest(deps, payload);
});
2551
+ //──────────────────────────────────────────────────────────────────────────────
2552
+ // Grok Tool
2553
+ //──────────────────────────────────────────────────────────────────────────────
2554
// `grok_request` tool registration. The schema below validates the call
// arguments; the handler forwards them unchanged to handleGrokRequest
// together with the session manager, logger and runtime.
server.tool("grok_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Grok"),
    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
    outputFormat: z.enum(["plain", "json", "streaming-json"]).optional().describe("Output format (plain|json|streaming-json). Grok default is plain."),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Grok session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    alwaysApprove: z.boolean().default(false).describe("Auto-approve all tool executions (--always-approve)"),
    permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional().describe("Grok permission mode"),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Grok effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowed built-in tools (passed as --tools comma list)"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (request) => {
    const { prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, } = request;
    const deps = { sessionManager, logger, runtime };
    // Rebuild the payload key by key so the handler receives exactly the
    // declared parameters.
    const payload = {
        prompt,
        model,
        outputFormat,
        sessionId,
        resumeLatest,
        createNewSession,
        alwaysApprove,
        permissionMode,
        effort,
        reasoningEffort,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        allowedTools,
        disallowedTools,
        correlationId,
        optimizePrompt,
        optimizeResponse,
        idleTimeoutMs,
        forceRefresh,
    };
    return handleGrokRequest(deps, payload);
});
2645
+ //──────────────────────────────────────────────────────────────────────────────
2646
+ // Mistral Vibe Tool
2647
+ //──────────────────────────────────────────────────────────────────────────────
2648
// `mistral_request` tool registration. The schema below validates the call
// arguments; the handler forwards them unchanged to handleMistralRequest
// together with the session manager, logger and runtime.
server.tool("mistral_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Mistral Vibe"),
    model: z.string().optional().describe("Model alias (e.g. devstral-medium, devstral-large, latest). Resolved alias is injected via VIBE_ACTIVE_MODEL env var — Vibe has no --model flag."),
    outputFormat: z.enum(["plain", "json", "stream-json"]).optional().describe("Output format (plain|json|stream-json). Vibe default is plain."),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Vibe session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    permissionMode: z.enum(MISTRAL_AGENT_MODES).optional().describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Vibe effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
    disallowedTools: z.array(z.string()).optional().describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (request) => {
    const { prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, } = request;
    const deps = { sessionManager, logger, runtime };
    // Rebuild the payload key by key so the handler receives exactly the
    // declared parameters.
    const payload = {
        prompt,
        model,
        outputFormat,
        sessionId,
        resumeLatest,
        createNewSession,
        permissionMode,
        effort,
        reasoningEffort,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        allowedTools,
        disallowedTools,
        correlationId,
        optimizePrompt,
        optimizeResponse,
        idleTimeoutMs,
        forceRefresh,
    };
    return handleMistralRequest(deps, payload);
});
2737
+ //──────────────────────────────────────────────────────────────────────────────
2738
+ // Async Long-Running Job Tools (No Time-Bound LLM Execution)
2739
+ //──────────────────────────────────────────────────────────────────────────────
2740
// Registers the `claude_request_async` tool: validates the arguments, builds
// the Claude argv via prepareClaudeRequest, resolves which session to use,
// then starts a background job through asyncJobManager.startJob and returns
// immediately with the job reference.
//
// Result: a text content item holding JSON with `success`, `job`,
// `sessionId`, `approval` and `mcpServers` (plus `reviewIntegrity` when
// violations were found), or an error response when validation or
// preparation fails or an exception is thrown.
server.tool("claude_request_async", {
    prompt: z
        .string()
        .min(1, "Prompt cannot be empty")
        .max(100000, "Prompt too long (max 100k chars)")
        .describe("Prompt text for Claude"),
    model: z
        .string()
        .optional()
        .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
    outputFormat: z
        .enum(["text", "json", "stream-json"])
        .default("text")
        .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
    continueSession: z.boolean().default(false).describe("Continue active session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    allowedTools: z
        .array(z.string())
        .optional()
        .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
    dangerouslySkipPermissions: z
        .boolean()
        .default(false)
        .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
    permissionMode: z
        .enum(CLAUDE_PERMISSION_MODES)
        .optional()
        .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
    // U25 — Claude high-impact features
    agent: z
        .string()
        .optional()
        .describe("Claude --agent: dispatch to a named single sub-agent."),
    agents: z
        .record(z.record(z.unknown()))
        .optional()
        .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
    forkSession: z
        .boolean()
        .optional()
        .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
    systemPrompt: z
        .string()
        .optional()
        .describe("Claude --system-prompt: replace the system prompt entirely."),
    appendSystemPrompt: z
        .string()
        .optional()
        .describe("Claude --append-system-prompt: append to the existing system prompt."),
    maxBudgetUsd: z
        .number()
        .positive()
        .optional()
        .describe("Claude --max-budget-usd: spend cap for this request in USD."),
    maxTurns: z
        .number()
        .int()
        .positive()
        .optional()
        .describe("Claude --max-turns: cap on agent loop iterations."),
    effort: z
        .enum(CLAUDE_EFFORT_LEVELS)
        .optional()
        .describe("Claude --effort: low|medium|high|xhigh|max."),
    excludeDynamicSystemPromptSections: z
        .boolean()
        .optional()
        .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
    approvalStrategy: z
        .enum(["legacy", "mcp_managed"])
        .default("legacy")
        .describe("Approval strategy"),
    approvalPolicy: z
        .enum(["strict", "balanced", "permissive"])
        .optional()
        .describe("Approval policy override"),
    mcpServers: z
        .array(MCP_SERVER_ENUM)
        .default(["sqry"])
        .describe("MCP servers exposed to Claude"),
    strictMcpConfig: z
        .boolean()
        .default(false)
        .describe("Restrict Claude to provided MCP config only"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z
        .number()
        .int()
        .min(30_000)
        .max(3_600_000)
        .optional()
        .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z
        .boolean()
        .default(false)
        .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
    if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
        // `correlationId` is optional and no ID has been generated yet at
        // this point, so create one here; otherwise this early error could
        // go out without a trace ID.
        const earlyCorrId = correlationId || randomUUID();
        return createErrorResponse("claude", 1, "", earlyCorrId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
    }
    const prep = prepareClaudeRequest({
        prompt,
        model,
        outputFormat,
        allowedTools,
        disallowedTools,
        dangerouslySkipPermissions,
        permissionMode,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        strictMcpConfig,
        correlationId,
        optimizePrompt,
        operation: "claude_request_async",
        agent,
        agents,
        forkSession,
        systemPrompt,
        appendSystemPrompt,
        maxBudgetUsd,
        maxTurns,
        effort,
        excludeDynamicSystemPromptSections,
    }, runtime);
    // A result without `args` is returned to the caller as-is.
    if (!("args" in prep))
        return prep;
    const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
    try {
        // Session management (before job start for async)
        let effectiveSessionId = sessionId;
        let useContinue = continueSession;
        const activeSession = await sessionManager.getActiveSession("claude");
        // No explicit session choice: fall back to continuing the active session.
        if (!createNewSession && !continueSession && !sessionId && activeSession) {
            effectiveSessionId = activeSession.id;
            useContinue = true;
        }
        // An explicit session ID that matches the active session is also continued.
        if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
            useContinue = true;
        }
        if (useContinue) {
            args.push("--continue");
        }
        else if (effectiveSessionId) {
            args.push("--session-id", effectiveSessionId);
            await sessionManager.updateSessionUsage(effectiveSessionId);
        }
        // Make sure the session manager has a record for the session in use.
        if (effectiveSessionId) {
            const existingSession = await sessionManager.getSession(effectiveSessionId);
            if (!existingSession) {
                await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
            }
        }
        // Idle timeout only for stream-json (text/json produce no output until done)
        const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
        const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
        logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
        const asyncResponse = {
            success: true,
            job,
            sessionId: effectiveSessionId || activeSession?.id || null,
            approval: approvalDecision,
            mcpServers: {
                requested: requestedMcpServers,
                enabled: mcpConfig?.enabled,
                missing: mcpConfig?.missing,
            },
        };
        // Only attach review-integrity details when there are violations.
        if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
            asyncResponse.reviewIntegrity = prep.reviewIntegrity;
        }
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify(asyncResponse, null, 2),
                },
            ],
        };
    }
    catch (error) {
        return createErrorResponse("claude_request_async", 1, "", corrId, error);
    }
});
2927
+ server.tool("codex_request_async", {
2928
+ prompt: z
2929
+ .string()
2930
+ .min(1, "Prompt cannot be empty")
2931
+ .max(100000, "Prompt too long (max 100k chars)")
2932
+ .describe("Prompt text for Codex"),
2933
+ model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
2934
+ fullAuto: z
2935
+ .boolean()
2936
+ .default(false)
2937
+ .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
2938
+ sandboxMode: z
2939
+ .enum(CODEX_SANDBOX_MODES)
2940
+ .optional()
2941
+ .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2942
+ askForApproval: z
2943
+ .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2944
+ .optional()
2945
+ .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2946
+ useLegacyFullAutoFlag: z
2947
+ .boolean()
2948
+ .default(false)
2949
+ .describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
2950
+ dangerouslyBypassApprovalsAndSandbox: z
2951
+ .boolean()
2952
+ .default(false)
2953
+ .describe("Run Codex without approvals/sandbox"),
2954
+ approvalStrategy: z
2955
+ .enum(["legacy", "mcp_managed"])
2956
+ .default("legacy")
2957
+ .describe("Approval strategy"),
2958
+ approvalPolicy: z
2959
+ .enum(["strict", "balanced", "permissive"])
2960
+ .optional()
2961
+ .describe("Approval policy override"),
2962
+ mcpServers: z
2963
+ .array(MCP_SERVER_ENUM)
2964
+ .default(["sqry"])
2965
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
2966
+ sessionId: z
2967
+ .string()
2968
+ .optional()
2969
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
2970
+ resumeLatest: z
2971
+ .boolean()
2972
+ .default(false)
2973
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
2974
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
2975
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2976
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2977
+ idleTimeoutMs: z
2978
+ .number()
2979
+ .int()
2980
+ .min(30_000)
2981
+ .max(3_600_000)
2982
+ .optional()
2983
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2984
+ forceRefresh: z
2985
+ .boolean()
2986
+ .default(false)
2987
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2988
+ // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
2989
+ outputFormat: z
2990
+ .enum(["text", "json"])
2991
+ .default("text")
2992
+ .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
2993
+ // U26: high-impact feature flags. All optional.
2994
+ outputSchema: z
2995
+ .union([z.string(), z.record(z.unknown())])
2996
+ .optional()
2997
+ .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
2998
+ search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
2999
+ profile: z.string().optional().describe("Codex --profile <name>."),
3000
+ configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
3001
+ ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
3002
+ images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
3003
+ ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
3004
+ ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
3005
+ }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
3006
+ return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3007
+ prompt,
3008
+ model,
3009
+ fullAuto,
3010
+ sandboxMode,
3011
+ askForApproval,
3012
+ useLegacyFullAutoFlag,
3013
+ dangerouslyBypassApprovalsAndSandbox,
3014
+ approvalStrategy,
3015
+ approvalPolicy,
3016
+ mcpServers,
3017
+ sessionId,
3018
+ resumeLatest,
3019
+ createNewSession,
3020
+ correlationId,
3021
+ optimizePrompt,
3022
+ idleTimeoutMs,
3023
+ forceRefresh,
3024
+ outputFormat,
3025
+ outputSchema,
3026
+ search,
3027
+ profile,
3028
+ configOverrides,
3029
+ ephemeral,
3030
+ images,
3031
+ ignoreUserConfig,
3032
+ ignoreRules,
3033
+ });
3034
+ });
3035
+ server.tool("gemini_request_async", {
3036
+ prompt: z
3037
+ .string()
3038
+ .min(1, "Prompt cannot be empty")
3039
+ .max(100000, "Prompt too long (max 100k chars)")
3040
+ .describe("Prompt text for Gemini"),
3041
+ model: z
3042
+ .string()
3043
+ .optional()
3044
+ .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3045
+ sessionId: z
3046
+ .string()
3047
+ .optional()
3048
+ .describe("Session ID (user-provided CLI handle for --resume)"),
3049
+ resumeLatest: z.boolean().default(false).describe("Resume latest session"),
3050
+ createNewSession: z.boolean().default(false).describe("Force new session"),
3051
+ approvalMode: z
3052
+ .enum(GEMINI_APPROVAL_MODES)
3053
+ .optional()
3054
+ .describe("Approval: default|auto_edit|yolo|plan"),
3055
+ approvalStrategy: z
3056
+ .enum(["legacy", "mcp_managed"])
3057
+ .default("legacy")
3058
+ .describe("Approval strategy"),
3059
+ approvalPolicy: z
3060
+ .enum(["strict", "balanced", "permissive"])
3061
+ .optional()
3062
+ .describe("Approval policy override"),
3063
+ mcpServers: z
3064
+ .array(MCP_SERVER_ENUM)
3065
+ .default(["sqry"])
3066
+ .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
3067
+ allowedTools: z
3068
+ .array(z.string())
3069
+ .optional()
3070
+ .describe("Allowed tools (['Write','Edit','Bash'])"),
3071
+ includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
3072
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3073
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3074
+ idleTimeoutMs: z
3075
+ .number()
3076
+ .int()
3077
+ .min(30_000)
3078
+ .max(3_600_000)
3079
+ .optional()
3080
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3081
+ forceRefresh: z
3082
+ .boolean()
3083
+ .default(false)
3084
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3085
+ // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3086
+ // remains text so existing callers see no behavior change.
3087
+ outputFormat: z
3088
+ .enum(["text", "json"])
3089
+ .default("text")
3090
+ .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
3091
+ sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
3092
+ policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
3093
+ adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
3094
+ attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
3095
+ }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
3096
+ return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3097
+ prompt,
3098
+ model,
3099
+ sessionId,
3100
+ resumeLatest,
3101
+ createNewSession,
3102
+ approvalMode,
3103
+ approvalStrategy,
3104
+ approvalPolicy,
3105
+ mcpServers,
3106
+ allowedTools,
3107
+ includeDirs,
3108
+ correlationId,
3109
+ optimizePrompt,
3110
+ idleTimeoutMs,
3111
+ forceRefresh,
3112
+ outputFormat,
3113
+ sandbox,
3114
+ policyFiles,
3115
+ adminPolicyFiles,
3116
+ attachments,
3117
+ });
3118
+ });
3119
+ server.tool("grok_request_async", {
3120
+ prompt: z
3121
+ .string()
3122
+ .min(1, "Prompt cannot be empty")
3123
+ .max(100000, "Prompt too long (max 100k chars)")
3124
+ .describe("Prompt text for Grok"),
3125
+ model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
3126
+ outputFormat: z
3127
+ .enum(["plain", "json", "streaming-json"])
3128
+ .optional()
3129
+ .describe("Output format (plain|json|streaming-json). Grok default is plain."),
3130
+ sessionId: z
3131
+ .string()
3132
+ .optional()
3133
+ .describe("Session ID (user-provided CLI handle for --resume)"),
3134
+ resumeLatest: z
3135
+ .boolean()
3136
+ .default(false)
3137
+ .describe("Resume most recent Grok session in cwd (--continue)"),
3138
+ createNewSession: z.boolean().default(false).describe("Force new session"),
3139
+ alwaysApprove: z
3140
+ .boolean()
3141
+ .default(false)
3142
+ .describe("Auto-approve all tool executions (--always-approve)"),
3143
+ permissionMode: z
3144
+ .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
3145
+ .optional()
3146
+ .describe("Grok permission mode"),
3147
+ effort: z
3148
+ .enum(["low", "medium", "high", "xhigh", "max"])
3149
+ .optional()
3150
+ .describe("Grok effort level"),
3151
+ reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
3152
+ approvalStrategy: z
3153
+ .enum(["legacy", "mcp_managed"])
3154
+ .default("legacy")
3155
+ .describe("Approval strategy"),
3156
+ approvalPolicy: z
3157
+ .enum(["strict", "balanced", "permissive"])
3158
+ .optional()
3159
+ .describe("Approval policy override"),
3160
+ mcpServers: z
3161
+ .array(MCP_SERVER_ENUM)
3162
+ .default(["sqry"])
3163
+ .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
3164
+ allowedTools: z
3165
+ .array(z.string())
3166
+ .optional()
3167
+ .describe("Allowed built-in tools (passed as --tools comma list)"),
3168
+ disallowedTools: z
3169
+ .array(z.string())
3170
+ .optional()
3171
+ .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
3172
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3173
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3174
+ idleTimeoutMs: z
3175
+ .number()
3176
+ .int()
3177
+ .min(30_000)
3178
+ .max(3_600_000)
3179
+ .optional()
3180
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3181
+ forceRefresh: z
3182
+ .boolean()
3183
+ .default(false)
3184
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3185
+ }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3186
+ return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3187
+ prompt,
3188
+ model,
3189
+ outputFormat,
3190
+ sessionId,
3191
+ resumeLatest,
3192
+ createNewSession,
3193
+ alwaysApprove,
3194
+ permissionMode,
3195
+ effort,
3196
+ reasoningEffort,
3197
+ approvalStrategy,
3198
+ approvalPolicy,
3199
+ mcpServers,
3200
+ allowedTools,
3201
+ disallowedTools,
3202
+ correlationId,
3203
+ optimizePrompt,
3204
+ idleTimeoutMs,
3205
+ forceRefresh,
3206
+ });
3207
+ });
3208
+ server.tool("mistral_request_async", {
3209
+ prompt: z
3210
+ .string()
3211
+ .min(1, "Prompt cannot be empty")
3212
+ .max(100000, "Prompt too long (max 100k chars)")
3213
+ .describe("Prompt text for Mistral Vibe"),
3214
+ model: z
3215
+ .string()
3216
+ .optional()
3217
+ .describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
3218
+ outputFormat: z
3219
+ .enum(["plain", "json", "stream-json"])
3220
+ .optional()
3221
+ .describe("Output format (plain|json|stream-json). Vibe default is plain."),
3222
+ sessionId: z
3223
+ .string()
3224
+ .optional()
3225
+ .describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
3226
+ resumeLatest: z
3227
+ .boolean()
3228
+ .default(false)
3229
+ .describe("Resume most recent Vibe session in cwd (--continue)"),
3230
+ createNewSession: z.boolean().default(false).describe("Force new session"),
3231
+ permissionMode: z
3232
+ .enum(MISTRAL_AGENT_MODES)
3233
+ .optional()
3234
+ .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
3235
+ effort: z
3236
+ .enum(["low", "medium", "high", "xhigh", "max"])
3237
+ .optional()
3238
+ .describe("Vibe effort level"),
3239
+ reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
3240
+ approvalStrategy: z
3241
+ .enum(["legacy", "mcp_managed"])
3242
+ .default("legacy")
3243
+ .describe("Approval strategy"),
3244
+ approvalPolicy: z
3245
+ .enum(["strict", "balanced", "permissive"])
3246
+ .optional()
3247
+ .describe("Approval policy override"),
3248
+ mcpServers: z
3249
+ .array(MCP_SERVER_ENUM)
3250
+ .default(["sqry"])
3251
+ .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
3252
+ allowedTools: z
3253
+ .array(z.string())
3254
+ .optional()
3255
+ .describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
3256
+ disallowedTools: z
3257
+ .array(z.string())
3258
+ .optional()
3259
+ .describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
3260
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3261
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3262
+ idleTimeoutMs: z
3263
+ .number()
3264
+ .int()
3265
+ .min(30_000)
3266
+ .max(3_600_000)
3267
+ .optional()
3268
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3269
+ forceRefresh: z
3270
+ .boolean()
3271
+ .default(false)
3272
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3273
+ }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3274
+ return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3275
+ prompt,
3276
+ model,
3277
+ outputFormat,
3278
+ sessionId,
3279
+ resumeLatest,
3280
+ createNewSession,
3281
+ permissionMode,
3282
+ effort,
3283
+ reasoningEffort,
3284
+ approvalStrategy,
3285
+ approvalPolicy,
3286
+ mcpServers,
3287
+ allowedTools,
3288
+ disallowedTools,
3289
+ correlationId,
3290
+ optimizePrompt,
3291
+ idleTimeoutMs,
3292
+ forceRefresh,
3293
+ });
3294
+ });
3295
+ server.tool("llm_job_status", {
3296
+ jobId: z.string().describe("Async job ID from *_request_async"),
3297
+ }, async ({ jobId }) => {
3298
+ const job = asyncJobManager.getJobSnapshot(jobId);
3299
+ if (!job) {
3300
+ return {
3301
+ content: [
3302
+ {
3303
+ type: "text",
3304
+ text: JSON.stringify({
3305
+ success: false,
3306
+ error: "Job not found",
3307
+ jobId,
3308
+ }, null, 2),
3309
+ },
3310
+ ],
3311
+ isError: true,
3312
+ };
3313
+ }
2174
3314
  return {
2175
3315
  content: [
2176
3316
  {
2177
3317
  type: "text",
2178
3318
  text: JSON.stringify({
2179
- success: false,
2180
- error: "Job not found",
2181
- jobId,
3319
+ success: true,
3320
+ job,
2182
3321
  }, null, 2),
2183
3322
  },
2184
3323
  ],
2185
- isError: true,
2186
3324
  };
2187
- }
2188
- // Parse stream-json output for Claude async jobs
2189
- const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
2190
- let parsed;
2191
- if (outputFormat === "stream-json" && result.stdout) {
2192
- parsed = parseStreamJson(result.stdout);
2193
- }
2194
- return {
2195
- content: [
2196
- {
2197
- type: "text",
2198
- text: JSON.stringify({
2199
- success: true,
2200
- result,
2201
- ...(parsed
2202
- ? {
2203
- parsed: {
2204
- text: parsed.text,
2205
- costUsd: parsed.costUsd,
2206
- usage: parsed.usage,
2207
- model: parsed.model,
2208
- numTurns: parsed.numTurns,
2209
- },
2210
- }
2211
- : {}),
2212
- }, null, 2),
2213
- },
2214
- ],
2215
- };
2216
- });
2217
- server.tool("llm_job_cancel", {
2218
- jobId: z.string().describe("Async job ID from *_request_async"),
2219
- }, async ({ jobId }) => {
2220
- const cancel = asyncJobManager.cancelJob(jobId);
2221
- if (!cancel.canceled) {
3325
+ });
3326
+ server.tool("llm_job_result", {
3327
+ jobId: z.string().describe("Async job ID from *_request_async"),
3328
+ maxChars: z
3329
+ .number()
3330
+ .int()
3331
+ .min(1000)
3332
+ .max(2000000)
3333
+ .default(200000)
3334
+ .describe("Max chars returned per stream"),
3335
+ }, async ({ jobId, maxChars }) => {
3336
+ const result = asyncJobManager.getJobResult(jobId, maxChars);
3337
+ if (!result) {
3338
+ return {
3339
+ content: [
3340
+ {
3341
+ type: "text",
3342
+ text: JSON.stringify({
3343
+ success: false,
3344
+ error: "Job not found",
3345
+ jobId,
3346
+ }, null, 2),
3347
+ },
3348
+ ],
3349
+ isError: true,
3350
+ };
3351
+ }
3352
+ // Parse stream-json output for Claude async jobs
3353
+ const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
3354
+ let parsed;
3355
+ if (outputFormat === "stream-json" && result.stdout) {
3356
+ parsed = parseStreamJson(result.stdout);
3357
+ }
2222
3358
  return {
2223
3359
  content: [
2224
3360
  {
2225
3361
  type: "text",
2226
3362
  text: JSON.stringify({
2227
- success: false,
2228
- jobId,
2229
- reason: cancel.reason || "Unable to cancel",
3363
+ success: true,
3364
+ result,
3365
+ ...(parsed
3366
+ ? {
3367
+ parsed: {
3368
+ text: parsed.text,
3369
+ costUsd: parsed.costUsd,
3370
+ usage: parsed.usage,
3371
+ model: parsed.model,
3372
+ numTurns: parsed.numTurns,
3373
+ },
3374
+ }
3375
+ : {}),
2230
3376
  }, null, 2),
2231
3377
  },
2232
3378
  ],
2233
- isError: true,
2234
3379
  };
2235
- }
2236
- return {
2237
- content: [
2238
- {
2239
- type: "text",
2240
- text: JSON.stringify({
2241
- success: true,
2242
- jobId,
2243
- }, null, 2),
2244
- },
2245
- ],
2246
- };
2247
- });
2248
- server.tool("llm_process_health", {}, async () => {
2249
- const health = asyncJobManager.getJobHealth();
2250
- return {
2251
- content: [
2252
- {
2253
- type: "text",
2254
- text: JSON.stringify({ success: true, ...health }, null, 2),
2255
- },
2256
- ],
2257
- };
2258
- });
2259
- //──────────────────────────────────────────────────────────────────────────────
2260
- // Approval Audit Tools
2261
- //──────────────────────────────────────────────────────────────────────────────
2262
- server.tool("approval_list", {
2263
- limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
2264
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter"),
2265
- }, async ({ limit, cli }) => {
2266
- const approvals = approvalManager.list(limit, cli);
2267
- return {
2268
- content: [
2269
- {
2270
- type: "text",
2271
- text: JSON.stringify({
2272
- success: true,
2273
- count: approvals.length,
2274
- approvals,
2275
- }, null, 2),
2276
- },
2277
- ],
2278
- };
2279
- });
2280
- //──────────────────────────────────────────────────────────────────────────────
2281
- // List Models Tool
2282
- //──────────────────────────────────────────────────────────────────────────────
2283
- server.tool("list_models", {
2284
- cli: z
2285
- .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2286
- .describe("CLI filter (claude|codex|gemini)"),
2287
- }, async ({ cli }) => {
2288
- const cliInfo = getCliInfo();
2289
- const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
2290
- return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
2291
- });
2292
- server.tool("cli_versions", {
2293
- cli: z
2294
- .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2295
- .describe("CLI filter (claude|codex|gemini)"),
2296
- }, async ({ cli }) => {
2297
- const versions = await getCliVersions(cli);
2298
- return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
2299
- });
2300
- server.tool("cli_upgrade", {
2301
- cli: z.enum(["claude", "codex", "gemini"]).describe("CLI to upgrade"),
2302
- target: z
2303
- .string()
2304
- .min(1)
2305
- .default("latest")
2306
- .describe("Package tag/version/target to install (default: latest)"),
2307
- dryRun: z
2308
- .boolean()
2309
- .default(true)
2310
- .describe("When true, return the upgrade plan without running it"),
2311
- timeoutMs: z
2312
- .number()
2313
- .int()
2314
- .min(30_000)
2315
- .max(3_600_000)
2316
- .optional()
2317
- .describe("Upgrade timeout in ms when dryRun=false"),
2318
- }, async ({ cli, target, dryRun, timeoutMs }) => {
2319
- try {
2320
- const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
3380
+ });
3381
+ server.tool("llm_job_cancel", {
3382
+ jobId: z.string().describe("Async job ID from *_request_async"),
3383
+ }, async ({ jobId }) => {
3384
+ const cancel = asyncJobManager.cancelJob(jobId);
3385
+ if (!cancel.canceled) {
3386
+ return {
3387
+ content: [
3388
+ {
3389
+ type: "text",
3390
+ text: JSON.stringify({
3391
+ success: false,
3392
+ jobId,
3393
+ reason: cancel.reason || "Unable to cancel",
3394
+ }, null, 2),
3395
+ },
3396
+ ],
3397
+ isError: true,
3398
+ };
3399
+ }
2321
3400
  return {
2322
3401
  content: [
2323
3402
  {
2324
3403
  type: "text",
2325
3404
  text: JSON.stringify({
2326
3405
  success: true,
2327
- ...result,
3406
+ jobId,
2328
3407
  }, null, 2),
2329
3408
  },
2330
3409
  ],
2331
3410
  };
2332
- }
2333
- catch (error) {
2334
- const message = error instanceof Error ? error.message : String(error);
3411
+ });
3412
+ server.tool("llm_process_health", {}, async () => {
3413
+ const health = asyncJobManager.getJobHealth();
2335
3414
  return {
2336
3415
  content: [
2337
3416
  {
2338
3417
  type: "text",
2339
- text: JSON.stringify({
2340
- success: false,
2341
- error: message,
2342
- }, null, 2),
3418
+ text: JSON.stringify({ success: true, ...health }, null, 2),
2343
3419
  },
2344
3420
  ],
2345
- isError: true,
2346
3421
  };
2347
- }
2348
- });
2349
- //──────────────────────────────────────────────────────────────────────────────
2350
- // Session Management Tools
2351
- //──────────────────────────────────────────────────────────────────────────────
2352
- server.tool("session_create", {
2353
- cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
2354
- description: z.string().optional().describe("Session description"),
2355
- setAsActive: z.boolean().default(true).describe("Set as active session"),
2356
- }, async ({ cli, description, setAsActive }) => {
2357
- try {
2358
- const session = await sessionManager.createSession(cli, description);
2359
- if (setAsActive) {
2360
- await sessionManager.setActiveSession(cli, session.id);
2361
- }
2362
- logger.info(`Created new ${cli} session: ${session.id}`);
3422
+ });
3423
+ //──────────────────────────────────────────────────────────────────────────────
3424
+ // Approval Audit Tools
3425
+ //──────────────────────────────────────────────────────────────────────────────
3426
+ server.tool("approval_list", {
3427
+ limit: z
3428
+ .number()
3429
+ .int()
3430
+ .min(1)
3431
+ .max(500)
3432
+ .default(50)
3433
+ .describe("Max number of approval records"),
3434
+ cli: z
3435
+ .enum(["claude", "codex", "gemini", "grok", "mistral"])
3436
+ .optional()
3437
+ .describe("Optional CLI filter"),
3438
+ }, async ({ limit, cli }) => {
3439
+ const approvals = approvalManager.list(limit, cli);
2363
3440
  return {
2364
3441
  content: [
2365
3442
  {
2366
3443
  type: "text",
2367
3444
  text: JSON.stringify({
2368
3445
  success: true,
2369
- session: {
2370
- id: session.id,
2371
- cli: session.cli,
2372
- description: session.description,
2373
- createdAt: session.createdAt,
2374
- isActive: setAsActive,
2375
- },
2376
- }, null, 2),
2377
- },
2378
- ],
2379
- };
2380
- }
2381
- catch (error) {
2382
- return createErrorResponse("session_create", 1, "", undefined, error);
2383
- }
2384
- });
2385
- server.tool("session_list", {
2386
- cli: z
2387
- .enum(["claude", "codex", "gemini"])
2388
- .optional()
2389
- .describe("CLI filter (claude|codex|gemini)"),
2390
- }, async ({ cli }) => {
2391
- try {
2392
- const sessions = await sessionManager.listSessions(cli);
2393
- const activeSessions = {
2394
- claude: await sessionManager.getActiveSession("claude"),
2395
- codex: await sessionManager.getActiveSession("codex"),
2396
- gemini: await sessionManager.getActiveSession("gemini"),
2397
- grok: await sessionManager.getActiveSession("grok"),
2398
- };
2399
- const sessionList = sessions.map(s => ({
2400
- id: s.id,
2401
- cli: s.cli,
2402
- description: s.description,
2403
- createdAt: s.createdAt,
2404
- lastUsedAt: s.lastUsedAt,
2405
- isActive: activeSessions[s.cli]?.id === s.id,
2406
- }));
2407
- return {
2408
- content: [
2409
- {
2410
- type: "text",
2411
- text: JSON.stringify({
2412
- total: sessionList.length,
2413
- sessions: sessionList,
2414
- activeSessions: {
2415
- claude: activeSessions.claude?.id || null,
2416
- codex: activeSessions.codex?.id || null,
2417
- gemini: activeSessions.gemini?.id || null,
2418
- grok: activeSessions.grok?.id || null,
2419
- },
3446
+ count: approvals.length,
3447
+ approvals,
2420
3448
  }, null, 2),
2421
3449
  },
2422
3450
  ],
2423
3451
  };
2424
- }
2425
- catch (error) {
2426
- return createErrorResponse("session_list", 1, "", undefined, error);
2427
- }
2428
- });
2429
- server.tool("session_set_active", {
2430
- cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
2431
- sessionId: z.string().nullable().describe("Session ID (null to clear)"),
2432
- }, async ({ cli, sessionId }) => {
2433
- try {
2434
- const success = await sessionManager.setActiveSession(cli, sessionId || null);
2435
- if (!success) {
3452
+ });
3453
+ //──────────────────────────────────────────────────────────────────────────────
3454
+ // List Models Tool
3455
+ //──────────────────────────────────────────────────────────────────────────────
3456
+ server.tool("list_models", {
3457
+ cli: z
3458
+ .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
3459
+ .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3460
+ }, async ({ cli }) => {
3461
+ const cliInfo = getCliInfo();
3462
+ const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
3463
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
3464
+ });
3465
+ server.tool("cli_versions", {
3466
+ cli: z
3467
+ .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
3468
+ .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3469
+ }, async ({ cli }) => {
3470
+ const versions = await getCliVersions(cli);
3471
+ return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
3472
+ });
3473
+ server.tool("cli_upgrade", {
3474
+ cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
3475
+ target: z
3476
+ .string()
3477
+ .min(1)
3478
+ .default("latest")
3479
+ .describe("Package tag/version/target to install (default: latest)"),
3480
+ dryRun: z
3481
+ .boolean()
3482
+ .default(true)
3483
+ .describe("When true, return the upgrade plan without running it"),
3484
+ timeoutMs: z
3485
+ .number()
3486
+ .int()
3487
+ .min(30_000)
3488
+ .max(3_600_000)
3489
+ .optional()
3490
+ .describe("Upgrade timeout in ms when dryRun=false"),
3491
+ }, async ({ cli, target, dryRun, timeoutMs }) => {
3492
+ try {
3493
+ const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
2436
3494
  return {
2437
3495
  content: [
2438
3496
  {
2439
3497
  type: "text",
2440
3498
  text: JSON.stringify({
2441
- success: false,
2442
- error: "Session not found or does not belong to the specified CLI",
3499
+ success: true,
3500
+ ...result,
2443
3501
  }, null, 2),
2444
3502
  },
2445
3503
  ],
2446
- isError: true,
2447
3504
  };
2448
3505
  }
2449
- logger.info(`Set active ${cli} session to: ${sessionId}`);
2450
- return {
2451
- content: [
2452
- {
2453
- type: "text",
2454
- text: JSON.stringify({
2455
- success: true,
2456
- cli,
2457
- activeSessionId: sessionId,
2458
- }, null, 2),
2459
- },
2460
- ],
2461
- };
2462
- }
2463
- catch (error) {
2464
- return createErrorResponse("session_set_active", 1, "", undefined, error);
2465
- }
2466
- });
2467
- server.tool("session_delete", {
2468
- sessionId: z.string().describe("Session ID"),
2469
- }, async ({ sessionId }) => {
2470
- try {
2471
- const session = await sessionManager.getSession(sessionId);
2472
- if (!session) {
3506
+ catch (error) {
3507
+ const message = error instanceof Error ? error.message : String(error);
2473
3508
  return {
2474
3509
  content: [
2475
3510
  {
2476
3511
  type: "text",
2477
3512
  text: JSON.stringify({
2478
3513
  success: false,
2479
- error: "Session not found",
3514
+ error: message,
2480
3515
  }, null, 2),
2481
3516
  },
2482
3517
  ],
2483
3518
  isError: true,
2484
3519
  };
2485
3520
  }
2486
- const success = await sessionManager.deleteSession(sessionId);
2487
- logger.info(`Deleted session: ${sessionId}`);
2488
- return {
2489
- content: [
2490
- {
2491
- type: "text",
2492
- text: JSON.stringify({
2493
- success,
2494
- deletedSession: {
2495
- id: session.id,
2496
- cli: session.cli,
2497
- description: session.description,
3521
+ });
3522
+ //──────────────────────────────────────────────────────────────────────────────
3523
+ // Session Management Tools
3524
+ //──────────────────────────────────────────────────────────────────────────────
3525
+ server.tool("session_create", {
3526
+ cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
3527
+ description: z.string().optional().describe("Session description"),
3528
+ setAsActive: z.boolean().default(true).describe("Set as active session"),
3529
+ }, async ({ cli, description, setAsActive }) => {
3530
+ try {
3531
+ const session = await sessionManager.createSession(cli, description);
3532
+ if (setAsActive) {
3533
+ await sessionManager.setActiveSession(cli, session.id);
3534
+ }
3535
+ logger.info(`Created new ${cli} session: ${session.id}`);
3536
+ return {
3537
+ content: [
3538
+ {
3539
+ type: "text",
3540
+ text: JSON.stringify({
3541
+ success: true,
3542
+ session: {
3543
+ id: session.id,
3544
+ cli: session.cli,
3545
+ description: session.description,
3546
+ createdAt: session.createdAt,
3547
+ isActive: setAsActive,
3548
+ },
3549
+ }, null, 2),
3550
+ },
3551
+ ],
3552
+ };
3553
+ }
3554
+ catch (error) {
3555
+ return createErrorResponse("session_create", 1, "", undefined, error);
3556
+ }
3557
+ });
3558
+ server.tool("session_list", {
3559
+ cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3560
+ }, async ({ cli }) => {
3561
+ try {
3562
+ const sessions = await sessionManager.listSessions(cli);
3563
+ const activeSessions = {
3564
+ claude: await sessionManager.getActiveSession("claude"),
3565
+ codex: await sessionManager.getActiveSession("codex"),
3566
+ gemini: await sessionManager.getActiveSession("gemini"),
3567
+ grok: await sessionManager.getActiveSession("grok"),
3568
+ mistral: await sessionManager.getActiveSession("mistral"),
3569
+ };
3570
+ const sessionList = sessions.map(s => ({
3571
+ id: s.id,
3572
+ cli: s.cli,
3573
+ description: s.description,
3574
+ createdAt: s.createdAt,
3575
+ lastUsedAt: s.lastUsedAt,
3576
+ isActive: activeSessions[s.cli]?.id === s.id,
3577
+ }));
3578
+ return {
3579
+ content: [
3580
+ {
3581
+ type: "text",
3582
+ text: JSON.stringify({
3583
+ total: sessionList.length,
3584
+ sessions: sessionList,
3585
+ activeSessions: {
3586
+ claude: activeSessions.claude?.id || null,
3587
+ codex: activeSessions.codex?.id || null,
3588
+ gemini: activeSessions.gemini?.id || null,
3589
+ grok: activeSessions.grok?.id || null,
3590
+ mistral: activeSessions.mistral?.id || null,
3591
+ },
3592
+ }, null, 2),
3593
+ },
3594
+ ],
3595
+ };
3596
+ }
3597
+ catch (error) {
3598
+ return createErrorResponse("session_list", 1, "", undefined, error);
3599
+ }
3600
+ });
3601
+ server.tool("session_set_active", {
3602
+ cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
3603
+ sessionId: z.string().nullable().describe("Session ID (null to clear)"),
3604
+ }, async ({ cli, sessionId }) => {
3605
+ try {
3606
+ const success = await sessionManager.setActiveSession(cli, sessionId || null);
3607
+ if (!success) {
3608
+ return {
3609
+ content: [
3610
+ {
3611
+ type: "text",
3612
+ text: JSON.stringify({
3613
+ success: false,
3614
+ error: "Session not found or does not belong to the specified CLI",
3615
+ }, null, 2),
2498
3616
  },
2499
- }, null, 2),
2500
- },
2501
- ],
2502
- };
2503
- }
2504
- catch (error) {
2505
- return createErrorResponse("session_delete", 1, "", undefined, error);
2506
- }
2507
- });
2508
- server.tool("session_get", {
2509
- sessionId: z.string().describe("Session ID"),
2510
- }, async ({ sessionId }) => {
2511
- try {
2512
- const session = await sessionManager.getSession(sessionId);
2513
- if (!session) {
3617
+ ],
3618
+ isError: true,
3619
+ };
3620
+ }
3621
+ logger.info(`Set active ${cli} session to: ${sessionId}`);
2514
3622
  return {
2515
3623
  content: [
2516
3624
  {
2517
3625
  type: "text",
2518
3626
  text: JSON.stringify({
2519
- success: false,
2520
- error: "Session not found",
3627
+ success: true,
3628
+ cli,
3629
+ activeSessionId: sessionId,
2521
3630
  }, null, 2),
2522
3631
  },
2523
3632
  ],
2524
- isError: true,
2525
3633
  };
2526
3634
  }
2527
- const activeSession = await sessionManager.getActiveSession(session.cli);
2528
- return {
2529
- content: [
2530
- {
2531
- type: "text",
2532
- text: JSON.stringify({
2533
- success: true,
2534
- session: {
2535
- ...session,
2536
- isActive: activeSession?.id === session.id,
3635
+ catch (error) {
3636
+ return createErrorResponse("session_set_active", 1, "", undefined, error);
3637
+ }
3638
+ });
3639
+ server.tool("session_delete", {
3640
+ sessionId: z.string().describe("Session ID"),
3641
+ }, async ({ sessionId }) => {
3642
+ try {
3643
+ const session = await sessionManager.getSession(sessionId);
3644
+ if (!session) {
3645
+ return {
3646
+ content: [
3647
+ {
3648
+ type: "text",
3649
+ text: JSON.stringify({
3650
+ success: false,
3651
+ error: "Session not found",
3652
+ }, null, 2),
2537
3653
  },
2538
- }, null, 2),
2539
- },
2540
- ],
2541
- };
2542
- }
2543
- catch (error) {
2544
- return createErrorResponse("session_get", 1, "", undefined, error);
2545
- }
2546
- });
2547
- server.tool("session_clear_all", {
2548
- cli: z
2549
- .enum(["claude", "codex", "gemini"])
2550
- .optional()
2551
- .describe("CLI filter (claude|codex|gemini)"),
2552
- }, async ({ cli }) => {
2553
- try {
2554
- const count = await sessionManager.clearAllSessions(cli);
2555
- logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
2556
- return {
2557
- content: [
2558
- {
2559
- type: "text",
2560
- text: JSON.stringify({
2561
- success: true,
2562
- deletedCount: count,
2563
- cli: cli || "all",
2564
- }, null, 2),
2565
- },
2566
- ],
2567
- };
2568
- }
2569
- catch (error) {
2570
- return createErrorResponse("session_clear_all", 1, "", undefined, error);
2571
- }
2572
- });
3654
+ ],
3655
+ isError: true,
3656
+ };
3657
+ }
3658
+ const success = await sessionManager.deleteSession(sessionId);
3659
+ logger.info(`Deleted session: ${sessionId}`);
3660
+ return {
3661
+ content: [
3662
+ {
3663
+ type: "text",
3664
+ text: JSON.stringify({
3665
+ success,
3666
+ deletedSession: {
3667
+ id: session.id,
3668
+ cli: session.cli,
3669
+ description: session.description,
3670
+ },
3671
+ }, null, 2),
3672
+ },
3673
+ ],
3674
+ };
3675
+ }
3676
+ catch (error) {
3677
+ return createErrorResponse("session_delete", 1, "", undefined, error);
3678
+ }
3679
+ });
3680
+ server.tool("session_get", {
3681
+ sessionId: z.string().describe("Session ID"),
3682
+ }, async ({ sessionId }) => {
3683
+ try {
3684
+ const session = await sessionManager.getSession(sessionId);
3685
+ if (!session) {
3686
+ return {
3687
+ content: [
3688
+ {
3689
+ type: "text",
3690
+ text: JSON.stringify({
3691
+ success: false,
3692
+ error: "Session not found",
3693
+ }, null, 2),
3694
+ },
3695
+ ],
3696
+ isError: true,
3697
+ };
3698
+ }
3699
+ const activeSession = await sessionManager.getActiveSession(session.cli);
3700
+ return {
3701
+ content: [
3702
+ {
3703
+ type: "text",
3704
+ text: JSON.stringify({
3705
+ success: true,
3706
+ session: {
3707
+ ...session,
3708
+ isActive: activeSession?.id === session.id,
3709
+ },
3710
+ }, null, 2),
3711
+ },
3712
+ ],
3713
+ };
3714
+ }
3715
+ catch (error) {
3716
+ return createErrorResponse("session_get", 1, "", undefined, error);
3717
+ }
3718
+ });
3719
+ server.tool("session_clear_all", {
3720
+ cli: SESSION_PROVIDER_ENUM.optional().describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3721
+ }, async ({ cli }) => {
3722
+ try {
3723
+ const count = await sessionManager.clearAllSessions(cli);
3724
+ logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
3725
+ return {
3726
+ content: [
3727
+ {
3728
+ type: "text",
3729
+ text: JSON.stringify({
3730
+ success: true,
3731
+ deletedCount: count,
3732
+ cli: cli || "all",
3733
+ }, null, 2),
3734
+ },
3735
+ ],
3736
+ };
3737
+ }
3738
+ catch (error) {
3739
+ return createErrorResponse("session_clear_all", 1, "", undefined, error);
3740
+ }
3741
+ });
3742
+ return server;
3743
+ }
2573
3744
  //──────────────────────────────────────────────────────────────────────────────
2574
3745
  // Async Initialization
2575
3746
  //──────────────────────────────────────────────────────────────────────────────
@@ -2592,7 +3763,7 @@ async function initializeSessionManager() {
2592
3763
  //──────────────────────────────────────────────────────────────────────────────
2593
3764
  // Health Check Resource (only if using PostgreSQL)
2594
3765
  //──────────────────────────────────────────────────────────────────────────────
2595
- function registerHealthResource() {
3766
+ function registerHealthResource(server) {
2596
3767
  if (db) {
2597
3768
  server.registerResource("health", "health://status", {
2598
3769
  title: "🏥 Health Status",
@@ -2640,8 +3811,16 @@ async function shutdown(signal) {
2640
3811
  // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
2641
3812
  await killAllProcessGroups();
2642
3813
  logger.info("All process groups terminated");
2643
- await server.close();
2644
- logger.info("MCP server closed");
3814
+ if (activeHttpGateway) {
3815
+ await activeHttpGateway.close();
3816
+ logger.info("HTTP MCP transport closed");
3817
+ activeHttpGateway = null;
3818
+ }
3819
+ if (activeServer) {
3820
+ await activeServer.close();
3821
+ logger.info("MCP server closed");
3822
+ activeServer = null;
3823
+ }
2645
3824
  if (db) {
2646
3825
  await db.disconnect();
2647
3826
  logger.info("Database connections closed");
@@ -2661,13 +3840,52 @@ process.on("SIGINT", () => shutdown("SIGINT"));
2661
3840
  // Server Startup
2662
3841
  //──────────────────────────────────────────────────────────────────────────────
2663
3842
  async function main() {
2664
- logger.info("Starting llm-cli-gateway MCP server");
3843
+ const args = process.argv.slice(2);
3844
+ if (args[0] === "doctor") {
3845
+ if (args.includes("--json")) {
3846
+ printDoctorJson();
3847
+ return;
3848
+ }
3849
+ process.stderr.write("Only doctor --json is supported in this layer.\n");
3850
+ process.exit(2);
3851
+ }
3852
+ const transportArg = args.find(arg => arg.startsWith("--transport="));
3853
+ const transportMode = transportArg?.split("=")[1] ||
3854
+ process.env.LLM_GATEWAY_TRANSPORT ||
3855
+ process.env.MCP_TRANSPORT ||
3856
+ "stdio";
3857
+ logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
2665
3858
  // Initialize session manager first
2666
3859
  await initializeSessionManager();
3860
+ const serverDeps = {
3861
+ sessionManager,
3862
+ resourceProvider,
3863
+ db,
3864
+ performanceMetrics,
3865
+ asyncJobManager,
3866
+ approvalManager,
3867
+ flightRecorder,
3868
+ logger,
3869
+ };
3870
+ if (transportMode === "http") {
3871
+ activeHttpGateway = await startHttpGateway({
3872
+ deps: serverDeps,
3873
+ createGatewayServer,
3874
+ logger,
3875
+ });
3876
+ logger.info(`llm-cli-gateway HTTP MCP server connected and ready at ${activeHttpGateway.url}`);
3877
+ return;
3878
+ }
3879
+ if (transportMode !== "stdio") {
3880
+ throw new Error(`Unsupported transport: ${transportMode}`);
3881
+ }
3882
+ activeServer = createGatewayServer({
3883
+ ...serverDeps,
3884
+ });
2667
3885
  // Register health check resource if using PostgreSQL
2668
- registerHealthResource();
3886
+ registerHealthResource(activeServer);
2669
3887
  const transport = new StdioServerTransport();
2670
- await server.connect(transport);
3888
+ await activeServer.connect(transport);
2671
3889
  logger.info("llm-cli-gateway MCP server connected and ready");
2672
3890
  }
2673
3891
  // Guard: only auto-start when run directly (not imported for testing)