llm-cli-gateway 1.0.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -11,15 +11,18 @@ import { parseStreamJson } from "./stream-json-parser.js";
11
11
  import { createSessionManager } from "./session-manager.js";
12
12
  import { ResourceProvider } from "./resources.js";
13
13
  import { PerformanceMetrics } from "./metrics.js";
14
- import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText } from "./optimizer.js";
14
+ import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
15
15
  import { loadConfig } from "./config.js";
16
16
  import { checkHealth } from "./health.js";
17
17
  import { getCliInfo, resolveModelAlias } from "./model-registry.js";
18
18
  import { AsyncJobManager } from "./async-job-manager.js";
19
+ import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
19
20
  import { ApprovalManager } from "./approval-manager.js";
20
21
  import { checkReviewIntegrity } from "./review-integrity.js";
21
- import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES } from "./claude-mcp-config.js";
22
- import { resolveSessionResumeArgs, GATEWAY_SESSION_PREFIX } from "./request-helpers.js";
22
+ import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
23
+ import { resolveSessionResumeArgs, resolveGrokSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
24
+ import { createFlightRecorder } from "./flight-recorder.js";
25
+ import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
23
26
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
24
27
  const logger = {
25
28
  info: (message, ...args) => {
@@ -32,7 +35,7 @@ const logger = {
32
35
  if (process.env.DEBUG) {
33
36
  console.error(`[DEBUG] ${new Date().toISOString()} - ${message}`, ...args);
34
37
  }
35
- }
38
+ },
36
39
  };
37
40
  function logOptimizationTokens(kind, correlationId, original, optimized) {
38
41
  const originalTokens = estimateTokens(original);
@@ -87,14 +90,14 @@ const loadedSkills = loadSkills();
87
90
  // system prompt at connection time. Covers key patterns + pointers to L2 resources.
88
91
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
89
92
 
90
- Tools: claude_request, codex_request, gemini_request (sync) | *_request_async (async)
93
+ Tools: claude_request, codex_request, gemini_request, grok_request (sync) | *_request_async (async)
91
94
  Jobs: llm_job_status, llm_job_result, llm_job_cancel
92
95
  Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
93
- Other: list_models, approval_list, llm_process_health
96
+ Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
94
97
 
95
98
  Key behaviors:
96
99
  - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
97
- - Sessions: Claude --continue, Gemini --resume (real CLI continuity). Codex bookkeeping only.
100
+ - Sessions: Claude --continue, Gemini --resume, Grok --resume/--continue, Codex \`exec resume <ID>\` / \`exec resume --last\` (all real CLI continuity). For Codex, sessionId must be a real Codex UUID (from ~/.codex/sessions/); gateway-generated gw-* IDs are rejected.
98
101
  - Approval gates: opt-in via approvalStrategy:"mcp_managed".
99
102
  - Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).
100
103
 
@@ -106,9 +109,27 @@ let sessionManager;
106
109
  let db = null;
107
110
  const performanceMetrics = new PerformanceMetrics();
108
111
  let resourceProvider;
112
+ const flightRecorder = createFlightRecorder(logger);
113
+ // Durable job store: persists every async job to ~/.llm-cli-gateway/logs.db so callers
114
+ // can collect results across long polling gaps and gateway restarts, and so repeated
115
+ // identical requests dedup onto the running/completed job instead of starting over.
116
+ const jobStore = (() => {
117
+ const dbPath = resolveJobStoreDbPath();
118
+ if (!dbPath) {
119
+ logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
120
+ return null;
121
+ }
122
+ try {
123
+ return new JobStore(dbPath, logger);
124
+ }
125
+ catch (err) {
126
+ logger.error("Failed to open durable job store; continuing in-memory only", err);
127
+ return null;
128
+ }
129
+ })();
109
130
  const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
110
131
  performanceMetrics.recordRequest(cli, durationMs, success);
111
- });
132
+ }, jobStore);
112
133
  const approvalManager = new ApprovalManager(undefined, logger);
113
134
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
114
135
  // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
@@ -118,6 +139,7 @@ const CLI_IDLE_TIMEOUTS = {
118
139
  claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
119
140
  codex: 600_000, // 10 minutes — Codex streams stderr progress
120
141
  gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
142
+ grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
121
143
  };
122
144
  function resolveIdleTimeout(cli, override) {
123
145
  if (override !== undefined)
@@ -129,12 +151,21 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
129
151
  * Start an async job and poll until completion or deadline.
130
152
  * Returns the job result if it finishes in time, or a deferral marker.
131
153
  */
132
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
154
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh) {
133
155
  if (SYNC_DEADLINE_MS === 0) {
134
- // Disabled — fall through to direct execution
156
+ // Disabled — fall through to direct execution.
157
+ // Note: direct execution bypasses dedup. forceRefresh is implied.
135
158
  return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
136
159
  }
137
- const job = asyncJobManager.startJob(cli, args, corrId, undefined, idleTimeoutMs, outputFormat);
160
+ const outcome = asyncJobManager.startJobWithDedup(cli, args, corrId, {
161
+ idleTimeoutMs,
162
+ outputFormat,
163
+ forceRefresh,
164
+ });
165
+ const job = outcome.snapshot;
166
+ if (outcome.deduped) {
167
+ logger.info(`[${corrId}] sync request deduped onto running job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
168
+ }
138
169
  const deadline = Date.now() + SYNC_DEADLINE_MS;
139
170
  while (Date.now() < deadline) {
140
171
  const snapshot = asyncJobManager.getJobSnapshot(job.id);
@@ -147,7 +178,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
147
178
  return {
148
179
  stdout: result.stdout,
149
180
  stderr: result.stderr,
150
- code: result.exitCode ?? 1
181
+ code: result.exitCode ?? 1,
151
182
  };
152
183
  }
153
184
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
@@ -159,7 +190,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
159
190
  jobId: job.id,
160
191
  cli,
161
192
  correlationId: corrId,
162
- message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, fetch with llm_job_result.`
193
+ message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, fetch with llm_job_result.`,
163
194
  };
164
195
  }
165
196
  function isDeferredResponse(result) {
@@ -167,7 +198,8 @@ function isDeferredResponse(result) {
167
198
  }
168
199
  function buildDeferredToolResponse(deferred, sessionId) {
169
200
  return {
170
- content: [{
201
+ content: [
202
+ {
171
203
  type: "text",
172
204
  text: JSON.stringify({
173
205
  status: "deferred",
@@ -178,9 +210,10 @@ function buildDeferredToolResponse(deferred, sessionId) {
178
210
  sessionId: sessionId || null,
179
211
  pollWith: "llm_job_status",
180
212
  fetchWith: "llm_job_result",
181
- cancelWith: "llm_job_cancel"
182
- }, null, 2)
183
- }]
213
+ cancelWith: "llm_job_cancel",
214
+ }, null, 2),
215
+ },
216
+ ],
184
217
  };
185
218
  }
186
219
  // Helper function for standardized error responses
@@ -211,20 +244,61 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
211
244
  }
212
245
  return {
213
246
  content: [{ type: "text", text: errorMessage }],
214
- isError: true
247
+ isError: true,
248
+ structuredContent: {
249
+ correlationId: correlationId || null,
250
+ cli,
251
+ exitCode: code,
252
+ errorCategory: code === 124
253
+ ? "timeout"
254
+ : code === 125
255
+ ? "idle_timeout"
256
+ : error
257
+ ? "spawn_error"
258
+ : "cli_error",
259
+ },
215
260
  };
216
261
  }
262
+ function extractUsageAndCost(cli, output, outputFormat) {
263
+ if (cli === "claude" && outputFormat === "stream-json") {
264
+ const parsed = parseStreamJson(output);
265
+ return {
266
+ inputTokens: parsed.usage?.inputTokens,
267
+ outputTokens: parsed.usage?.outputTokens,
268
+ costUsd: parsed.costUsd ?? undefined,
269
+ };
270
+ }
271
+ return {};
272
+ }
273
+ function safeFlightStart(entry) {
274
+ try {
275
+ flightRecorder.logStart(entry);
276
+ }
277
+ catch (error) {
278
+ logger.error("Flight recorder logStart failed", error);
279
+ }
280
+ }
281
+ function safeFlightComplete(correlationId, result) {
282
+ try {
283
+ flightRecorder.logComplete(correlationId, result);
284
+ }
285
+ catch (error) {
286
+ logger.error("Flight recorder logComplete failed", error);
287
+ }
288
+ }
217
289
  function createApprovalDeniedResponse(operation, decision) {
218
290
  return {
219
- content: [{
291
+ content: [
292
+ {
220
293
  type: "text",
221
294
  text: JSON.stringify({
222
295
  success: false,
223
296
  error: `${operation} denied by MCP-managed approval policy`,
224
- approval: decision
225
- }, null, 2)
226
- }],
227
- isError: true
297
+ approval: decision,
298
+ }, null, 2),
299
+ },
300
+ ],
301
+ isError: true,
228
302
  };
229
303
  }
230
304
  function normalizeMcpServers(mcpServers) {
@@ -235,7 +309,8 @@ function normalizeMcpServers(mcpServers) {
235
309
  }
236
310
  function createMcpConfigErrorResponse(operation, correlationId, requested, message, missing = []) {
237
311
  return {
238
- content: [{
312
+ content: [
313
+ {
239
314
  type: "text",
240
315
  text: JSON.stringify({
241
316
  success: false,
@@ -244,11 +319,12 @@ function createMcpConfigErrorResponse(operation, correlationId, requested, messa
244
319
  correlationId,
245
320
  mcpServers: {
246
321
  requested,
247
- missing
248
- }
249
- }, null, 2)
250
- }],
251
- isError: true
322
+ missing,
323
+ },
324
+ }, null, 2),
325
+ },
326
+ ],
327
+ isError: true,
252
328
  };
253
329
  }
254
330
  function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, strictMcpConfig) {
@@ -260,13 +336,13 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
260
336
  const message = error instanceof Error ? error.message : String(error);
261
337
  logger.error(`[${correlationId}] ${operation} failed to build Claude MCP config: ${message}`);
262
338
  return {
263
- errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, message)
339
+ errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, message),
264
340
  };
265
341
  }
266
342
  if (strictMcpConfig && mcpConfig.missing.length > 0) {
267
343
  const missing = mcpConfig.missing.join(", ");
268
344
  return {
269
- errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, `strictMcpConfig=true but requested servers are unavailable: ${missing}`, mcpConfig.missing)
345
+ errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, `strictMcpConfig=true but requested servers are unavailable: ${missing}`, mcpConfig.missing),
270
346
  };
271
347
  }
272
348
  return { config: mcpConfig };
@@ -279,13 +355,15 @@ for (const skill of loadedSkills) {
279
355
  server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
280
356
  title: skill.name,
281
357
  description: skill.description,
282
- mimeType: "text/markdown"
358
+ mimeType: "text/markdown",
283
359
  }, async () => ({
284
- contents: [{
360
+ contents: [
361
+ {
285
362
  uri: `skills://${skill.name}`,
286
363
  mimeType: "text/markdown",
287
- text: skill.content
288
- }]
364
+ text: skill.content,
365
+ },
366
+ ],
289
367
  }));
290
368
  }
291
369
  logger.info(`Registered ${loadedSkills.length} skill resources`);
@@ -293,7 +371,7 @@ logger.info(`Registered ${loadedSkills.length} skill resources`);
293
371
  server.registerResource("all-sessions", "sessions://all", {
294
372
  title: "📋 All Sessions",
295
373
  description: "All conversation sessions across CLIs",
296
- mimeType: "application/json"
374
+ mimeType: "application/json",
297
375
  }, async (uri) => {
298
376
  logger.debug("Reading all sessions resource");
299
377
  const contents = await resourceProvider.readResource(uri.href);
@@ -303,7 +381,7 @@ server.registerResource("all-sessions", "sessions://all", {
303
381
  server.registerResource("claude-sessions", "sessions://claude", {
304
382
  title: "🤖 Claude Sessions",
305
383
  description: "Claude conversation sessions",
306
- mimeType: "application/json"
384
+ mimeType: "application/json",
307
385
  }, async (uri) => {
308
386
  logger.debug("Reading Claude sessions resource");
309
387
  const contents = await resourceProvider.readResource(uri.href);
@@ -313,7 +391,7 @@ server.registerResource("claude-sessions", "sessions://claude", {
313
391
  server.registerResource("codex-sessions", "sessions://codex", {
314
392
  title: "💻 Codex Sessions",
315
393
  description: "Codex conversation sessions",
316
- mimeType: "application/json"
394
+ mimeType: "application/json",
317
395
  }, async (uri) => {
318
396
  logger.debug("Reading Codex sessions resource");
319
397
  const contents = await resourceProvider.readResource(uri.href);
@@ -323,17 +401,27 @@ server.registerResource("codex-sessions", "sessions://codex", {
323
401
  server.registerResource("gemini-sessions", "sessions://gemini", {
324
402
  title: "✨ Gemini Sessions",
325
403
  description: "Gemini conversation sessions",
326
- mimeType: "application/json"
404
+ mimeType: "application/json",
327
405
  }, async (uri) => {
328
406
  logger.debug("Reading Gemini sessions resource");
329
407
  const contents = await resourceProvider.readResource(uri.href);
330
408
  return { contents: contents ? [contents] : [] };
331
409
  });
410
+ // Register Grok sessions resource
411
+ server.registerResource("grok-sessions", "sessions://grok", {
412
+ title: "⚡ Grok Sessions",
413
+ description: "Grok conversation sessions",
414
+ mimeType: "application/json",
415
+ }, async (uri) => {
416
+ logger.debug("Reading Grok sessions resource");
417
+ const contents = await resourceProvider.readResource(uri.href);
418
+ return { contents: contents ? [contents] : [] };
419
+ });
332
420
  // Register Claude models resource
333
421
  server.registerResource("claude-models", "models://claude", {
334
422
  title: "🧠 Claude Models",
335
423
  description: "Claude models and capabilities",
336
- mimeType: "application/json"
424
+ mimeType: "application/json",
337
425
  }, async (uri) => {
338
426
  logger.debug("Reading Claude models resource");
339
427
  const contents = await resourceProvider.readResource(uri.href);
@@ -343,7 +431,7 @@ server.registerResource("claude-models", "models://claude", {
343
431
  server.registerResource("codex-models", "models://codex", {
344
432
  title: "🔧 Codex Models",
345
433
  description: "Codex models and capabilities",
346
- mimeType: "application/json"
434
+ mimeType: "application/json",
347
435
  }, async (uri) => {
348
436
  logger.debug("Reading Codex models resource");
349
437
  const contents = await resourceProvider.readResource(uri.href);
@@ -353,17 +441,27 @@ server.registerResource("codex-models", "models://codex", {
353
441
  server.registerResource("gemini-models", "models://gemini", {
354
442
  title: "🌟 Gemini Models",
355
443
  description: "Gemini models and capabilities",
356
- mimeType: "application/json"
444
+ mimeType: "application/json",
357
445
  }, async (uri) => {
358
446
  logger.debug("Reading Gemini models resource");
359
447
  const contents = await resourceProvider.readResource(uri.href);
360
448
  return { contents: contents ? [contents] : [] };
361
449
  });
450
+ // Register Grok models resource
451
+ server.registerResource("grok-models", "models://grok", {
452
+ title: "⚡ Grok Models",
453
+ description: "Grok models and capabilities",
454
+ mimeType: "application/json",
455
+ }, async (uri) => {
456
+ logger.debug("Reading Grok models resource");
457
+ const contents = await resourceProvider.readResource(uri.href);
458
+ return { contents: contents ? [contents] : [] };
459
+ });
362
460
  // Register performance metrics resource
363
461
  server.registerResource("performance-metrics", "metrics://performance", {
364
462
  title: "📈 Performance Metrics",
365
463
  description: "Request counts, latency, success/failure rates",
366
- mimeType: "application/json"
464
+ mimeType: "application/json",
367
465
  }, async (uri) => {
368
466
  logger.debug("Reading performance metrics resource");
369
467
  const contents = await resourceProvider.readResource(uri.href);
@@ -374,10 +472,16 @@ function prepareClaudeRequest(params) {
374
472
  const cliInfo = getCliInfo();
375
473
  const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
376
474
  // Review integrity check on raw prompt (before optimization)
377
- const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools: params.allowedTools, disallowedTools: params.disallowedTools });
475
+ const reviewIntegrity = checkReviewIntegrity({
476
+ prompt: params.prompt,
477
+ allowedTools: params.allowedTools,
478
+ disallowedTools: params.disallowedTools,
479
+ });
378
480
  if (reviewIntegrity.violations.length > 0) {
379
481
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
380
- cli: "claude", operation: params.operation, score: reviewIntegrity.totalScore
482
+ cli: "claude",
483
+ operation: params.operation,
484
+ score: reviewIntegrity.totalScore,
381
485
  });
382
486
  }
383
487
  let effectivePrompt = params.prompt;
@@ -405,7 +509,7 @@ function prepareClaudeRequest(params) {
405
509
  disallowedTools: params.disallowedTools,
406
510
  policy: params.approvalPolicy,
407
511
  metadata: { model: resolvedModel || "default", strictMcpConfig: params.strictMcpConfig },
408
- reviewIntegrity
512
+ reviewIntegrity,
409
513
  });
410
514
  if (approvalDecision.status !== "approved") {
411
515
  return createApprovalDeniedResponse(params.operation, approvalDecision);
@@ -421,9 +525,11 @@ function prepareClaudeRequest(params) {
421
525
  args.push("--output-format", "stream-json", "--include-partial-messages");
422
526
  }
423
527
  if (params.allowedTools && params.allowedTools.length > 0) {
528
+ sanitizeCliArgValues(params.allowedTools, "allowedTools");
424
529
  args.push("--allowed-tools", ...params.allowedTools);
425
530
  }
426
531
  if (params.disallowedTools && params.disallowedTools.length > 0) {
532
+ sanitizeCliArgValues(params.disallowedTools, "disallowedTools");
427
533
  args.push("--disallowed-tools", ...params.disallowedTools);
428
534
  }
429
535
  if (params.approvalStrategy === "mcp_managed") {
@@ -438,7 +544,16 @@ function prepareClaudeRequest(params) {
438
544
  args.push("--strict-mcp-config");
439
545
  }
440
546
  }
441
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, mcpConfig, approvalDecision, reviewIntegrity, args };
547
+ return {
548
+ corrId,
549
+ effectivePrompt,
550
+ resolvedModel,
551
+ requestedMcpServers,
552
+ mcpConfig,
553
+ approvalDecision,
554
+ reviewIntegrity,
555
+ args,
556
+ };
442
557
  }
443
558
  function prepareCodexRequest(params) {
444
559
  const corrId = params.correlationId || randomUUID();
@@ -448,7 +563,9 @@ function prepareCodexRequest(params) {
448
563
  const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
449
564
  if (reviewIntegrity.violations.length > 0) {
450
565
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
451
- cli: "codex", operation: params.operation, score: reviewIntegrity.totalScore
566
+ cli: "codex",
567
+ operation: params.operation,
568
+ score: reviewIntegrity.totalScore,
452
569
  });
453
570
  }
454
571
  let effectivePrompt = params.prompt;
@@ -469,32 +586,70 @@ function prepareCodexRequest(params) {
469
586
  requestedMcpServers,
470
587
  policy: params.approvalPolicy,
471
588
  metadata: { model: resolvedModel || "default" },
472
- reviewIntegrity
589
+ reviewIntegrity,
473
590
  });
474
591
  if (approvalDecision.status !== "approved") {
475
592
  return createApprovalDeniedResponse(params.operation, approvalDecision);
476
593
  }
477
594
  }
595
+ // Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
596
+ // Note: `codex exec resume` does NOT accept `--full-auto`; the original
597
+ // session's approval policy is inherited. We silently drop fullAuto on resume.
598
+ let sessionPlan;
599
+ try {
600
+ sessionPlan = resolveCodexSessionArgs({
601
+ sessionId: params.sessionId,
602
+ resumeLatest: params.resumeLatest,
603
+ createNewSession: params.createNewSession,
604
+ });
605
+ }
606
+ catch (err) {
607
+ return createErrorResponse(params.operation, 1, "", corrId, err);
608
+ }
478
609
  const args = ["exec"];
610
+ if (sessionPlan.mode !== "new") {
611
+ args.push("resume");
612
+ if (sessionPlan.mode === "resume-latest") {
613
+ args.push("--last");
614
+ }
615
+ }
479
616
  if (resolvedModel)
480
617
  args.push("--model", resolvedModel);
481
- if (params.fullAuto)
618
+ if (sessionPlan.mode === "new" && params.fullAuto) {
482
619
  args.push("--full-auto");
620
+ }
483
621
  if (params.dangerouslyBypassApprovalsAndSandbox) {
484
622
  args.push("--dangerously-bypass-approvals-and-sandbox");
485
623
  }
486
- args.push("--skip-git-repo-check", effectivePrompt);
487
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
624
+ args.push("--skip-git-repo-check");
625
+ if (sessionPlan.mode === "resume-by-id" && sessionPlan.sessionId) {
626
+ args.push(sessionPlan.sessionId);
627
+ }
628
+ args.push(effectivePrompt);
629
+ return {
630
+ corrId,
631
+ effectivePrompt,
632
+ resolvedModel,
633
+ requestedMcpServers,
634
+ approvalDecision,
635
+ reviewIntegrity,
636
+ args,
637
+ };
488
638
  }
489
639
  function prepareGeminiRequest(params) {
490
640
  const corrId = params.correlationId || randomUUID();
491
641
  const cliInfo = getCliInfo();
492
642
  const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
493
643
  // Review integrity check on raw prompt (before optimization)
494
- const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools: params.allowedTools });
644
+ const reviewIntegrity = checkReviewIntegrity({
645
+ prompt: params.prompt,
646
+ allowedTools: params.allowedTools,
647
+ });
495
648
  if (reviewIntegrity.violations.length > 0) {
496
649
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
497
- cli: "gemini", operation: params.operation, score: reviewIntegrity.totalScore
650
+ cli: "gemini",
651
+ operation: params.operation,
652
+ score: reviewIntegrity.totalScore,
498
653
  });
499
654
  }
500
655
  let effectivePrompt = params.prompt;
@@ -516,7 +671,7 @@ function prepareGeminiRequest(params) {
516
671
  allowedTools: params.allowedTools,
517
672
  policy: params.approvalPolicy,
518
673
  metadata: { model: resolvedModel || "default" },
519
- reviewIntegrity
674
+ reviewIntegrity,
520
675
  });
521
676
  if (approvalDecision.status !== "approved") {
522
677
  return createApprovalDeniedResponse(params.operation, approvalDecision);
@@ -529,17 +684,103 @@ function prepareGeminiRequest(params) {
529
684
  if (effectiveApprovalMode)
530
685
  args.push("--approval-mode", effectiveApprovalMode);
531
686
  if (params.allowedTools && params.allowedTools.length > 0) {
687
+ sanitizeCliArgValues(params.allowedTools, "allowedTools");
532
688
  params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
533
689
  }
534
690
  if (requestedMcpServers.length > 0) {
691
+ sanitizeCliArgValues(requestedMcpServers, "mcpServers");
535
692
  requestedMcpServers.forEach(serverName => args.push("--allowed-mcp-server-names", serverName));
536
693
  }
537
694
  if (params.includeDirs && params.includeDirs.length > 0) {
695
+ sanitizeCliArgValues(params.includeDirs, "includeDirs");
538
696
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
539
697
  }
540
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
698
+ return {
699
+ corrId,
700
+ effectivePrompt,
701
+ resolvedModel,
702
+ requestedMcpServers,
703
+ approvalDecision,
704
+ reviewIntegrity,
705
+ args,
706
+ };
707
+ }
708
+ function prepareGrokRequest(params) {
709
+ const corrId = params.correlationId || randomUUID();
710
+ const cliInfo = getCliInfo();
711
+ const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
712
+ // Review integrity check on raw prompt (before optimization)
713
+ const reviewIntegrity = checkReviewIntegrity({
714
+ prompt: params.prompt,
715
+ allowedTools: params.allowedTools,
716
+ disallowedTools: params.disallowedTools,
717
+ });
718
+ if (reviewIntegrity.violations.length > 0) {
719
+ logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
720
+ cli: "grok",
721
+ operation: params.operation,
722
+ score: reviewIntegrity.totalScore,
723
+ });
724
+ }
725
+ let effectivePrompt = params.prompt;
726
+ if (params.optimizePrompt) {
727
+ const optimized = optimizePromptText(effectivePrompt);
728
+ logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
729
+ effectivePrompt = optimized;
730
+ }
731
+ const requestedMcpServers = normalizeMcpServers(params.mcpServers);
732
+ let approvalDecision = null;
733
+ if (params.approvalStrategy === "mcp_managed") {
734
+ approvalDecision = approvalManager.decide({
735
+ cli: "grok",
736
+ operation: params.operation,
737
+ prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
738
+ bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
739
+ fullAuto: false,
740
+ requestedMcpServers,
741
+ allowedTools: params.allowedTools,
742
+ disallowedTools: params.disallowedTools,
743
+ policy: params.approvalPolicy,
744
+ metadata: { model: resolvedModel || "default" },
745
+ reviewIntegrity,
746
+ });
747
+ if (approvalDecision.status !== "approved") {
748
+ return createApprovalDeniedResponse(params.operation, approvalDecision);
749
+ }
750
+ }
751
+ const effectiveAlwaysApprove = params.approvalStrategy === "mcp_managed" ? true : Boolean(params.alwaysApprove);
752
+ const args = ["-p", effectivePrompt];
753
+ if (resolvedModel)
754
+ args.push("--model", resolvedModel);
755
+ if (params.outputFormat)
756
+ args.push("--output-format", params.outputFormat);
757
+ if (effectiveAlwaysApprove) {
758
+ args.push("--always-approve");
759
+ }
760
+ else if (params.permissionMode) {
761
+ args.push("--permission-mode", params.permissionMode);
762
+ }
763
+ if (params.effort)
764
+ args.push("--effort", params.effort);
765
+ if (params.reasoningEffort)
766
+ args.push("--reasoning-effort", params.reasoningEffort);
767
+ if (params.allowedTools && params.allowedTools.length > 0) {
768
+ args.push("--tools", params.allowedTools.join(","));
769
+ }
770
+ if (params.disallowedTools && params.disallowedTools.length > 0) {
771
+ args.push("--disallowed-tools", params.disallowedTools.join(","));
772
+ }
773
+ return {
774
+ corrId,
775
+ effectivePrompt,
776
+ resolvedModel,
777
+ requestedMcpServers,
778
+ approvalDecision,
779
+ reviewIntegrity,
780
+ args,
781
+ };
541
782
  }
542
- function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, resumable, outputFormat) {
783
+ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
543
784
  let finalStdout = stdout;
544
785
  // Skip response optimization for JSON output to prevent corrupting structured data
545
786
  if (optimizeResponse && outputFormat !== "json") {
@@ -548,7 +789,9 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
548
789
  finalStdout = optimized;
549
790
  }
550
791
  // Append review integrity warnings to response text (skip for JSON output to avoid corruption)
551
- if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0 && outputFormat !== "json") {
792
+ if (prep.reviewIntegrity &&
793
+ prep.reviewIntegrity.violations.length > 0 &&
794
+ outputFormat !== "json") {
552
795
  const warnings = prep.reviewIntegrity.violations
553
796
  .map(v => `- [${v.type}] ${v.detail}`)
554
797
  .join("\n");
@@ -556,9 +799,23 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
556
799
  }
557
800
  const response = {
558
801
  content: [{ type: "text", text: finalStdout }],
802
+ structuredContent: {
803
+ model: prep.resolvedModel || "default",
804
+ cli,
805
+ correlationId: corrId,
806
+ sessionId: sessionId || null,
807
+ durationMs,
808
+ ...extractUsageAndCost(cli, stdout, outputFormat),
809
+ exitCode: 0,
810
+ retryCount: 0,
811
+ },
559
812
  mcpServers: prep.mcpConfig
560
- ? { requested: prep.requestedMcpServers, enabled: prep.mcpConfig.enabled, missing: prep.mcpConfig.missing }
561
- : { requested: prep.requestedMcpServers }
813
+ ? {
814
+ requested: prep.requestedMcpServers,
815
+ enabled: prep.mcpConfig.enabled,
816
+ missing: prep.mcpConfig.missing,
817
+ }
818
+ : { requested: prep.requestedMcpServers },
562
819
  };
563
820
  if (sessionId) {
564
821
  response.sessionId = sessionId;
@@ -577,25 +834,40 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
577
834
  export async function handleGeminiRequest(deps, params) {
578
835
  const startTime = Date.now();
579
836
  const prep = prepareGeminiRequest({
580
- prompt: params.prompt, model: params.model, approvalMode: params.approvalMode,
581
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
582
- allowedTools: params.allowedTools, includeDirs: params.includeDirs,
583
- mcpServers: params.mcpServers, correlationId: params.correlationId,
584
- optimizePrompt: params.optimizePrompt, operation: "gemini_request"
837
+ prompt: params.prompt,
838
+ model: params.model,
839
+ approvalMode: params.approvalMode,
840
+ approvalStrategy: params.approvalStrategy,
841
+ approvalPolicy: params.approvalPolicy,
842
+ allowedTools: params.allowedTools,
843
+ includeDirs: params.includeDirs,
844
+ mcpServers: params.mcpServers,
845
+ correlationId: params.correlationId,
846
+ optimizePrompt: params.optimizePrompt,
847
+ operation: "gemini_request",
585
848
  });
586
849
  if (!("args" in prep))
587
850
  return prep;
588
851
  const { corrId, args } = prep;
589
852
  let durationMs = 0;
590
853
  let wasSuccessful = false;
854
+ safeFlightStart({
855
+ correlationId: corrId,
856
+ cli: "gemini",
857
+ model: prep.resolvedModel || "default",
858
+ prompt: params.prompt,
859
+ sessionId: params.sessionId,
860
+ });
591
861
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
592
862
  try {
593
863
  // Session arg planning (pure, no I/O)
594
864
  const sessionResult = resolveSessionResumeArgs({
595
- sessionId: params.sessionId, resumeLatest: params.resumeLatest, createNewSession: params.createNewSession
865
+ sessionId: params.sessionId,
866
+ resumeLatest: params.resumeLatest,
867
+ createNewSession: params.createNewSession,
596
868
  });
597
869
  args.push(...sessionResult.resumeArgs);
598
- const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs));
870
+ const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
599
871
  // Deferred — job still running, return async reference
600
872
  if (isDeferredResponse(result)) {
601
873
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -604,6 +876,16 @@ export async function handleGeminiRequest(deps, params) {
604
876
  durationMs = Math.max(0, Date.now() - startTime);
605
877
  if (code !== 0) {
606
878
  deps.logger.info(`[${corrId}] gemini_request failed in ${durationMs}ms`);
879
+ safeFlightComplete(corrId, {
880
+ response: stderr || "",
881
+ durationMs,
882
+ retryCount: 0,
883
+ circuitBreakerState: "closed",
884
+ optimizationApplied: false,
885
+ exitCode: code,
886
+ errorMessage: stderr || `Exit code ${code}`,
887
+ status: "failed",
888
+ });
607
889
  return createErrorResponse("gemini", code, stderr, corrId);
608
890
  }
609
891
  wasSuccessful = true;
@@ -628,11 +910,32 @@ export async function handleGeminiRequest(deps, params) {
628
910
  effectiveSessionId = newSession.id;
629
911
  }
630
912
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
631
- return buildCliResponse(stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, sessionResult.userProvidedSession);
913
+ const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
914
+ safeFlightComplete(corrId, {
915
+ response: stdout,
916
+ durationMs,
917
+ retryCount: 0,
918
+ circuitBreakerState: "closed",
919
+ approvalDecision: prep.approvalDecision?.status,
920
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
921
+ exitCode: 0,
922
+ status: "completed",
923
+ });
924
+ return response;
632
925
  }
633
926
  catch (error) {
634
927
  const elapsedMs = Math.max(0, Date.now() - startTime);
635
928
  deps.logger.info(`[${corrId}] gemini_request threw exception after ${elapsedMs}ms`);
929
+ safeFlightComplete(corrId, {
930
+ response: "",
931
+ durationMs: elapsedMs,
932
+ retryCount: 0,
933
+ circuitBreakerState: "closed",
934
+ optimizationApplied: false,
935
+ exitCode: 1,
936
+ errorMessage: error.message,
937
+ status: "failed",
938
+ });
636
939
  return createErrorResponse("gemini", 1, "", corrId, error);
637
940
  }
638
941
  finally {
@@ -642,11 +945,17 @@ export async function handleGeminiRequest(deps, params) {
642
945
  }
643
946
  export async function handleGeminiRequestAsync(deps, params) {
644
947
  const prep = prepareGeminiRequest({
645
- prompt: params.prompt, model: params.model, approvalMode: params.approvalMode,
646
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
647
- allowedTools: params.allowedTools, includeDirs: params.includeDirs,
648
- mcpServers: params.mcpServers, correlationId: params.correlationId,
649
- optimizePrompt: params.optimizePrompt, operation: "gemini_request_async"
948
+ prompt: params.prompt,
949
+ model: params.model,
950
+ approvalMode: params.approvalMode,
951
+ approvalStrategy: params.approvalStrategy,
952
+ approvalPolicy: params.approvalPolicy,
953
+ allowedTools: params.allowedTools,
954
+ includeDirs: params.includeDirs,
955
+ mcpServers: params.mcpServers,
956
+ correlationId: params.correlationId,
957
+ optimizePrompt: params.optimizePrompt,
958
+ operation: "gemini_request_async",
650
959
  });
651
960
  if (!("args" in prep))
652
961
  return prep;
@@ -654,7 +963,9 @@ export async function handleGeminiRequestAsync(deps, params) {
654
963
  try {
655
964
  // Session arg planning (pure, no I/O)
656
965
  const sessionResult = resolveSessionResumeArgs({
657
- sessionId: params.sessionId, resumeLatest: params.resumeLatest, createNewSession: params.createNewSession
966
+ sessionId: params.sessionId,
967
+ resumeLatest: params.resumeLatest,
968
+ createNewSession: params.createNewSession,
658
969
  });
659
970
  args.push(...sessionResult.resumeArgs);
660
971
  // Pre-start session I/O (async handlers: prevent orphaned jobs)
@@ -678,7 +989,7 @@ export async function handleGeminiRequestAsync(deps, params) {
678
989
  effectiveSessionId = newSession.id;
679
990
  }
680
991
  // Start job only after all session I/O succeeds
681
- const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs));
992
+ const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), undefined, params.forceRefresh);
682
993
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
683
994
  const asyncResponse = {
684
995
  success: true,
@@ -686,30 +997,231 @@ export async function handleGeminiRequestAsync(deps, params) {
686
997
  sessionId: effectiveSessionId || null,
687
998
  resumable: sessionResult.userProvidedSession,
688
999
  approval: approvalDecision,
689
- mcpServers: { requested: requestedMcpServers }
1000
+ mcpServers: { requested: requestedMcpServers },
690
1001
  };
691
1002
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
692
1003
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
693
1004
  }
694
1005
  return {
695
- content: [{
1006
+ content: [
1007
+ {
696
1008
  type: "text",
697
- text: JSON.stringify(asyncResponse, null, 2)
698
- }]
1009
+ text: JSON.stringify(asyncResponse, null, 2),
1010
+ },
1011
+ ],
699
1012
  };
700
1013
  }
701
1014
  catch (error) {
702
1015
  return createErrorResponse("gemini_request_async", 1, "", corrId, error);
703
1016
  }
704
1017
  }
1018
+ export async function handleGrokRequest(deps, params) {
1019
+ const startTime = Date.now();
1020
+ const prep = prepareGrokRequest({
1021
+ prompt: params.prompt,
1022
+ model: params.model,
1023
+ outputFormat: params.outputFormat,
1024
+ alwaysApprove: params.alwaysApprove,
1025
+ permissionMode: params.permissionMode,
1026
+ effort: params.effort,
1027
+ reasoningEffort: params.reasoningEffort,
1028
+ allowedTools: params.allowedTools,
1029
+ disallowedTools: params.disallowedTools,
1030
+ approvalStrategy: params.approvalStrategy,
1031
+ approvalPolicy: params.approvalPolicy,
1032
+ mcpServers: params.mcpServers,
1033
+ correlationId: params.correlationId,
1034
+ optimizePrompt: params.optimizePrompt,
1035
+ operation: "grok_request",
1036
+ });
1037
+ if (!("args" in prep))
1038
+ return prep;
1039
+ const { corrId, args } = prep;
1040
+ let durationMs = 0;
1041
+ let wasSuccessful = false;
1042
+ safeFlightStart({
1043
+ correlationId: corrId,
1044
+ cli: "grok",
1045
+ model: prep.resolvedModel || "default",
1046
+ prompt: params.prompt,
1047
+ sessionId: params.sessionId,
1048
+ });
1049
+ deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${params.prompt.length}`);
1050
+ try {
1051
+ // Session arg planning (pure, no I/O)
1052
+ const sessionResult = resolveGrokSessionArgs({
1053
+ sessionId: params.sessionId,
1054
+ resumeLatest: params.resumeLatest,
1055
+ createNewSession: params.createNewSession,
1056
+ });
1057
+ args.push(...sessionResult.resumeArgs);
1058
+ const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1059
+ // Deferred — job still running, return async reference
1060
+ if (isDeferredResponse(result)) {
1061
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
1062
+ }
1063
+ const { stdout, stderr, code } = result;
1064
+ durationMs = Math.max(0, Date.now() - startTime);
1065
+ if (code !== 0) {
1066
+ deps.logger.info(`[${corrId}] grok_request failed in ${durationMs}ms`);
1067
+ safeFlightComplete(corrId, {
1068
+ response: stderr || "",
1069
+ durationMs,
1070
+ retryCount: 0,
1071
+ circuitBreakerState: "closed",
1072
+ optimizationApplied: false,
1073
+ exitCode: code,
1074
+ errorMessage: stderr || `Exit code ${code}`,
1075
+ status: "failed",
1076
+ });
1077
+ return createErrorResponse("grok", code, stderr, corrId);
1078
+ }
1079
+ wasSuccessful = true;
1080
+ // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
1081
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1082
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1083
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1084
+ if (!existing) {
1085
+ try {
1086
+ await deps.sessionManager.createSession("grok", "Grok Session", effectiveSessionId);
1087
+ }
1088
+ catch {
1089
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1090
+ if (!rechecked)
1091
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1092
+ }
1093
+ }
1094
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1095
+ }
1096
+ else if (!params.createNewSession && !effectiveSessionId) {
1097
+ const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1098
+ effectiveSessionId = newSession.id;
1099
+ }
1100
+ deps.logger.info(`[${corrId}] grok_request completed successfully in ${durationMs}ms`);
1101
+ const response = buildCliResponse("grok", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
1102
+ safeFlightComplete(corrId, {
1103
+ response: stdout,
1104
+ durationMs,
1105
+ retryCount: 0,
1106
+ circuitBreakerState: "closed",
1107
+ approvalDecision: prep.approvalDecision?.status,
1108
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
1109
+ exitCode: 0,
1110
+ status: "completed",
1111
+ });
1112
+ return response;
1113
+ }
1114
+ catch (error) {
1115
+ const elapsedMs = Math.max(0, Date.now() - startTime);
1116
+ deps.logger.info(`[${corrId}] grok_request threw exception after ${elapsedMs}ms`);
1117
+ safeFlightComplete(corrId, {
1118
+ response: "",
1119
+ durationMs: elapsedMs,
1120
+ retryCount: 0,
1121
+ circuitBreakerState: "closed",
1122
+ optimizationApplied: false,
1123
+ exitCode: 1,
1124
+ errorMessage: error.message,
1125
+ status: "failed",
1126
+ });
1127
+ return createErrorResponse("grok", 1, "", corrId, error);
1128
+ }
1129
+ finally {
1130
+ const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
1131
+ performanceMetrics.recordRequest("grok", finalizedDurationMs, wasSuccessful);
1132
+ }
1133
+ }
1134
+ export async function handleGrokRequestAsync(deps, params) {
1135
+ const prep = prepareGrokRequest({
1136
+ prompt: params.prompt,
1137
+ model: params.model,
1138
+ outputFormat: params.outputFormat,
1139
+ alwaysApprove: params.alwaysApprove,
1140
+ permissionMode: params.permissionMode,
1141
+ effort: params.effort,
1142
+ reasoningEffort: params.reasoningEffort,
1143
+ allowedTools: params.allowedTools,
1144
+ disallowedTools: params.disallowedTools,
1145
+ approvalStrategy: params.approvalStrategy,
1146
+ approvalPolicy: params.approvalPolicy,
1147
+ mcpServers: params.mcpServers,
1148
+ correlationId: params.correlationId,
1149
+ optimizePrompt: params.optimizePrompt,
1150
+ operation: "grok_request_async",
1151
+ });
1152
+ if (!("args" in prep))
1153
+ return prep;
1154
+ const { corrId, args, requestedMcpServers, approvalDecision } = prep;
1155
+ try {
1156
+ // Session arg planning (pure, no I/O)
1157
+ const sessionResult = resolveGrokSessionArgs({
1158
+ sessionId: params.sessionId,
1159
+ resumeLatest: params.resumeLatest,
1160
+ createNewSession: params.createNewSession,
1161
+ });
1162
+ args.push(...sessionResult.resumeArgs);
1163
+ // Pre-start session I/O (async handlers: prevent orphaned jobs)
1164
+ let effectiveSessionId = sessionResult.effectiveSessionId;
1165
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
1166
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
1167
+ if (!existing) {
1168
+ try {
1169
+ await deps.sessionManager.createSession("grok", "Grok Session", effectiveSessionId);
1170
+ }
1171
+ catch {
1172
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
1173
+ if (!rechecked)
1174
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
1175
+ }
1176
+ }
1177
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1178
+ }
1179
+ else if (!params.createNewSession && !effectiveSessionId) {
1180
+ const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1181
+ effectiveSessionId = newSession.id;
1182
+ }
1183
+ // Start job only after all session I/O succeeds
1184
+ const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1185
+ deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
1186
+ const asyncResponse = {
1187
+ success: true,
1188
+ job,
1189
+ sessionId: effectiveSessionId || null,
1190
+ resumable: sessionResult.userProvidedSession,
1191
+ approval: approvalDecision,
1192
+ mcpServers: { requested: requestedMcpServers },
1193
+ };
1194
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1195
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1196
+ }
1197
+ return {
1198
+ content: [
1199
+ {
1200
+ type: "text",
1201
+ text: JSON.stringify(asyncResponse, null, 2),
1202
+ },
1203
+ ],
1204
+ };
1205
+ }
1206
+ catch (error) {
1207
+ return createErrorResponse("grok_request_async", 1, "", corrId, error);
1208
+ }
1209
+ }
705
1210
  export async function handleCodexRequestAsync(deps, params) {
706
1211
  const prep = prepareCodexRequest({
707
- prompt: params.prompt, model: params.model, fullAuto: params.fullAuto,
1212
+ prompt: params.prompt,
1213
+ model: params.model,
1214
+ fullAuto: params.fullAuto,
708
1215
  dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
709
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
1216
+ approvalStrategy: params.approvalStrategy,
1217
+ approvalPolicy: params.approvalPolicy,
710
1218
  mcpServers: params.mcpServers,
711
- correlationId: params.correlationId, optimizePrompt: params.optimizePrompt,
712
- operation: "codex_request_async"
1219
+ sessionId: params.sessionId,
1220
+ resumeLatest: params.resumeLatest,
1221
+ createNewSession: params.createNewSession,
1222
+ correlationId: params.correlationId,
1223
+ optimizePrompt: params.optimizePrompt,
1224
+ operation: "codex_request_async",
713
1225
  });
714
1226
  if (!("args" in prep))
715
1227
  return prep;
@@ -735,23 +1247,25 @@ export async function handleCodexRequestAsync(deps, params) {
735
1247
  effectiveSessionId = newSession.id;
736
1248
  }
737
1249
  // Start job only after all session I/O succeeds
738
- const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs));
1250
+ const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), undefined, params.forceRefresh);
739
1251
  deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
740
1252
  const asyncResponse = {
741
1253
  success: true,
742
1254
  job,
743
1255
  sessionId: effectiveSessionId || null,
744
1256
  approval: approvalDecision,
745
- mcpServers: { requested: requestedMcpServers }
1257
+ mcpServers: { requested: requestedMcpServers },
746
1258
  };
747
1259
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
748
1260
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
749
1261
  }
750
1262
  return {
751
- content: [{
1263
+ content: [
1264
+ {
752
1265
  type: "text",
753
- text: JSON.stringify(asyncResponse, null, 2)
754
- }]
1266
+ text: JSON.stringify(asyncResponse, null, 2),
1267
+ },
1268
+ ],
755
1269
  };
756
1270
  }
757
1271
  catch (error) {
@@ -762,35 +1276,90 @@ export async function handleCodexRequestAsync(deps, params) {
762
1276
  // Claude Code Tool
763
1277
  //──────────────────────────────────────────────────────────────────────────────
764
1278
  server.tool("claude_request", {
765
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
766
- model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
767
- outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1279
+ prompt: z
1280
+ .string()
1281
+ .min(1, "Prompt cannot be empty")
1282
+ .max(100000, "Prompt too long (max 100k chars)")
1283
+ .describe("Prompt text for Claude"),
1284
+ model: z
1285
+ .string()
1286
+ .optional()
1287
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1288
+ outputFormat: z
1289
+ .enum(["text", "json", "stream-json"])
1290
+ .default("text")
1291
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
768
1292
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
769
1293
  continueSession: z.boolean().default(false).describe("Continue active session"),
770
1294
  createNewSession: z.boolean().default(false).describe("Force new session"),
771
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1295
+ allowedTools: z
1296
+ .array(z.string())
1297
+ .optional()
1298
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
772
1299
  disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
773
- dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
774
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
775
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
776
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
777
- strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
1300
+ dangerouslySkipPermissions: z
1301
+ .boolean()
1302
+ .default(false)
1303
+ .describe("Bypass permissions (sandbox only)"),
1304
+ approvalStrategy: z
1305
+ .enum(["legacy", "mcp_managed"])
1306
+ .default("legacy")
1307
+ .describe("Approval strategy"),
1308
+ approvalPolicy: z
1309
+ .enum(["strict", "balanced", "permissive"])
1310
+ .optional()
1311
+ .describe("Approval policy override"),
1312
+ mcpServers: z
1313
+ .array(MCP_SERVER_ENUM)
1314
+ .default(["sqry"])
1315
+ .describe("MCP servers exposed to Claude"),
1316
+ strictMcpConfig: z
1317
+ .boolean()
1318
+ .default(false)
1319
+ .describe("Restrict Claude to provided MCP config only"),
778
1320
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
779
1321
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
780
1322
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
781
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
782
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
1323
+ idleTimeoutMs: z
1324
+ .number()
1325
+ .int()
1326
+ .min(30_000)
1327
+ .max(3_600_000)
1328
+ .optional()
1329
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1330
+ forceRefresh: z
1331
+ .boolean()
1332
+ .default(false)
1333
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1334
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
783
1335
  const startTime = Date.now();
784
1336
  const prep = prepareClaudeRequest({
785
- prompt, model, outputFormat, allowedTools, disallowedTools, dangerouslySkipPermissions,
786
- approvalStrategy, approvalPolicy, mcpServers,
787
- strictMcpConfig, correlationId, optimizePrompt, operation: "claude_request"
1337
+ prompt,
1338
+ model,
1339
+ outputFormat,
1340
+ allowedTools,
1341
+ disallowedTools,
1342
+ dangerouslySkipPermissions,
1343
+ approvalStrategy,
1344
+ approvalPolicy,
1345
+ mcpServers,
1346
+ strictMcpConfig,
1347
+ correlationId,
1348
+ optimizePrompt,
1349
+ operation: "claude_request",
788
1350
  });
789
1351
  if (!("args" in prep))
790
1352
  return prep;
791
1353
  const { corrId, args } = prep;
792
1354
  let durationMs = 0;
793
1355
  let wasSuccessful = false;
1356
+ safeFlightStart({
1357
+ correlationId: corrId,
1358
+ cli: "claude",
1359
+ model: prep.resolvedModel || "default",
1360
+ prompt,
1361
+ sessionId,
1362
+ });
794
1363
  logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
795
1364
  try {
796
1365
  // Session management
@@ -812,10 +1381,8 @@ server.tool("claude_request", {
812
1381
  await sessionManager.updateSessionUsage(effectiveSessionId);
813
1382
  }
814
1383
  // Idle timeout only for stream-json (text/json produce no output until done)
815
- const effectiveIdleTimeout = outputFormat === "stream-json"
816
- ? resolveIdleTimeout("claude", idleTimeoutMs)
817
- : undefined;
818
- const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat);
1384
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1385
+ const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh);
819
1386
  // Deferred — job still running, return async reference
820
1387
  if (isDeferredResponse(result)) {
821
1388
  return buildDeferredToolResponse(result, effectiveSessionId);
@@ -824,6 +1391,16 @@ server.tool("claude_request", {
824
1391
  durationMs = Math.max(0, Date.now() - startTime);
825
1392
  if (code !== 0) {
826
1393
  logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
1394
+ safeFlightComplete(corrId, {
1395
+ response: stderr || "",
1396
+ durationMs,
1397
+ retryCount: 0,
1398
+ circuitBreakerState: "closed",
1399
+ optimizationApplied: optimizePrompt || optimizeResponse,
1400
+ exitCode: code,
1401
+ errorMessage: stderr || `Exit code ${code}`,
1402
+ status: "failed",
1403
+ });
827
1404
  return createErrorResponse("claude", code, stderr, corrId);
828
1405
  }
829
1406
  wasSuccessful = true;
@@ -841,13 +1418,44 @@ server.tool("claude_request", {
841
1418
  if (parsed.costUsd !== null) {
842
1419
  logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
843
1420
  }
844
- return buildCliResponse(parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, undefined, outputFormat);
1421
+ safeFlightComplete(corrId, {
1422
+ response: parsed.text,
1423
+ inputTokens: parsed.usage?.inputTokens,
1424
+ outputTokens: parsed.usage?.outputTokens,
1425
+ durationMs,
1426
+ retryCount: 0,
1427
+ circuitBreakerState: "closed",
1428
+ costUsd: parsed.costUsd ?? undefined,
1429
+ optimizationApplied: optimizePrompt || optimizeResponse,
1430
+ exitCode: 0,
1431
+ status: "completed",
1432
+ });
1433
+ return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
845
1434
  }
846
- return buildCliResponse(stdout, optimizeResponse, corrId, effectiveSessionId, prep, undefined, outputFormat);
1435
+ safeFlightComplete(corrId, {
1436
+ response: stdout,
1437
+ durationMs,
1438
+ retryCount: 0,
1439
+ circuitBreakerState: "closed",
1440
+ optimizationApplied: optimizePrompt || optimizeResponse,
1441
+ exitCode: 0,
1442
+ status: "completed",
1443
+ });
1444
+ return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
847
1445
  }
848
1446
  catch (error) {
849
1447
  const elapsedMs = Math.max(0, Date.now() - startTime);
850
1448
  logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
1449
+ safeFlightComplete(corrId, {
1450
+ response: "",
1451
+ durationMs: elapsedMs,
1452
+ retryCount: 0,
1453
+ circuitBreakerState: "closed",
1454
+ optimizationApplied: optimizePrompt || optimizeResponse,
1455
+ exitCode: 1,
1456
+ errorMessage: error.message,
1457
+ status: "failed",
1458
+ });
851
1459
  return createErrorResponse("claude", 1, "", corrId, error);
852
1460
  }
853
1461
  finally {
@@ -859,34 +1467,84 @@ server.tool("claude_request", {
859
1467
  // Codex Tool
860
1468
  //──────────────────────────────────────────────────────────────────────────────
861
1469
  server.tool("codex_request", {
862
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
1470
+ prompt: z
1471
+ .string()
1472
+ .min(1, "Prompt cannot be empty")
1473
+ .max(100000, "Prompt too long (max 100k chars)")
1474
+ .describe("Prompt text for Codex"),
863
1475
  model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
864
1476
  fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
865
- dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
866
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
867
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
868
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
869
- sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
870
- createNewSession: z.boolean().default(false).describe("Force new session"),
1477
+ dangerouslyBypassApprovalsAndSandbox: z
1478
+ .boolean()
1479
+ .default(false)
1480
+ .describe("Run Codex without approvals/sandbox"),
1481
+ approvalStrategy: z
1482
+ .enum(["legacy", "mcp_managed"])
1483
+ .default("legacy")
1484
+ .describe("Approval strategy"),
1485
+ approvalPolicy: z
1486
+ .enum(["strict", "balanced", "permissive"])
1487
+ .optional()
1488
+ .describe("Approval policy override"),
1489
+ mcpServers: z
1490
+ .array(MCP_SERVER_ENUM)
1491
+ .default(["sqry"])
1492
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1493
+ sessionId: z
1494
+ .string()
1495
+ .optional()
1496
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1497
+ resumeLatest: z
1498
+ .boolean()
1499
+ .default(false)
1500
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1501
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
871
1502
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
872
1503
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
873
1504
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
874
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
875
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
1505
+ idleTimeoutMs: z
1506
+ .number()
1507
+ .int()
1508
+ .min(30_000)
1509
+ .max(3_600_000)
1510
+ .optional()
1511
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1512
+ forceRefresh: z
1513
+ .boolean()
1514
+ .default(false)
1515
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1516
+ }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
876
1517
  const startTime = Date.now();
877
1518
  const prep = prepareCodexRequest({
878
- prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox,
879
- approvalStrategy, approvalPolicy, mcpServers,
880
- correlationId, optimizePrompt, operation: "codex_request"
1519
+ prompt,
1520
+ model,
1521
+ fullAuto,
1522
+ dangerouslyBypassApprovalsAndSandbox,
1523
+ approvalStrategy,
1524
+ approvalPolicy,
1525
+ mcpServers,
1526
+ sessionId,
1527
+ resumeLatest,
1528
+ createNewSession,
1529
+ correlationId,
1530
+ optimizePrompt,
1531
+ operation: "codex_request",
881
1532
  });
882
1533
  if (!("args" in prep))
883
1534
  return prep;
884
1535
  const { corrId, args } = prep;
885
1536
  let durationMs = 0;
886
1537
  let wasSuccessful = false;
1538
+ safeFlightStart({
1539
+ correlationId: corrId,
1540
+ cli: "codex",
1541
+ model: prep.resolvedModel || "default",
1542
+ prompt,
1543
+ sessionId,
1544
+ });
887
1545
  logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
888
1546
  try {
889
- const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs));
1547
+ const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), undefined, forceRefresh);
890
1548
  // Deferred — job still running, return async reference
891
1549
  if (isDeferredResponse(result)) {
892
1550
  return buildDeferredToolResponse(result, sessionId);
@@ -895,6 +1553,16 @@ server.tool("codex_request", {
895
1553
  durationMs = Math.max(0, Date.now() - startTime);
896
1554
  if (code !== 0) {
897
1555
  logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
1556
+ safeFlightComplete(corrId, {
1557
+ response: stderr || "",
1558
+ durationMs,
1559
+ retryCount: 0,
1560
+ circuitBreakerState: "closed",
1561
+ optimizationApplied: optimizePrompt || optimizeResponse,
1562
+ exitCode: code,
1563
+ errorMessage: stderr || `Exit code ${code}`,
1564
+ status: "failed",
1565
+ });
898
1566
  return createErrorResponse("codex", code, stderr, corrId);
899
1567
  }
900
1568
  wasSuccessful = true;
@@ -918,11 +1586,30 @@ server.tool("codex_request", {
918
1586
  effectiveSessionId = newSession.id;
919
1587
  }
920
1588
  logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
921
- return buildCliResponse(stdout, optimizeResponse, corrId, effectiveSessionId, prep);
1589
+ safeFlightComplete(corrId, {
1590
+ response: stdout,
1591
+ durationMs,
1592
+ retryCount: 0,
1593
+ circuitBreakerState: "closed",
1594
+ optimizationApplied: optimizePrompt || optimizeResponse,
1595
+ exitCode: 0,
1596
+ status: "completed",
1597
+ });
1598
+ return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
922
1599
  }
923
1600
  catch (error) {
924
1601
  const elapsedMs = Math.max(0, Date.now() - startTime);
925
1602
  logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
1603
+ safeFlightComplete(corrId, {
1604
+ response: "",
1605
+ durationMs: elapsedMs,
1606
+ retryCount: 0,
1607
+ circuitBreakerState: "closed",
1608
+ optimizationApplied: optimizePrompt || optimizeResponse,
1609
+ exitCode: 1,
1610
+ errorMessage: error.message,
1611
+ status: "failed",
1612
+ });
926
1613
  return createErrorResponse("codex", 1, "", corrId, error);
927
1614
  }
928
1615
  finally {
@@ -934,49 +1621,237 @@ server.tool("codex_request", {
934
1621
  // Gemini Tool
935
1622
  //──────────────────────────────────────────────────────────────────────────────
936
1623
  server.tool("gemini_request", {
937
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
938
- model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1624
+ prompt: z
1625
+ .string()
1626
+ .min(1, "Prompt cannot be empty")
1627
+ .max(100000, "Prompt too long (max 100k chars)")
1628
+ .describe("Prompt text for Gemini"),
1629
+ model: z
1630
+ .string()
1631
+ .optional()
1632
+ .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
939
1633
  sessionId: z.string().optional().describe("Session ID or 'latest'"),
940
1634
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
941
1635
  createNewSession: z.boolean().default(false).describe("Force new session"),
942
- approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
943
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
944
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
945
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
946
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
1636
+ approvalMode: z
1637
+ .enum(["default", "auto_edit", "yolo"])
1638
+ .optional()
1639
+ .describe("Approval: default|auto_edit|yolo"),
1640
+ approvalStrategy: z
1641
+ .enum(["legacy", "mcp_managed"])
1642
+ .default("legacy")
1643
+ .describe("Approval strategy"),
1644
+ approvalPolicy: z
1645
+ .enum(["strict", "balanced", "permissive"])
1646
+ .optional()
1647
+ .describe("Approval policy override"),
1648
+ mcpServers: z
1649
+ .array(MCP_SERVER_ENUM)
1650
+ .default(["sqry"])
1651
+ .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1652
+ allowedTools: z
1653
+ .array(z.string())
1654
+ .optional()
1655
+ .describe("Allowed tools (['Write','Edit','Bash'])"),
947
1656
  includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
948
1657
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
949
1658
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
950
1659
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
951
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
952
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
953
- return handleGeminiRequest({ sessionManager, logger }, { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs });
1660
+ idleTimeoutMs: z
1661
+ .number()
1662
+ .int()
1663
+ .min(30_000)
1664
+ .max(3_600_000)
1665
+ .optional()
1666
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1667
+ forceRefresh: z
1668
+ .boolean()
1669
+ .default(false)
1670
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1671
+ }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1672
+ return handleGeminiRequest({ sessionManager, logger }, {
1673
+ prompt,
1674
+ model,
1675
+ sessionId,
1676
+ resumeLatest,
1677
+ createNewSession,
1678
+ approvalMode,
1679
+ approvalStrategy,
1680
+ approvalPolicy,
1681
+ mcpServers,
1682
+ allowedTools,
1683
+ includeDirs,
1684
+ correlationId,
1685
+ optimizePrompt,
1686
+ optimizeResponse,
1687
+ idleTimeoutMs,
1688
+ forceRefresh,
1689
+ });
1690
+ });
1691
+ //──────────────────────────────────────────────────────────────────────────────
1692
+ // Grok Tool
1693
+ //──────────────────────────────────────────────────────────────────────────────
1694
+ server.tool("grok_request", {
1695
+ prompt: z
1696
+ .string()
1697
+ .min(1, "Prompt cannot be empty")
1698
+ .max(100000, "Prompt too long (max 100k chars)")
1699
+ .describe("Prompt text for Grok"),
1700
+ model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
1701
+ outputFormat: z
1702
+ .enum(["plain", "json", "streaming-json"])
1703
+ .optional()
1704
+ .describe("Output format (plain|json|streaming-json). Grok default is plain."),
1705
+ sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1706
+ resumeLatest: z
1707
+ .boolean()
1708
+ .default(false)
1709
+ .describe("Resume most recent Grok session in cwd (--continue)"),
1710
+ createNewSession: z.boolean().default(false).describe("Force new session"),
1711
+ alwaysApprove: z
1712
+ .boolean()
1713
+ .default(false)
1714
+ .describe("Auto-approve all tool executions (--always-approve)"),
1715
+ permissionMode: z
1716
+ .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
1717
+ .optional()
1718
+ .describe("Grok permission mode"),
1719
+ effort: z
1720
+ .enum(["low", "medium", "high", "xhigh", "max"])
1721
+ .optional()
1722
+ .describe("Grok effort level"),
1723
+ reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
1724
+ approvalStrategy: z
1725
+ .enum(["legacy", "mcp_managed"])
1726
+ .default("legacy")
1727
+ .describe("Approval strategy"),
1728
+ approvalPolicy: z
1729
+ .enum(["strict", "balanced", "permissive"])
1730
+ .optional()
1731
+ .describe("Approval policy override"),
1732
+ mcpServers: z
1733
+ .array(MCP_SERVER_ENUM)
1734
+ .default(["sqry"])
1735
+ .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
1736
+ allowedTools: z
1737
+ .array(z.string())
1738
+ .optional()
1739
+ .describe("Allowed built-in tools (passed as --tools comma list)"),
1740
+ disallowedTools: z
1741
+ .array(z.string())
1742
+ .optional()
1743
+ .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
1744
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1745
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1746
+ optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
1747
+ idleTimeoutMs: z
1748
+ .number()
1749
+ .int()
1750
+ .min(30_000)
1751
+ .max(3_600_000)
1752
+ .optional()
1753
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1754
+ forceRefresh: z
1755
+ .boolean()
1756
+ .default(false)
1757
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1758
+ }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
1759
+ return handleGrokRequest({ sessionManager, logger }, {
1760
+ prompt,
1761
+ model,
1762
+ outputFormat,
1763
+ sessionId,
1764
+ resumeLatest,
1765
+ createNewSession,
1766
+ alwaysApprove,
1767
+ permissionMode,
1768
+ effort,
1769
+ reasoningEffort,
1770
+ approvalStrategy,
1771
+ approvalPolicy,
1772
+ mcpServers,
1773
+ allowedTools,
1774
+ disallowedTools,
1775
+ correlationId,
1776
+ optimizePrompt,
1777
+ optimizeResponse,
1778
+ idleTimeoutMs,
1779
+ forceRefresh,
1780
+ });
954
1781
  });
955
1782
  //──────────────────────────────────────────────────────────────────────────────
956
1783
  // Async Long-Running Job Tools (No Time-Bound LLM Execution)
957
1784
  //──────────────────────────────────────────────────────────────────────────────
958
1785
  server.tool("claude_request_async", {
959
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
960
- model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
961
- outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1786
+ prompt: z
1787
+ .string()
1788
+ .min(1, "Prompt cannot be empty")
1789
+ .max(100000, "Prompt too long (max 100k chars)")
1790
+ .describe("Prompt text for Claude"),
1791
+ model: z
1792
+ .string()
1793
+ .optional()
1794
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1795
+ outputFormat: z
1796
+ .enum(["text", "json", "stream-json"])
1797
+ .default("text")
1798
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
962
1799
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
963
1800
  continueSession: z.boolean().default(false).describe("Continue active session"),
964
1801
  createNewSession: z.boolean().default(false).describe("Force new session"),
965
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1802
+ allowedTools: z
1803
+ .array(z.string())
1804
+ .optional()
1805
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
966
1806
  disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
967
- dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
968
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
969
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
970
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
971
- strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
1807
+ dangerouslySkipPermissions: z
1808
+ .boolean()
1809
+ .default(false)
1810
+ .describe("Bypass permissions (sandbox only)"),
1811
+ approvalStrategy: z
1812
+ .enum(["legacy", "mcp_managed"])
1813
+ .default("legacy")
1814
+ .describe("Approval strategy"),
1815
+ approvalPolicy: z
1816
+ .enum(["strict", "balanced", "permissive"])
1817
+ .optional()
1818
+ .describe("Approval policy override"),
1819
+ mcpServers: z
1820
+ .array(MCP_SERVER_ENUM)
1821
+ .default(["sqry"])
1822
+ .describe("MCP servers exposed to Claude"),
1823
+ strictMcpConfig: z
1824
+ .boolean()
1825
+ .default(false)
1826
+ .describe("Restrict Claude to provided MCP config only"),
972
1827
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
973
1828
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
974
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
975
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs }) => {
1829
+ idleTimeoutMs: z
1830
+ .number()
1831
+ .int()
1832
+ .min(30_000)
1833
+ .max(3_600_000)
1834
+ .optional()
1835
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1836
+ forceRefresh: z
1837
+ .boolean()
1838
+ .default(false)
1839
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1840
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
976
1841
  const prep = prepareClaudeRequest({
977
- prompt, model, outputFormat, allowedTools, disallowedTools, dangerouslySkipPermissions,
978
- approvalStrategy, approvalPolicy, mcpServers,
979
- strictMcpConfig, correlationId, optimizePrompt, operation: "claude_request_async"
1842
+ prompt,
1843
+ model,
1844
+ outputFormat,
1845
+ allowedTools,
1846
+ disallowedTools,
1847
+ dangerouslySkipPermissions,
1848
+ approvalStrategy,
1849
+ approvalPolicy,
1850
+ mcpServers,
1851
+ strictMcpConfig,
1852
+ correlationId,
1853
+ optimizePrompt,
1854
+ operation: "claude_request_async",
980
1855
  });
981
1856
  if (!("args" in prep))
982
1857
  return prep;
@@ -1007,10 +1882,8 @@ server.tool("claude_request_async", {
1007
1882
  }
1008
1883
  }
1009
1884
  // Idle timeout only for stream-json (text/json produce no output until done)
1010
- const effectiveIdleTimeout = outputFormat === "stream-json"
1011
- ? resolveIdleTimeout("claude", idleTimeoutMs)
1012
- : undefined;
1013
- const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat);
1885
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1886
+ const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
1014
1887
  logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
1015
1888
  const asyncResponse = {
1016
1889
  success: true,
@@ -1020,17 +1893,19 @@ server.tool("claude_request_async", {
1020
1893
  mcpServers: {
1021
1894
  requested: requestedMcpServers,
1022
1895
  enabled: mcpConfig?.enabled,
1023
- missing: mcpConfig?.missing
1024
- }
1896
+ missing: mcpConfig?.missing,
1897
+ },
1025
1898
  };
1026
1899
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1027
1900
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1028
1901
  }
1029
1902
  return {
1030
- content: [{
1903
+ content: [
1904
+ {
1031
1905
  type: "text",
1032
- text: JSON.stringify(asyncResponse, null, 2)
1033
- }]
1906
+ text: JSON.stringify(asyncResponse, null, 2),
1907
+ },
1908
+ ],
1034
1909
  };
1035
1910
  }
1036
1911
  catch (error) {
@@ -1038,82 +1913,276 @@ server.tool("claude_request_async", {
1038
1913
  }
1039
1914
  });
1040
1915
  server.tool("codex_request_async", {
1041
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
1916
+ prompt: z
1917
+ .string()
1918
+ .min(1, "Prompt cannot be empty")
1919
+ .max(100000, "Prompt too long (max 100k chars)")
1920
+ .describe("Prompt text for Codex"),
1042
1921
  model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1043
1922
  fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1044
- dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
1045
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
1046
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
1047
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1048
- sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
1049
- createNewSession: z.boolean().default(false).describe("Force new session"),
1923
+ dangerouslyBypassApprovalsAndSandbox: z
1924
+ .boolean()
1925
+ .default(false)
1926
+ .describe("Run Codex without approvals/sandbox"),
1927
+ approvalStrategy: z
1928
+ .enum(["legacy", "mcp_managed"])
1929
+ .default("legacy")
1930
+ .describe("Approval strategy"),
1931
+ approvalPolicy: z
1932
+ .enum(["strict", "balanced", "permissive"])
1933
+ .optional()
1934
+ .describe("Approval policy override"),
1935
+ mcpServers: z
1936
+ .array(MCP_SERVER_ENUM)
1937
+ .default(["sqry"])
1938
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1939
+ sessionId: z
1940
+ .string()
1941
+ .optional()
1942
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
1943
+ resumeLatest: z
1944
+ .boolean()
1945
+ .default(false)
1946
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
1947
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
1050
1948
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1051
1949
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1052
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
1053
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs }) => {
1054
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, { prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs });
1950
+ idleTimeoutMs: z
1951
+ .number()
1952
+ .int()
1953
+ .min(30_000)
1954
+ .max(3_600_000)
1955
+ .optional()
1956
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1957
+ forceRefresh: z
1958
+ .boolean()
1959
+ .default(false)
1960
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
1961
+ }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
1962
+ return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
1963
+ prompt,
1964
+ model,
1965
+ fullAuto,
1966
+ dangerouslyBypassApprovalsAndSandbox,
1967
+ approvalStrategy,
1968
+ approvalPolicy,
1969
+ mcpServers,
1970
+ sessionId,
1971
+ resumeLatest,
1972
+ createNewSession,
1973
+ correlationId,
1974
+ optimizePrompt,
1975
+ idleTimeoutMs,
1976
+ forceRefresh,
1977
+ });
1055
1978
  });
1056
1979
  server.tool("gemini_request_async", {
1057
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
1058
- model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1980
+ prompt: z
1981
+ .string()
1982
+ .min(1, "Prompt cannot be empty")
1983
+ .max(100000, "Prompt too long (max 100k chars)")
1984
+ .describe("Prompt text for Gemini"),
1985
+ model: z
1986
+ .string()
1987
+ .optional()
1988
+ .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1059
1989
  sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1060
1990
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1061
1991
  createNewSession: z.boolean().default(false).describe("Force new session"),
1062
- approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
1063
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
1064
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
1065
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1066
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
1992
+ approvalMode: z
1993
+ .enum(["default", "auto_edit", "yolo"])
1994
+ .optional()
1995
+ .describe("Approval: default|auto_edit|yolo"),
1996
+ approvalStrategy: z
1997
+ .enum(["legacy", "mcp_managed"])
1998
+ .default("legacy")
1999
+ .describe("Approval strategy"),
2000
+ approvalPolicy: z
2001
+ .enum(["strict", "balanced", "permissive"])
2002
+ .optional()
2003
+ .describe("Approval policy override"),
2004
+ mcpServers: z
2005
+ .array(MCP_SERVER_ENUM)
2006
+ .default(["sqry"])
2007
+ .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
2008
+ allowedTools: z
2009
+ .array(z.string())
2010
+ .optional()
2011
+ .describe("Allowed tools (['Write','Edit','Bash'])"),
1067
2012
  includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1068
2013
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1069
2014
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1070
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
1071
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs }) => {
1072
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs });
2015
+ idleTimeoutMs: z
2016
+ .number()
2017
+ .int()
2018
+ .min(30_000)
2019
+ .max(3_600_000)
2020
+ .optional()
2021
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2022
+ forceRefresh: z
2023
+ .boolean()
2024
+ .default(false)
2025
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2026
+ }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2027
+ return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
2028
+ prompt,
2029
+ model,
2030
+ sessionId,
2031
+ resumeLatest,
2032
+ createNewSession,
2033
+ approvalMode,
2034
+ approvalStrategy,
2035
+ approvalPolicy,
2036
+ mcpServers,
2037
+ allowedTools,
2038
+ includeDirs,
2039
+ correlationId,
2040
+ optimizePrompt,
2041
+ idleTimeoutMs,
2042
+ forceRefresh,
2043
+ });
2044
+ });
2045
+ server.tool("grok_request_async", {
2046
+ prompt: z
2047
+ .string()
2048
+ .min(1, "Prompt cannot be empty")
2049
+ .max(100000, "Prompt too long (max 100k chars)")
2050
+ .describe("Prompt text for Grok"),
2051
+ model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
2052
+ outputFormat: z
2053
+ .enum(["plain", "json", "streaming-json"])
2054
+ .optional()
2055
+ .describe("Output format (plain|json|streaming-json). Grok default is plain."),
2056
+ sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
2057
+ resumeLatest: z
2058
+ .boolean()
2059
+ .default(false)
2060
+ .describe("Resume most recent Grok session in cwd (--continue)"),
2061
+ createNewSession: z.boolean().default(false).describe("Force new session"),
2062
+ alwaysApprove: z
2063
+ .boolean()
2064
+ .default(false)
2065
+ .describe("Auto-approve all tool executions (--always-approve)"),
2066
+ permissionMode: z
2067
+ .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
2068
+ .optional()
2069
+ .describe("Grok permission mode"),
2070
+ effort: z
2071
+ .enum(["low", "medium", "high", "xhigh", "max"])
2072
+ .optional()
2073
+ .describe("Grok effort level"),
2074
+ reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
2075
+ approvalStrategy: z
2076
+ .enum(["legacy", "mcp_managed"])
2077
+ .default("legacy")
2078
+ .describe("Approval strategy"),
2079
+ approvalPolicy: z
2080
+ .enum(["strict", "balanced", "permissive"])
2081
+ .optional()
2082
+ .describe("Approval policy override"),
2083
+ mcpServers: z
2084
+ .array(MCP_SERVER_ENUM)
2085
+ .default(["sqry"])
2086
+ .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
2087
+ allowedTools: z
2088
+ .array(z.string())
2089
+ .optional()
2090
+ .describe("Allowed built-in tools (passed as --tools comma list)"),
2091
+ disallowedTools: z
2092
+ .array(z.string())
2093
+ .optional()
2094
+ .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
2095
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2096
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2097
+ idleTimeoutMs: z
2098
+ .number()
2099
+ .int()
2100
+ .min(30_000)
2101
+ .max(3_600_000)
2102
+ .optional()
2103
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2104
+ forceRefresh: z
2105
+ .boolean()
2106
+ .default(false)
2107
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2108
+ }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2109
+ return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger }, {
2110
+ prompt,
2111
+ model,
2112
+ outputFormat,
2113
+ sessionId,
2114
+ resumeLatest,
2115
+ createNewSession,
2116
+ alwaysApprove,
2117
+ permissionMode,
2118
+ effort,
2119
+ reasoningEffort,
2120
+ approvalStrategy,
2121
+ approvalPolicy,
2122
+ mcpServers,
2123
+ allowedTools,
2124
+ disallowedTools,
2125
+ correlationId,
2126
+ optimizePrompt,
2127
+ idleTimeoutMs,
2128
+ forceRefresh,
2129
+ });
1073
2130
  });
1074
2131
  server.tool("llm_job_status", {
1075
- jobId: z.string().describe("Async job ID from *_request_async")
2132
+ jobId: z.string().describe("Async job ID from *_request_async"),
1076
2133
  }, async ({ jobId }) => {
1077
2134
  const job = asyncJobManager.getJobSnapshot(jobId);
1078
2135
  if (!job) {
1079
2136
  return {
1080
- content: [{
2137
+ content: [
2138
+ {
1081
2139
  type: "text",
1082
2140
  text: JSON.stringify({
1083
2141
  success: false,
1084
2142
  error: "Job not found",
1085
- jobId
1086
- }, null, 2)
1087
- }],
1088
- isError: true
2143
+ jobId,
2144
+ }, null, 2),
2145
+ },
2146
+ ],
2147
+ isError: true,
1089
2148
  };
1090
2149
  }
1091
2150
  return {
1092
- content: [{
2151
+ content: [
2152
+ {
1093
2153
  type: "text",
1094
2154
  text: JSON.stringify({
1095
2155
  success: true,
1096
- job
1097
- }, null, 2)
1098
- }]
2156
+ job,
2157
+ }, null, 2),
2158
+ },
2159
+ ],
1099
2160
  };
1100
2161
  });
1101
2162
  server.tool("llm_job_result", {
1102
2163
  jobId: z.string().describe("Async job ID from *_request_async"),
1103
- maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream")
2164
+ maxChars: z
2165
+ .number()
2166
+ .int()
2167
+ .min(1000)
2168
+ .max(2000000)
2169
+ .default(200000)
2170
+ .describe("Max chars returned per stream"),
1104
2171
  }, async ({ jobId, maxChars }) => {
1105
2172
  const result = asyncJobManager.getJobResult(jobId, maxChars);
1106
2173
  if (!result) {
1107
2174
  return {
1108
- content: [{
2175
+ content: [
2176
+ {
1109
2177
  type: "text",
1110
2178
  text: JSON.stringify({
1111
2179
  success: false,
1112
2180
  error: "Job not found",
1113
- jobId
1114
- }, null, 2)
1115
- }],
1116
- isError: true
2181
+ jobId,
2182
+ }, null, 2),
2183
+ },
2184
+ ],
2185
+ isError: true,
1117
2186
  };
1118
2187
  }
1119
2188
  // Parse stream-json output for Claude async jobs
@@ -1123,50 +2192,68 @@ server.tool("llm_job_result", {
1123
2192
  parsed = parseStreamJson(result.stdout);
1124
2193
  }
1125
2194
  return {
1126
- content: [{
2195
+ content: [
2196
+ {
1127
2197
  type: "text",
1128
2198
  text: JSON.stringify({
1129
2199
  success: true,
1130
2200
  result,
1131
- ...(parsed ? { parsed: { text: parsed.text, costUsd: parsed.costUsd, usage: parsed.usage, model: parsed.model, numTurns: parsed.numTurns } } : {})
1132
- }, null, 2)
1133
- }]
2201
+ ...(parsed
2202
+ ? {
2203
+ parsed: {
2204
+ text: parsed.text,
2205
+ costUsd: parsed.costUsd,
2206
+ usage: parsed.usage,
2207
+ model: parsed.model,
2208
+ numTurns: parsed.numTurns,
2209
+ },
2210
+ }
2211
+ : {}),
2212
+ }, null, 2),
2213
+ },
2214
+ ],
1134
2215
  };
1135
2216
  });
1136
2217
  server.tool("llm_job_cancel", {
1137
- jobId: z.string().describe("Async job ID from *_request_async")
2218
+ jobId: z.string().describe("Async job ID from *_request_async"),
1138
2219
  }, async ({ jobId }) => {
1139
2220
  const cancel = asyncJobManager.cancelJob(jobId);
1140
2221
  if (!cancel.canceled) {
1141
2222
  return {
1142
- content: [{
2223
+ content: [
2224
+ {
1143
2225
  type: "text",
1144
2226
  text: JSON.stringify({
1145
2227
  success: false,
1146
2228
  jobId,
1147
- reason: cancel.reason || "Unable to cancel"
1148
- }, null, 2)
1149
- }],
1150
- isError: true
2229
+ reason: cancel.reason || "Unable to cancel",
2230
+ }, null, 2),
2231
+ },
2232
+ ],
2233
+ isError: true,
1151
2234
  };
1152
2235
  }
1153
2236
  return {
1154
- content: [{
2237
+ content: [
2238
+ {
1155
2239
  type: "text",
1156
2240
  text: JSON.stringify({
1157
2241
  success: true,
1158
- jobId
1159
- }, null, 2)
1160
- }]
2242
+ jobId,
2243
+ }, null, 2),
2244
+ },
2245
+ ],
1161
2246
  };
1162
2247
  });
1163
2248
  server.tool("llm_process_health", {}, async () => {
1164
2249
  const health = asyncJobManager.getJobHealth();
1165
2250
  return {
1166
- content: [{
2251
+ content: [
2252
+ {
1167
2253
  type: "text",
1168
- text: JSON.stringify({ success: true, ...health }, null, 2)
1169
- }]
2254
+ text: JSON.stringify({ success: true, ...health }, null, 2),
2255
+ },
2256
+ ],
1170
2257
  };
1171
2258
  });
1172
2259
  //──────────────────────────────────────────────────────────────────────────────
@@ -1174,37 +2261,98 @@ server.tool("llm_process_health", {}, async () => {
1174
2261
  //──────────────────────────────────────────────────────────────────────────────
1175
2262
  server.tool("approval_list", {
1176
2263
  limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
1177
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter")
2264
+ cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter"),
1178
2265
  }, async ({ limit, cli }) => {
1179
2266
  const approvals = approvalManager.list(limit, cli);
1180
2267
  return {
1181
- content: [{
2268
+ content: [
2269
+ {
1182
2270
  type: "text",
1183
2271
  text: JSON.stringify({
1184
2272
  success: true,
1185
2273
  count: approvals.length,
1186
- approvals
1187
- }, null, 2)
1188
- }]
2274
+ approvals,
2275
+ }, null, 2),
2276
+ },
2277
+ ],
1189
2278
  };
1190
2279
  });
1191
2280
  //──────────────────────────────────────────────────────────────────────────────
1192
2281
  // List Models Tool
1193
2282
  //──────────────────────────────────────────────────────────────────────────────
1194
2283
  server.tool("list_models", {
1195
- cli: z.preprocess((value) => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional()).describe("CLI filter (claude|codex|gemini)")
2284
+ cli: z
2285
+ .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2286
+ .describe("CLI filter (claude|codex|gemini)"),
1196
2287
  }, async ({ cli }) => {
1197
2288
  const cliInfo = getCliInfo();
1198
2289
  const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
1199
2290
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
1200
2291
  });
2292
+ server.tool("cli_versions", {
2293
+ cli: z
2294
+ .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
2295
+ .describe("CLI filter (claude|codex|gemini)"),
2296
+ }, async ({ cli }) => {
2297
+ const versions = await getCliVersions(cli);
2298
+ return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
2299
+ });
2300
+ server.tool("cli_upgrade", {
2301
+ cli: z.enum(["claude", "codex", "gemini"]).describe("CLI to upgrade"),
2302
+ target: z
2303
+ .string()
2304
+ .min(1)
2305
+ .default("latest")
2306
+ .describe("Package tag/version/target to install (default: latest)"),
2307
+ dryRun: z
2308
+ .boolean()
2309
+ .default(true)
2310
+ .describe("When true, return the upgrade plan without running it"),
2311
+ timeoutMs: z
2312
+ .number()
2313
+ .int()
2314
+ .min(30_000)
2315
+ .max(3_600_000)
2316
+ .optional()
2317
+ .describe("Upgrade timeout in ms when dryRun=false"),
2318
+ }, async ({ cli, target, dryRun, timeoutMs }) => {
2319
+ try {
2320
+ const result = await runCliUpgrade({ cli, target, dryRun, timeoutMs, logger });
2321
+ return {
2322
+ content: [
2323
+ {
2324
+ type: "text",
2325
+ text: JSON.stringify({
2326
+ success: true,
2327
+ ...result,
2328
+ }, null, 2),
2329
+ },
2330
+ ],
2331
+ };
2332
+ }
2333
+ catch (error) {
2334
+ const message = error instanceof Error ? error.message : String(error);
2335
+ return {
2336
+ content: [
2337
+ {
2338
+ type: "text",
2339
+ text: JSON.stringify({
2340
+ success: false,
2341
+ error: message,
2342
+ }, null, 2),
2343
+ },
2344
+ ],
2345
+ isError: true,
2346
+ };
2347
+ }
2348
+ });
1201
2349
  //──────────────────────────────────────────────────────────────────────────────
1202
2350
  // Session Management Tools
1203
2351
  //──────────────────────────────────────────────────────────────────────────────
1204
2352
  server.tool("session_create", {
1205
2353
  cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
1206
2354
  description: z.string().optional().describe("Session description"),
1207
- setAsActive: z.boolean().default(true).describe("Set as active session")
2355
+ setAsActive: z.boolean().default(true).describe("Set as active session"),
1208
2356
  }, async ({ cli, description, setAsActive }) => {
1209
2357
  try {
1210
2358
  const session = await sessionManager.createSession(cli, description);
@@ -1213,7 +2361,8 @@ server.tool("session_create", {
1213
2361
  }
1214
2362
  logger.info(`Created new ${cli} session: ${session.id}`);
1215
2363
  return {
1216
- content: [{
2364
+ content: [
2365
+ {
1217
2366
  type: "text",
1218
2367
  text: JSON.stringify({
1219
2368
  success: true,
@@ -1222,10 +2371,11 @@ server.tool("session_create", {
1222
2371
  cli: session.cli,
1223
2372
  description: session.description,
1224
2373
  createdAt: session.createdAt,
1225
- isActive: setAsActive
1226
- }
1227
- }, null, 2)
1228
- }]
2374
+ isActive: setAsActive,
2375
+ },
2376
+ }, null, 2),
2377
+ },
2378
+ ],
1229
2379
  };
1230
2380
  }
1231
2381
  catch (error) {
@@ -1233,14 +2383,18 @@ server.tool("session_create", {
1233
2383
  }
1234
2384
  });
1235
2385
  server.tool("session_list", {
1236
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
2386
+ cli: z
2387
+ .enum(["claude", "codex", "gemini"])
2388
+ .optional()
2389
+ .describe("CLI filter (claude|codex|gemini)"),
1237
2390
  }, async ({ cli }) => {
1238
2391
  try {
1239
2392
  const sessions = await sessionManager.listSessions(cli);
1240
2393
  const activeSessions = {
1241
2394
  claude: await sessionManager.getActiveSession("claude"),
1242
2395
  codex: await sessionManager.getActiveSession("codex"),
1243
- gemini: await sessionManager.getActiveSession("gemini")
2396
+ gemini: await sessionManager.getActiveSession("gemini"),
2397
+ grok: await sessionManager.getActiveSession("grok"),
1244
2398
  };
1245
2399
  const sessionList = sessions.map(s => ({
1246
2400
  id: s.id,
@@ -1248,10 +2402,11 @@ server.tool("session_list", {
1248
2402
  description: s.description,
1249
2403
  createdAt: s.createdAt,
1250
2404
  lastUsedAt: s.lastUsedAt,
1251
- isActive: activeSessions[s.cli]?.id === s.id
2405
+ isActive: activeSessions[s.cli]?.id === s.id,
1252
2406
  }));
1253
2407
  return {
1254
- content: [{
2408
+ content: [
2409
+ {
1255
2410
  type: "text",
1256
2411
  text: JSON.stringify({
1257
2412
  total: sessionList.length,
@@ -1259,10 +2414,12 @@ server.tool("session_list", {
1259
2414
  activeSessions: {
1260
2415
  claude: activeSessions.claude?.id || null,
1261
2416
  codex: activeSessions.codex?.id || null,
1262
- gemini: activeSessions.gemini?.id || null
1263
- }
1264
- }, null, 2)
1265
- }]
2417
+ gemini: activeSessions.gemini?.id || null,
2418
+ grok: activeSessions.grok?.id || null,
2419
+ },
2420
+ }, null, 2),
2421
+ },
2422
+ ],
1266
2423
  };
1267
2424
  }
1268
2425
  catch (error) {
@@ -1271,32 +2428,36 @@ server.tool("session_list", {
1271
2428
  });
1272
2429
  server.tool("session_set_active", {
1273
2430
  cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
1274
- sessionId: z.string().nullable().describe("Session ID (null to clear)")
2431
+ sessionId: z.string().nullable().describe("Session ID (null to clear)"),
1275
2432
  }, async ({ cli, sessionId }) => {
1276
2433
  try {
1277
2434
  const success = await sessionManager.setActiveSession(cli, sessionId || null);
1278
2435
  if (!success) {
1279
2436
  return {
1280
- content: [{
2437
+ content: [
2438
+ {
1281
2439
  type: "text",
1282
2440
  text: JSON.stringify({
1283
2441
  success: false,
1284
- error: "Session not found or does not belong to the specified CLI"
1285
- }, null, 2)
1286
- }],
1287
- isError: true
2442
+ error: "Session not found or does not belong to the specified CLI",
2443
+ }, null, 2),
2444
+ },
2445
+ ],
2446
+ isError: true,
1288
2447
  };
1289
2448
  }
1290
2449
  logger.info(`Set active ${cli} session to: ${sessionId}`);
1291
2450
  return {
1292
- content: [{
2451
+ content: [
2452
+ {
1293
2453
  type: "text",
1294
2454
  text: JSON.stringify({
1295
2455
  success: true,
1296
2456
  cli,
1297
- activeSessionId: sessionId
1298
- }, null, 2)
1299
- }]
2457
+ activeSessionId: sessionId,
2458
+ }, null, 2),
2459
+ },
2460
+ ],
1300
2461
  };
1301
2462
  }
1302
2463
  catch (error) {
@@ -1304,36 +2465,40 @@ server.tool("session_set_active", {
1304
2465
  }
1305
2466
  });
1306
2467
  server.tool("session_delete", {
1307
- sessionId: z.string().describe("Session ID")
2468
+ sessionId: z.string().describe("Session ID"),
1308
2469
  }, async ({ sessionId }) => {
1309
2470
  try {
1310
2471
  const session = await sessionManager.getSession(sessionId);
1311
2472
  if (!session) {
1312
2473
  return {
1313
- content: [{
2474
+ content: [
2475
+ {
1314
2476
  type: "text",
1315
2477
  text: JSON.stringify({
1316
2478
  success: false,
1317
- error: "Session not found"
1318
- }, null, 2)
1319
- }],
1320
- isError: true
2479
+ error: "Session not found",
2480
+ }, null, 2),
2481
+ },
2482
+ ],
2483
+ isError: true,
1321
2484
  };
1322
2485
  }
1323
2486
  const success = await sessionManager.deleteSession(sessionId);
1324
2487
  logger.info(`Deleted session: ${sessionId}`);
1325
2488
  return {
1326
- content: [{
2489
+ content: [
2490
+ {
1327
2491
  type: "text",
1328
2492
  text: JSON.stringify({
1329
2493
  success,
1330
2494
  deletedSession: {
1331
2495
  id: session.id,
1332
2496
  cli: session.cli,
1333
- description: session.description
1334
- }
1335
- }, null, 2)
1336
- }]
2497
+ description: session.description,
2498
+ },
2499
+ }, null, 2),
2500
+ },
2501
+ ],
1337
2502
  };
1338
2503
  }
1339
2504
  catch (error) {
@@ -1341,34 +2506,38 @@ server.tool("session_delete", {
1341
2506
  }
1342
2507
  });
1343
2508
  server.tool("session_get", {
1344
- sessionId: z.string().describe("Session ID")
2509
+ sessionId: z.string().describe("Session ID"),
1345
2510
  }, async ({ sessionId }) => {
1346
2511
  try {
1347
2512
  const session = await sessionManager.getSession(sessionId);
1348
2513
  if (!session) {
1349
2514
  return {
1350
- content: [{
2515
+ content: [
2516
+ {
1351
2517
  type: "text",
1352
2518
  text: JSON.stringify({
1353
2519
  success: false,
1354
- error: "Session not found"
1355
- }, null, 2)
1356
- }],
1357
- isError: true
2520
+ error: "Session not found",
2521
+ }, null, 2),
2522
+ },
2523
+ ],
2524
+ isError: true,
1358
2525
  };
1359
2526
  }
1360
2527
  const activeSession = await sessionManager.getActiveSession(session.cli);
1361
2528
  return {
1362
- content: [{
2529
+ content: [
2530
+ {
1363
2531
  type: "text",
1364
2532
  text: JSON.stringify({
1365
2533
  success: true,
1366
2534
  session: {
1367
2535
  ...session,
1368
- isActive: activeSession?.id === session.id
1369
- }
1370
- }, null, 2)
1371
- }]
2536
+ isActive: activeSession?.id === session.id,
2537
+ },
2538
+ }, null, 2),
2539
+ },
2540
+ ],
1372
2541
  };
1373
2542
  }
1374
2543
  catch (error) {
@@ -1376,20 +2545,25 @@ server.tool("session_get", {
1376
2545
  }
1377
2546
  });
1378
2547
  server.tool("session_clear_all", {
1379
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
2548
+ cli: z
2549
+ .enum(["claude", "codex", "gemini"])
2550
+ .optional()
2551
+ .describe("CLI filter (claude|codex|gemini)"),
1380
2552
  }, async ({ cli }) => {
1381
2553
  try {
1382
2554
  const count = await sessionManager.clearAllSessions(cli);
1383
- logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ''}`);
2555
+ logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
1384
2556
  return {
1385
- content: [{
2557
+ content: [
2558
+ {
1386
2559
  type: "text",
1387
2560
  text: JSON.stringify({
1388
2561
  success: true,
1389
2562
  deletedCount: count,
1390
- cli: cli || "all"
1391
- }, null, 2)
1392
- }]
2563
+ cli: cli || "all",
2564
+ }, null, 2),
2565
+ },
2566
+ ],
1393
2567
  };
1394
2568
  }
1395
2569
  catch (error) {
@@ -1423,15 +2597,17 @@ function registerHealthResource() {
1423
2597
  server.registerResource("health", "health://status", {
1424
2598
  title: "🏥 Health Status",
1425
2599
  description: "DB connectivity and latency",
1426
- mimeType: "application/json"
2600
+ mimeType: "application/json",
1427
2601
  }, async () => {
1428
2602
  const health = await checkHealth(db);
1429
2603
  return {
1430
- contents: [{
2604
+ contents: [
2605
+ {
1431
2606
  uri: "health://status",
1432
2607
  text: JSON.stringify(health, null, 2),
1433
- mimeType: "application/json"
1434
- }]
2608
+ mimeType: "application/json",
2609
+ },
2610
+ ],
1435
2611
  };
1436
2612
  });
1437
2613
  logger.info("Health check resource registered");
@@ -1440,15 +2616,17 @@ function registerHealthResource() {
1440
2616
  server.registerResource("process-health", "metrics://process-health", {
1441
2617
  title: "Process Health",
1442
2618
  description: "Async job health (CPU, memory, zombie detection)",
1443
- mimeType: "application/json"
2619
+ mimeType: "application/json",
1444
2620
  }, async (uri) => {
1445
2621
  const health = asyncJobManager.getJobHealth();
1446
2622
  return {
1447
- contents: [{
2623
+ contents: [
2624
+ {
1448
2625
  uri: uri.href,
1449
2626
  mimeType: "application/json",
1450
- text: JSON.stringify(health, null, 2)
1451
- }]
2627
+ text: JSON.stringify(health, null, 2),
2628
+ },
2629
+ ],
1452
2630
  };
1453
2631
  });
1454
2632
  logger.info("Process health resource registered");
@@ -1468,6 +2646,8 @@ async function shutdown(signal) {
1468
2646
  await db.disconnect();
1469
2647
  logger.info("Database connections closed");
1470
2648
  }
2649
+ flightRecorder.close();
2650
+ logger.info("Flight recorder closed");
1471
2651
  process.exit(0);
1472
2652
  }
1473
2653
  catch (error) {
@@ -1492,11 +2672,9 @@ async function main() {
1492
2672
  }
1493
2673
  // Guard: only auto-start when run directly (not imported for testing)
1494
2674
  // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
1495
- const __entryUrl = process.argv[1]
1496
- ? new URL(realpathSync(process.argv[1]), "file://").href
1497
- : "";
2675
+ const __entryUrl = process.argv[1] ? new URL(realpathSync(process.argv[1]), "file://").href : "";
1498
2676
  if (__entryUrl === import.meta.url) {
1499
- main().catch((error) => {
2677
+ main().catch(error => {
1500
2678
  logger.error("Fatal server error:", error);
1501
2679
  process.exit(1);
1502
2680
  });