llm-cli-gateway 1.0.1 → 1.1.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -11,15 +11,16 @@ import { parseStreamJson } from "./stream-json-parser.js";
11
11
  import { createSessionManager } from "./session-manager.js";
12
12
  import { ResourceProvider } from "./resources.js";
13
13
  import { PerformanceMetrics } from "./metrics.js";
14
- import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText } from "./optimizer.js";
14
+ import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
15
15
  import { loadConfig } from "./config.js";
16
16
  import { checkHealth } from "./health.js";
17
17
  import { getCliInfo, resolveModelAlias } from "./model-registry.js";
18
18
  import { AsyncJobManager } from "./async-job-manager.js";
19
19
  import { ApprovalManager } from "./approval-manager.js";
20
20
  import { checkReviewIntegrity } from "./review-integrity.js";
21
- import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES } from "./claude-mcp-config.js";
22
- import { resolveSessionResumeArgs, GATEWAY_SESSION_PREFIX } from "./request-helpers.js";
21
+ import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
22
+ import { resolveSessionResumeArgs, sanitizeCliArgValues, GATEWAY_SESSION_PREFIX, } from "./request-helpers.js";
23
+ import { createFlightRecorder } from "./flight-recorder.js";
23
24
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
24
25
  const logger = {
25
26
  info: (message, ...args) => {
@@ -32,7 +33,7 @@ const logger = {
32
33
  if (process.env.DEBUG) {
33
34
  console.error(`[DEBUG] ${new Date().toISOString()} - ${message}`, ...args);
34
35
  }
35
- }
36
+ },
36
37
  };
37
38
  function logOptimizationTokens(kind, correlationId, original, optimized) {
38
39
  const originalTokens = estimateTokens(original);
@@ -106,6 +107,7 @@ let sessionManager;
106
107
  let db = null;
107
108
  const performanceMetrics = new PerformanceMetrics();
108
109
  let resourceProvider;
110
+ const flightRecorder = createFlightRecorder(logger);
109
111
  const asyncJobManager = new AsyncJobManager(logger, (cli, durationMs, success) => {
110
112
  performanceMetrics.recordRequest(cli, durationMs, success);
111
113
  });
@@ -147,7 +149,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
147
149
  return {
148
150
  stdout: result.stdout,
149
151
  stderr: result.stderr,
150
- code: result.exitCode ?? 1
152
+ code: result.exitCode ?? 1,
151
153
  };
152
154
  }
153
155
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
@@ -159,7 +161,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
159
161
  jobId: job.id,
160
162
  cli,
161
163
  correlationId: corrId,
162
- message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, fetch with llm_job_result.`
164
+ message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, fetch with llm_job_result.`,
163
165
  };
164
166
  }
165
167
  function isDeferredResponse(result) {
@@ -167,7 +169,8 @@ function isDeferredResponse(result) {
167
169
  }
168
170
  function buildDeferredToolResponse(deferred, sessionId) {
169
171
  return {
170
- content: [{
172
+ content: [
173
+ {
171
174
  type: "text",
172
175
  text: JSON.stringify({
173
176
  status: "deferred",
@@ -178,9 +181,10 @@ function buildDeferredToolResponse(deferred, sessionId) {
178
181
  sessionId: sessionId || null,
179
182
  pollWith: "llm_job_status",
180
183
  fetchWith: "llm_job_result",
181
- cancelWith: "llm_job_cancel"
182
- }, null, 2)
183
- }]
184
+ cancelWith: "llm_job_cancel",
185
+ }, null, 2),
186
+ },
187
+ ],
184
188
  };
185
189
  }
186
190
  // Helper function for standardized error responses
@@ -211,20 +215,61 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
211
215
  }
212
216
  return {
213
217
  content: [{ type: "text", text: errorMessage }],
214
- isError: true
218
+ isError: true,
219
+ structuredContent: {
220
+ correlationId: correlationId || null,
221
+ cli,
222
+ exitCode: code,
223
+ errorCategory: code === 124
224
+ ? "timeout"
225
+ : code === 125
226
+ ? "idle_timeout"
227
+ : error
228
+ ? "spawn_error"
229
+ : "cli_error",
230
+ },
215
231
  };
216
232
  }
233
+ function extractUsageAndCost(cli, output, outputFormat) {
234
+ if (cli === "claude" && outputFormat === "stream-json") {
235
+ const parsed = parseStreamJson(output);
236
+ return {
237
+ inputTokens: parsed.usage?.inputTokens,
238
+ outputTokens: parsed.usage?.outputTokens,
239
+ costUsd: parsed.costUsd ?? undefined,
240
+ };
241
+ }
242
+ return {};
243
+ }
244
+ function safeFlightStart(entry) {
245
+ try {
246
+ flightRecorder.logStart(entry);
247
+ }
248
+ catch (error) {
249
+ logger.error("Flight recorder logStart failed", error);
250
+ }
251
+ }
252
+ function safeFlightComplete(correlationId, result) {
253
+ try {
254
+ flightRecorder.logComplete(correlationId, result);
255
+ }
256
+ catch (error) {
257
+ logger.error("Flight recorder logComplete failed", error);
258
+ }
259
+ }
217
260
  function createApprovalDeniedResponse(operation, decision) {
218
261
  return {
219
- content: [{
262
+ content: [
263
+ {
220
264
  type: "text",
221
265
  text: JSON.stringify({
222
266
  success: false,
223
267
  error: `${operation} denied by MCP-managed approval policy`,
224
- approval: decision
225
- }, null, 2)
226
- }],
227
- isError: true
268
+ approval: decision,
269
+ }, null, 2),
270
+ },
271
+ ],
272
+ isError: true,
228
273
  };
229
274
  }
230
275
  function normalizeMcpServers(mcpServers) {
@@ -235,7 +280,8 @@ function normalizeMcpServers(mcpServers) {
235
280
  }
236
281
  function createMcpConfigErrorResponse(operation, correlationId, requested, message, missing = []) {
237
282
  return {
238
- content: [{
283
+ content: [
284
+ {
239
285
  type: "text",
240
286
  text: JSON.stringify({
241
287
  success: false,
@@ -244,11 +290,12 @@ function createMcpConfigErrorResponse(operation, correlationId, requested, messa
244
290
  correlationId,
245
291
  mcpServers: {
246
292
  requested,
247
- missing
248
- }
249
- }, null, 2)
250
- }],
251
- isError: true
293
+ missing,
294
+ },
295
+ }, null, 2),
296
+ },
297
+ ],
298
+ isError: true,
252
299
  };
253
300
  }
254
301
  function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, strictMcpConfig) {
@@ -260,13 +307,13 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
260
307
  const message = error instanceof Error ? error.message : String(error);
261
308
  logger.error(`[${correlationId}] ${operation} failed to build Claude MCP config: ${message}`);
262
309
  return {
263
- errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, message)
310
+ errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, message),
264
311
  };
265
312
  }
266
313
  if (strictMcpConfig && mcpConfig.missing.length > 0) {
267
314
  const missing = mcpConfig.missing.join(", ");
268
315
  return {
269
- errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, `strictMcpConfig=true but requested servers are unavailable: ${missing}`, mcpConfig.missing)
316
+ errorResponse: createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, `strictMcpConfig=true but requested servers are unavailable: ${missing}`, mcpConfig.missing),
270
317
  };
271
318
  }
272
319
  return { config: mcpConfig };
@@ -279,13 +326,15 @@ for (const skill of loadedSkills) {
279
326
  server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
280
327
  title: skill.name,
281
328
  description: skill.description,
282
- mimeType: "text/markdown"
329
+ mimeType: "text/markdown",
283
330
  }, async () => ({
284
- contents: [{
331
+ contents: [
332
+ {
285
333
  uri: `skills://${skill.name}`,
286
334
  mimeType: "text/markdown",
287
- text: skill.content
288
- }]
335
+ text: skill.content,
336
+ },
337
+ ],
289
338
  }));
290
339
  }
291
340
  logger.info(`Registered ${loadedSkills.length} skill resources`);
@@ -293,7 +342,7 @@ logger.info(`Registered ${loadedSkills.length} skill resources`);
293
342
  server.registerResource("all-sessions", "sessions://all", {
294
343
  title: "📋 All Sessions",
295
344
  description: "All conversation sessions across CLIs",
296
- mimeType: "application/json"
345
+ mimeType: "application/json",
297
346
  }, async (uri) => {
298
347
  logger.debug("Reading all sessions resource");
299
348
  const contents = await resourceProvider.readResource(uri.href);
@@ -303,7 +352,7 @@ server.registerResource("all-sessions", "sessions://all", {
303
352
  server.registerResource("claude-sessions", "sessions://claude", {
304
353
  title: "🤖 Claude Sessions",
305
354
  description: "Claude conversation sessions",
306
- mimeType: "application/json"
355
+ mimeType: "application/json",
307
356
  }, async (uri) => {
308
357
  logger.debug("Reading Claude sessions resource");
309
358
  const contents = await resourceProvider.readResource(uri.href);
@@ -313,7 +362,7 @@ server.registerResource("claude-sessions", "sessions://claude", {
313
362
  server.registerResource("codex-sessions", "sessions://codex", {
314
363
  title: "💻 Codex Sessions",
315
364
  description: "Codex conversation sessions",
316
- mimeType: "application/json"
365
+ mimeType: "application/json",
317
366
  }, async (uri) => {
318
367
  logger.debug("Reading Codex sessions resource");
319
368
  const contents = await resourceProvider.readResource(uri.href);
@@ -323,7 +372,7 @@ server.registerResource("codex-sessions", "sessions://codex", {
323
372
  server.registerResource("gemini-sessions", "sessions://gemini", {
324
373
  title: "✨ Gemini Sessions",
325
374
  description: "Gemini conversation sessions",
326
- mimeType: "application/json"
375
+ mimeType: "application/json",
327
376
  }, async (uri) => {
328
377
  logger.debug("Reading Gemini sessions resource");
329
378
  const contents = await resourceProvider.readResource(uri.href);
@@ -333,7 +382,7 @@ server.registerResource("gemini-sessions", "sessions://gemini", {
333
382
  server.registerResource("claude-models", "models://claude", {
334
383
  title: "🧠 Claude Models",
335
384
  description: "Claude models and capabilities",
336
- mimeType: "application/json"
385
+ mimeType: "application/json",
337
386
  }, async (uri) => {
338
387
  logger.debug("Reading Claude models resource");
339
388
  const contents = await resourceProvider.readResource(uri.href);
@@ -343,7 +392,7 @@ server.registerResource("claude-models", "models://claude", {
343
392
  server.registerResource("codex-models", "models://codex", {
344
393
  title: "🔧 Codex Models",
345
394
  description: "Codex models and capabilities",
346
- mimeType: "application/json"
395
+ mimeType: "application/json",
347
396
  }, async (uri) => {
348
397
  logger.debug("Reading Codex models resource");
349
398
  const contents = await resourceProvider.readResource(uri.href);
@@ -353,7 +402,7 @@ server.registerResource("codex-models", "models://codex", {
353
402
  server.registerResource("gemini-models", "models://gemini", {
354
403
  title: "🌟 Gemini Models",
355
404
  description: "Gemini models and capabilities",
356
- mimeType: "application/json"
405
+ mimeType: "application/json",
357
406
  }, async (uri) => {
358
407
  logger.debug("Reading Gemini models resource");
359
408
  const contents = await resourceProvider.readResource(uri.href);
@@ -363,7 +412,7 @@ server.registerResource("gemini-models", "models://gemini", {
363
412
  server.registerResource("performance-metrics", "metrics://performance", {
364
413
  title: "📈 Performance Metrics",
365
414
  description: "Request counts, latency, success/failure rates",
366
- mimeType: "application/json"
415
+ mimeType: "application/json",
367
416
  }, async (uri) => {
368
417
  logger.debug("Reading performance metrics resource");
369
418
  const contents = await resourceProvider.readResource(uri.href);
@@ -374,10 +423,16 @@ function prepareClaudeRequest(params) {
374
423
  const cliInfo = getCliInfo();
375
424
  const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
376
425
  // Review integrity check on raw prompt (before optimization)
377
- const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools: params.allowedTools, disallowedTools: params.disallowedTools });
426
+ const reviewIntegrity = checkReviewIntegrity({
427
+ prompt: params.prompt,
428
+ allowedTools: params.allowedTools,
429
+ disallowedTools: params.disallowedTools,
430
+ });
378
431
  if (reviewIntegrity.violations.length > 0) {
379
432
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
380
- cli: "claude", operation: params.operation, score: reviewIntegrity.totalScore
433
+ cli: "claude",
434
+ operation: params.operation,
435
+ score: reviewIntegrity.totalScore,
381
436
  });
382
437
  }
383
438
  let effectivePrompt = params.prompt;
@@ -405,7 +460,7 @@ function prepareClaudeRequest(params) {
405
460
  disallowedTools: params.disallowedTools,
406
461
  policy: params.approvalPolicy,
407
462
  metadata: { model: resolvedModel || "default", strictMcpConfig: params.strictMcpConfig },
408
- reviewIntegrity
463
+ reviewIntegrity,
409
464
  });
410
465
  if (approvalDecision.status !== "approved") {
411
466
  return createApprovalDeniedResponse(params.operation, approvalDecision);
@@ -421,9 +476,11 @@ function prepareClaudeRequest(params) {
421
476
  args.push("--output-format", "stream-json", "--include-partial-messages");
422
477
  }
423
478
  if (params.allowedTools && params.allowedTools.length > 0) {
479
+ sanitizeCliArgValues(params.allowedTools, "allowedTools");
424
480
  args.push("--allowed-tools", ...params.allowedTools);
425
481
  }
426
482
  if (params.disallowedTools && params.disallowedTools.length > 0) {
483
+ sanitizeCliArgValues(params.disallowedTools, "disallowedTools");
427
484
  args.push("--disallowed-tools", ...params.disallowedTools);
428
485
  }
429
486
  if (params.approvalStrategy === "mcp_managed") {
@@ -438,7 +495,16 @@ function prepareClaudeRequest(params) {
438
495
  args.push("--strict-mcp-config");
439
496
  }
440
497
  }
441
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, mcpConfig, approvalDecision, reviewIntegrity, args };
498
+ return {
499
+ corrId,
500
+ effectivePrompt,
501
+ resolvedModel,
502
+ requestedMcpServers,
503
+ mcpConfig,
504
+ approvalDecision,
505
+ reviewIntegrity,
506
+ args,
507
+ };
442
508
  }
443
509
  function prepareCodexRequest(params) {
444
510
  const corrId = params.correlationId || randomUUID();
@@ -448,7 +514,9 @@ function prepareCodexRequest(params) {
448
514
  const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
449
515
  if (reviewIntegrity.violations.length > 0) {
450
516
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
451
- cli: "codex", operation: params.operation, score: reviewIntegrity.totalScore
517
+ cli: "codex",
518
+ operation: params.operation,
519
+ score: reviewIntegrity.totalScore,
452
520
  });
453
521
  }
454
522
  let effectivePrompt = params.prompt;
@@ -469,7 +537,7 @@ function prepareCodexRequest(params) {
469
537
  requestedMcpServers,
470
538
  policy: params.approvalPolicy,
471
539
  metadata: { model: resolvedModel || "default" },
472
- reviewIntegrity
540
+ reviewIntegrity,
473
541
  });
474
542
  if (approvalDecision.status !== "approved") {
475
543
  return createApprovalDeniedResponse(params.operation, approvalDecision);
@@ -484,17 +552,30 @@ function prepareCodexRequest(params) {
484
552
  args.push("--dangerously-bypass-approvals-and-sandbox");
485
553
  }
486
554
  args.push("--skip-git-repo-check", effectivePrompt);
487
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
555
+ return {
556
+ corrId,
557
+ effectivePrompt,
558
+ resolvedModel,
559
+ requestedMcpServers,
560
+ approvalDecision,
561
+ reviewIntegrity,
562
+ args,
563
+ };
488
564
  }
489
565
  function prepareGeminiRequest(params) {
490
566
  const corrId = params.correlationId || randomUUID();
491
567
  const cliInfo = getCliInfo();
492
568
  const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
493
569
  // Review integrity check on raw prompt (before optimization)
494
- const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools: params.allowedTools });
570
+ const reviewIntegrity = checkReviewIntegrity({
571
+ prompt: params.prompt,
572
+ allowedTools: params.allowedTools,
573
+ });
495
574
  if (reviewIntegrity.violations.length > 0) {
496
575
  logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
497
- cli: "gemini", operation: params.operation, score: reviewIntegrity.totalScore
576
+ cli: "gemini",
577
+ operation: params.operation,
578
+ score: reviewIntegrity.totalScore,
498
579
  });
499
580
  }
500
581
  let effectivePrompt = params.prompt;
@@ -516,7 +597,7 @@ function prepareGeminiRequest(params) {
516
597
  allowedTools: params.allowedTools,
517
598
  policy: params.approvalPolicy,
518
599
  metadata: { model: resolvedModel || "default" },
519
- reviewIntegrity
600
+ reviewIntegrity,
520
601
  });
521
602
  if (approvalDecision.status !== "approved") {
522
603
  return createApprovalDeniedResponse(params.operation, approvalDecision);
@@ -529,17 +610,28 @@ function prepareGeminiRequest(params) {
529
610
  if (effectiveApprovalMode)
530
611
  args.push("--approval-mode", effectiveApprovalMode);
531
612
  if (params.allowedTools && params.allowedTools.length > 0) {
613
+ sanitizeCliArgValues(params.allowedTools, "allowedTools");
532
614
  params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
533
615
  }
534
616
  if (requestedMcpServers.length > 0) {
617
+ sanitizeCliArgValues(requestedMcpServers, "mcpServers");
535
618
  requestedMcpServers.forEach(serverName => args.push("--allowed-mcp-server-names", serverName));
536
619
  }
537
620
  if (params.includeDirs && params.includeDirs.length > 0) {
621
+ sanitizeCliArgValues(params.includeDirs, "includeDirs");
538
622
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
539
623
  }
540
- return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
624
+ return {
625
+ corrId,
626
+ effectivePrompt,
627
+ resolvedModel,
628
+ requestedMcpServers,
629
+ approvalDecision,
630
+ reviewIntegrity,
631
+ args,
632
+ };
541
633
  }
542
- function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, resumable, outputFormat) {
634
+ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
543
635
  let finalStdout = stdout;
544
636
  // Skip response optimization for JSON output to prevent corrupting structured data
545
637
  if (optimizeResponse && outputFormat !== "json") {
@@ -548,7 +640,9 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
548
640
  finalStdout = optimized;
549
641
  }
550
642
  // Append review integrity warnings to response text (skip for JSON output to avoid corruption)
551
- if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0 && outputFormat !== "json") {
643
+ if (prep.reviewIntegrity &&
644
+ prep.reviewIntegrity.violations.length > 0 &&
645
+ outputFormat !== "json") {
552
646
  const warnings = prep.reviewIntegrity.violations
553
647
  .map(v => `- [${v.type}] ${v.detail}`)
554
648
  .join("\n");
@@ -556,9 +650,23 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
556
650
  }
557
651
  const response = {
558
652
  content: [{ type: "text", text: finalStdout }],
653
+ structuredContent: {
654
+ model: prep.resolvedModel || "default",
655
+ cli,
656
+ correlationId: corrId,
657
+ sessionId: sessionId || null,
658
+ durationMs,
659
+ ...extractUsageAndCost(cli, stdout, outputFormat),
660
+ exitCode: 0,
661
+ retryCount: 0,
662
+ },
559
663
  mcpServers: prep.mcpConfig
560
- ? { requested: prep.requestedMcpServers, enabled: prep.mcpConfig.enabled, missing: prep.mcpConfig.missing }
561
- : { requested: prep.requestedMcpServers }
664
+ ? {
665
+ requested: prep.requestedMcpServers,
666
+ enabled: prep.mcpConfig.enabled,
667
+ missing: prep.mcpConfig.missing,
668
+ }
669
+ : { requested: prep.requestedMcpServers },
562
670
  };
563
671
  if (sessionId) {
564
672
  response.sessionId = sessionId;
@@ -577,22 +685,37 @@ function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, res
577
685
  export async function handleGeminiRequest(deps, params) {
578
686
  const startTime = Date.now();
579
687
  const prep = prepareGeminiRequest({
580
- prompt: params.prompt, model: params.model, approvalMode: params.approvalMode,
581
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
582
- allowedTools: params.allowedTools, includeDirs: params.includeDirs,
583
- mcpServers: params.mcpServers, correlationId: params.correlationId,
584
- optimizePrompt: params.optimizePrompt, operation: "gemini_request"
688
+ prompt: params.prompt,
689
+ model: params.model,
690
+ approvalMode: params.approvalMode,
691
+ approvalStrategy: params.approvalStrategy,
692
+ approvalPolicy: params.approvalPolicy,
693
+ allowedTools: params.allowedTools,
694
+ includeDirs: params.includeDirs,
695
+ mcpServers: params.mcpServers,
696
+ correlationId: params.correlationId,
697
+ optimizePrompt: params.optimizePrompt,
698
+ operation: "gemini_request",
585
699
  });
586
700
  if (!("args" in prep))
587
701
  return prep;
588
702
  const { corrId, args } = prep;
589
703
  let durationMs = 0;
590
704
  let wasSuccessful = false;
705
+ safeFlightStart({
706
+ correlationId: corrId,
707
+ cli: "gemini",
708
+ model: prep.resolvedModel || "default",
709
+ prompt: params.prompt,
710
+ sessionId: params.sessionId,
711
+ });
591
712
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
592
713
  try {
593
714
  // Session arg planning (pure, no I/O)
594
715
  const sessionResult = resolveSessionResumeArgs({
595
- sessionId: params.sessionId, resumeLatest: params.resumeLatest, createNewSession: params.createNewSession
716
+ sessionId: params.sessionId,
717
+ resumeLatest: params.resumeLatest,
718
+ createNewSession: params.createNewSession,
596
719
  });
597
720
  args.push(...sessionResult.resumeArgs);
598
721
  const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs));
@@ -604,6 +727,16 @@ export async function handleGeminiRequest(deps, params) {
604
727
  durationMs = Math.max(0, Date.now() - startTime);
605
728
  if (code !== 0) {
606
729
  deps.logger.info(`[${corrId}] gemini_request failed in ${durationMs}ms`);
730
+ safeFlightComplete(corrId, {
731
+ response: stderr || "",
732
+ durationMs,
733
+ retryCount: 0,
734
+ circuitBreakerState: "closed",
735
+ optimizationApplied: false,
736
+ exitCode: code,
737
+ errorMessage: stderr || `Exit code ${code}`,
738
+ status: "failed",
739
+ });
607
740
  return createErrorResponse("gemini", code, stderr, corrId);
608
741
  }
609
742
  wasSuccessful = true;
@@ -628,11 +761,32 @@ export async function handleGeminiRequest(deps, params) {
628
761
  effectiveSessionId = newSession.id;
629
762
  }
630
763
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
631
- return buildCliResponse(stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, sessionResult.userProvidedSession);
764
+ const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
765
+ safeFlightComplete(corrId, {
766
+ response: stdout,
767
+ durationMs,
768
+ retryCount: 0,
769
+ circuitBreakerState: "closed",
770
+ approvalDecision: prep.approvalDecision?.status,
771
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
772
+ exitCode: 0,
773
+ status: "completed",
774
+ });
775
+ return response;
632
776
  }
633
777
  catch (error) {
634
778
  const elapsedMs = Math.max(0, Date.now() - startTime);
635
779
  deps.logger.info(`[${corrId}] gemini_request threw exception after ${elapsedMs}ms`);
780
+ safeFlightComplete(corrId, {
781
+ response: "",
782
+ durationMs: elapsedMs,
783
+ retryCount: 0,
784
+ circuitBreakerState: "closed",
785
+ optimizationApplied: false,
786
+ exitCode: 1,
787
+ errorMessage: error.message,
788
+ status: "failed",
789
+ });
636
790
  return createErrorResponse("gemini", 1, "", corrId, error);
637
791
  }
638
792
  finally {
@@ -642,11 +796,17 @@ export async function handleGeminiRequest(deps, params) {
642
796
  }
643
797
  export async function handleGeminiRequestAsync(deps, params) {
644
798
  const prep = prepareGeminiRequest({
645
- prompt: params.prompt, model: params.model, approvalMode: params.approvalMode,
646
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
647
- allowedTools: params.allowedTools, includeDirs: params.includeDirs,
648
- mcpServers: params.mcpServers, correlationId: params.correlationId,
649
- optimizePrompt: params.optimizePrompt, operation: "gemini_request_async"
799
+ prompt: params.prompt,
800
+ model: params.model,
801
+ approvalMode: params.approvalMode,
802
+ approvalStrategy: params.approvalStrategy,
803
+ approvalPolicy: params.approvalPolicy,
804
+ allowedTools: params.allowedTools,
805
+ includeDirs: params.includeDirs,
806
+ mcpServers: params.mcpServers,
807
+ correlationId: params.correlationId,
808
+ optimizePrompt: params.optimizePrompt,
809
+ operation: "gemini_request_async",
650
810
  });
651
811
  if (!("args" in prep))
652
812
  return prep;
@@ -654,7 +814,9 @@ export async function handleGeminiRequestAsync(deps, params) {
654
814
  try {
655
815
  // Session arg planning (pure, no I/O)
656
816
  const sessionResult = resolveSessionResumeArgs({
657
- sessionId: params.sessionId, resumeLatest: params.resumeLatest, createNewSession: params.createNewSession
817
+ sessionId: params.sessionId,
818
+ resumeLatest: params.resumeLatest,
819
+ createNewSession: params.createNewSession,
658
820
  });
659
821
  args.push(...sessionResult.resumeArgs);
660
822
  // Pre-start session I/O (async handlers: prevent orphaned jobs)
@@ -686,16 +848,18 @@ export async function handleGeminiRequestAsync(deps, params) {
686
848
  sessionId: effectiveSessionId || null,
687
849
  resumable: sessionResult.userProvidedSession,
688
850
  approval: approvalDecision,
689
- mcpServers: { requested: requestedMcpServers }
851
+ mcpServers: { requested: requestedMcpServers },
690
852
  };
691
853
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
692
854
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
693
855
  }
694
856
  return {
695
- content: [{
857
+ content: [
858
+ {
696
859
  type: "text",
697
- text: JSON.stringify(asyncResponse, null, 2)
698
- }]
860
+ text: JSON.stringify(asyncResponse, null, 2),
861
+ },
862
+ ],
699
863
  };
700
864
  }
701
865
  catch (error) {
@@ -704,12 +868,16 @@ export async function handleGeminiRequestAsync(deps, params) {
704
868
  }
705
869
  export async function handleCodexRequestAsync(deps, params) {
706
870
  const prep = prepareCodexRequest({
707
- prompt: params.prompt, model: params.model, fullAuto: params.fullAuto,
871
+ prompt: params.prompt,
872
+ model: params.model,
873
+ fullAuto: params.fullAuto,
708
874
  dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
709
- approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
875
+ approvalStrategy: params.approvalStrategy,
876
+ approvalPolicy: params.approvalPolicy,
710
877
  mcpServers: params.mcpServers,
711
- correlationId: params.correlationId, optimizePrompt: params.optimizePrompt,
712
- operation: "codex_request_async"
878
+ correlationId: params.correlationId,
879
+ optimizePrompt: params.optimizePrompt,
880
+ operation: "codex_request_async",
713
881
  });
714
882
  if (!("args" in prep))
715
883
  return prep;
@@ -742,16 +910,18 @@ export async function handleCodexRequestAsync(deps, params) {
742
910
  job,
743
911
  sessionId: effectiveSessionId || null,
744
912
  approval: approvalDecision,
745
- mcpServers: { requested: requestedMcpServers }
913
+ mcpServers: { requested: requestedMcpServers },
746
914
  };
747
915
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
748
916
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
749
917
  }
750
918
  return {
751
- content: [{
919
+ content: [
920
+ {
752
921
  type: "text",
753
- text: JSON.stringify(asyncResponse, null, 2)
754
- }]
922
+ text: JSON.stringify(asyncResponse, null, 2),
923
+ },
924
+ ],
755
925
  };
756
926
  }
757
927
  catch (error) {
@@ -762,35 +932,86 @@ export async function handleCodexRequestAsync(deps, params) {
762
932
  // Claude Code Tool
763
933
  //──────────────────────────────────────────────────────────────────────────────
764
934
  server.tool("claude_request", {
765
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
766
- model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
767
- outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
935
+ prompt: z
936
+ .string()
937
+ .min(1, "Prompt cannot be empty")
938
+ .max(100000, "Prompt too long (max 100k chars)")
939
+ .describe("Prompt text for Claude"),
940
+ model: z
941
+ .string()
942
+ .optional()
943
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
944
+ outputFormat: z
945
+ .enum(["text", "json", "stream-json"])
946
+ .default("text")
947
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
768
948
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
769
949
  continueSession: z.boolean().default(false).describe("Continue active session"),
770
950
  createNewSession: z.boolean().default(false).describe("Force new session"),
771
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
951
+ allowedTools: z
952
+ .array(z.string())
953
+ .optional()
954
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
772
955
  disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
773
- dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
774
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
775
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
776
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
777
- strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
956
+ dangerouslySkipPermissions: z
957
+ .boolean()
958
+ .default(false)
959
+ .describe("Bypass permissions (sandbox only)"),
960
+ approvalStrategy: z
961
+ .enum(["legacy", "mcp_managed"])
962
+ .default("legacy")
963
+ .describe("Approval strategy"),
964
+ approvalPolicy: z
965
+ .enum(["strict", "balanced", "permissive"])
966
+ .optional()
967
+ .describe("Approval policy override"),
968
+ mcpServers: z
969
+ .array(MCP_SERVER_ENUM)
970
+ .default(["sqry"])
971
+ .describe("MCP servers exposed to Claude"),
972
+ strictMcpConfig: z
973
+ .boolean()
974
+ .default(false)
975
+ .describe("Restrict Claude to provided MCP config only"),
778
976
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
779
977
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
780
978
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
781
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
782
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
979
+ idleTimeoutMs: z
980
+ .number()
981
+ .int()
982
+ .min(30_000)
983
+ .max(3_600_000)
984
+ .optional()
985
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
986
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
783
987
  const startTime = Date.now();
784
988
  const prep = prepareClaudeRequest({
785
- prompt, model, outputFormat, allowedTools, disallowedTools, dangerouslySkipPermissions,
786
- approvalStrategy, approvalPolicy, mcpServers,
787
- strictMcpConfig, correlationId, optimizePrompt, operation: "claude_request"
989
+ prompt,
990
+ model,
991
+ outputFormat,
992
+ allowedTools,
993
+ disallowedTools,
994
+ dangerouslySkipPermissions,
995
+ approvalStrategy,
996
+ approvalPolicy,
997
+ mcpServers,
998
+ strictMcpConfig,
999
+ correlationId,
1000
+ optimizePrompt,
1001
+ operation: "claude_request",
788
1002
  });
789
1003
  if (!("args" in prep))
790
1004
  return prep;
791
1005
  const { corrId, args } = prep;
792
1006
  let durationMs = 0;
793
1007
  let wasSuccessful = false;
1008
+ safeFlightStart({
1009
+ correlationId: corrId,
1010
+ cli: "claude",
1011
+ model: prep.resolvedModel || "default",
1012
+ prompt,
1013
+ sessionId,
1014
+ });
794
1015
  logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
795
1016
  try {
796
1017
  // Session management
@@ -812,9 +1033,7 @@ server.tool("claude_request", {
812
1033
  await sessionManager.updateSessionUsage(effectiveSessionId);
813
1034
  }
814
1035
  // Idle timeout only for stream-json (text/json produce no output until done)
815
- const effectiveIdleTimeout = outputFormat === "stream-json"
816
- ? resolveIdleTimeout("claude", idleTimeoutMs)
817
- : undefined;
1036
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
818
1037
  const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat);
819
1038
  // Deferred — job still running, return async reference
820
1039
  if (isDeferredResponse(result)) {
@@ -824,6 +1043,16 @@ server.tool("claude_request", {
824
1043
  durationMs = Math.max(0, Date.now() - startTime);
825
1044
  if (code !== 0) {
826
1045
  logger.info(`[${corrId}] claude_request failed in ${durationMs}ms`);
1046
+ safeFlightComplete(corrId, {
1047
+ response: stderr || "",
1048
+ durationMs,
1049
+ retryCount: 0,
1050
+ circuitBreakerState: "closed",
1051
+ optimizationApplied: optimizePrompt || optimizeResponse,
1052
+ exitCode: code,
1053
+ errorMessage: stderr || `Exit code ${code}`,
1054
+ status: "failed",
1055
+ });
827
1056
  return createErrorResponse("claude", code, stderr, corrId);
828
1057
  }
829
1058
  wasSuccessful = true;
@@ -841,13 +1070,44 @@ server.tool("claude_request", {
841
1070
  if (parsed.costUsd !== null) {
842
1071
  logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
843
1072
  }
844
- return buildCliResponse(parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, undefined, outputFormat);
1073
+ safeFlightComplete(corrId, {
1074
+ response: parsed.text,
1075
+ inputTokens: parsed.usage?.inputTokens,
1076
+ outputTokens: parsed.usage?.outputTokens,
1077
+ durationMs,
1078
+ retryCount: 0,
1079
+ circuitBreakerState: "closed",
1080
+ costUsd: parsed.costUsd ?? undefined,
1081
+ optimizationApplied: optimizePrompt || optimizeResponse,
1082
+ exitCode: 0,
1083
+ status: "completed",
1084
+ });
1085
+ return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
845
1086
  }
846
- return buildCliResponse(stdout, optimizeResponse, corrId, effectiveSessionId, prep, undefined, outputFormat);
1087
+ safeFlightComplete(corrId, {
1088
+ response: stdout,
1089
+ durationMs,
1090
+ retryCount: 0,
1091
+ circuitBreakerState: "closed",
1092
+ optimizationApplied: optimizePrompt || optimizeResponse,
1093
+ exitCode: 0,
1094
+ status: "completed",
1095
+ });
1096
+ return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
847
1097
  }
848
1098
  catch (error) {
849
1099
  const elapsedMs = Math.max(0, Date.now() - startTime);
850
1100
  logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
1101
+ safeFlightComplete(corrId, {
1102
+ response: "",
1103
+ durationMs: elapsedMs,
1104
+ retryCount: 0,
1105
+ circuitBreakerState: "closed",
1106
+ optimizationApplied: optimizePrompt || optimizeResponse,
1107
+ exitCode: 1,
1108
+ errorMessage: error.message,
1109
+ status: "failed",
1110
+ });
851
1111
  return createErrorResponse("claude", 1, "", corrId, error);
852
1112
  }
853
1113
  finally {
@@ -859,31 +1119,67 @@ server.tool("claude_request", {
859
1119
  // Codex Tool
860
1120
  //──────────────────────────────────────────────────────────────────────────────
861
1121
  server.tool("codex_request", {
862
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
1122
+ prompt: z
1123
+ .string()
1124
+ .min(1, "Prompt cannot be empty")
1125
+ .max(100000, "Prompt too long (max 100k chars)")
1126
+ .describe("Prompt text for Codex"),
863
1127
  model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
864
1128
  fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
865
- dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
866
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
867
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
868
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1129
+ dangerouslyBypassApprovalsAndSandbox: z
1130
+ .boolean()
1131
+ .default(false)
1132
+ .describe("Run Codex without approvals/sandbox"),
1133
+ approvalStrategy: z
1134
+ .enum(["legacy", "mcp_managed"])
1135
+ .default("legacy")
1136
+ .describe("Approval strategy"),
1137
+ approvalPolicy: z
1138
+ .enum(["strict", "balanced", "permissive"])
1139
+ .optional()
1140
+ .describe("Approval policy override"),
1141
+ mcpServers: z
1142
+ .array(MCP_SERVER_ENUM)
1143
+ .default(["sqry"])
1144
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
869
1145
  sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
870
1146
  createNewSession: z.boolean().default(false).describe("Force new session"),
871
1147
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
872
1148
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
873
1149
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
874
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
875
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
1150
+ idleTimeoutMs: z
1151
+ .number()
1152
+ .int()
1153
+ .min(30_000)
1154
+ .max(3_600_000)
1155
+ .optional()
1156
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1157
+ }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
876
1158
  const startTime = Date.now();
877
1159
  const prep = prepareCodexRequest({
878
- prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox,
879
- approvalStrategy, approvalPolicy, mcpServers,
880
- correlationId, optimizePrompt, operation: "codex_request"
1160
+ prompt,
1161
+ model,
1162
+ fullAuto,
1163
+ dangerouslyBypassApprovalsAndSandbox,
1164
+ approvalStrategy,
1165
+ approvalPolicy,
1166
+ mcpServers,
1167
+ correlationId,
1168
+ optimizePrompt,
1169
+ operation: "codex_request",
881
1170
  });
882
1171
  if (!("args" in prep))
883
1172
  return prep;
884
1173
  const { corrId, args } = prep;
885
1174
  let durationMs = 0;
886
1175
  let wasSuccessful = false;
1176
+ safeFlightStart({
1177
+ correlationId: corrId,
1178
+ cli: "codex",
1179
+ model: prep.resolvedModel || "default",
1180
+ prompt,
1181
+ sessionId,
1182
+ });
887
1183
  logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
888
1184
  try {
889
1185
  const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs));
@@ -895,6 +1191,16 @@ server.tool("codex_request", {
895
1191
  durationMs = Math.max(0, Date.now() - startTime);
896
1192
  if (code !== 0) {
897
1193
  logger.info(`[${corrId}] codex_request failed in ${durationMs}ms`);
1194
+ safeFlightComplete(corrId, {
1195
+ response: stderr || "",
1196
+ durationMs,
1197
+ retryCount: 0,
1198
+ circuitBreakerState: "closed",
1199
+ optimizationApplied: optimizePrompt || optimizeResponse,
1200
+ exitCode: code,
1201
+ errorMessage: stderr || `Exit code ${code}`,
1202
+ status: "failed",
1203
+ });
898
1204
  return createErrorResponse("codex", code, stderr, corrId);
899
1205
  }
900
1206
  wasSuccessful = true;
@@ -918,11 +1224,30 @@ server.tool("codex_request", {
918
1224
  effectiveSessionId = newSession.id;
919
1225
  }
920
1226
  logger.info(`[${corrId}] codex_request completed successfully in ${durationMs}ms`);
921
- return buildCliResponse(stdout, optimizeResponse, corrId, effectiveSessionId, prep);
1227
+ safeFlightComplete(corrId, {
1228
+ response: stdout,
1229
+ durationMs,
1230
+ retryCount: 0,
1231
+ circuitBreakerState: "closed",
1232
+ optimizationApplied: optimizePrompt || optimizeResponse,
1233
+ exitCode: 0,
1234
+ status: "completed",
1235
+ });
1236
+ return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs);
922
1237
  }
923
1238
  catch (error) {
924
1239
  const elapsedMs = Math.max(0, Date.now() - startTime);
925
1240
  logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
1241
+ safeFlightComplete(corrId, {
1242
+ response: "",
1243
+ durationMs: elapsedMs,
1244
+ retryCount: 0,
1245
+ circuitBreakerState: "closed",
1246
+ optimizationApplied: optimizePrompt || optimizeResponse,
1247
+ exitCode: 1,
1248
+ errorMessage: error.message,
1249
+ status: "failed",
1250
+ });
926
1251
  return createErrorResponse("codex", 1, "", corrId, error);
927
1252
  }
928
1253
  finally {
@@ -934,49 +1259,137 @@ server.tool("codex_request", {
934
1259
  // Gemini Tool
935
1260
  //──────────────────────────────────────────────────────────────────────────────
936
1261
  server.tool("gemini_request", {
937
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
938
- model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1262
+ prompt: z
1263
+ .string()
1264
+ .min(1, "Prompt cannot be empty")
1265
+ .max(100000, "Prompt too long (max 100k chars)")
1266
+ .describe("Prompt text for Gemini"),
1267
+ model: z
1268
+ .string()
1269
+ .optional()
1270
+ .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
939
1271
  sessionId: z.string().optional().describe("Session ID or 'latest'"),
940
1272
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
941
1273
  createNewSession: z.boolean().default(false).describe("Force new session"),
942
- approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
943
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
944
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
945
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
946
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
1274
+ approvalMode: z
1275
+ .enum(["default", "auto_edit", "yolo"])
1276
+ .optional()
1277
+ .describe("Approval: default|auto_edit|yolo"),
1278
+ approvalStrategy: z
1279
+ .enum(["legacy", "mcp_managed"])
1280
+ .default("legacy")
1281
+ .describe("Approval strategy"),
1282
+ approvalPolicy: z
1283
+ .enum(["strict", "balanced", "permissive"])
1284
+ .optional()
1285
+ .describe("Approval policy override"),
1286
+ mcpServers: z
1287
+ .array(MCP_SERVER_ENUM)
1288
+ .default(["sqry"])
1289
+ .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1290
+ allowedTools: z
1291
+ .array(z.string())
1292
+ .optional()
1293
+ .describe("Allowed tools (['Write','Edit','Bash'])"),
947
1294
  includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
948
1295
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
949
1296
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
950
1297
  optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
951
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
952
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs }) => {
953
- return handleGeminiRequest({ sessionManager, logger }, { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs });
1298
+ idleTimeoutMs: z
1299
+ .number()
1300
+ .int()
1301
+ .min(30_000)
1302
+ .max(3_600_000)
1303
+ .optional()
1304
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1305
+ }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, }) => {
1306
+ return handleGeminiRequest({ sessionManager, logger }, {
1307
+ prompt,
1308
+ model,
1309
+ sessionId,
1310
+ resumeLatest,
1311
+ createNewSession,
1312
+ approvalMode,
1313
+ approvalStrategy,
1314
+ approvalPolicy,
1315
+ mcpServers,
1316
+ allowedTools,
1317
+ includeDirs,
1318
+ correlationId,
1319
+ optimizePrompt,
1320
+ optimizeResponse,
1321
+ idleTimeoutMs,
1322
+ });
954
1323
  });
955
1324
  //──────────────────────────────────────────────────────────────────────────────
956
1325
  // Async Long-Running Job Tools (No Time-Bound LLM Execution)
957
1326
  //──────────────────────────────────────────────────────────────────────────────
958
1327
  server.tool("claude_request_async", {
959
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
960
- model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
961
- outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
1328
+ prompt: z
1329
+ .string()
1330
+ .min(1, "Prompt cannot be empty")
1331
+ .max(100000, "Prompt too long (max 100k chars)")
1332
+ .describe("Prompt text for Claude"),
1333
+ model: z
1334
+ .string()
1335
+ .optional()
1336
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
1337
+ outputFormat: z
1338
+ .enum(["text", "json", "stream-json"])
1339
+ .default("text")
1340
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
962
1341
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
963
1342
  continueSession: z.boolean().default(false).describe("Continue active session"),
964
1343
  createNewSession: z.boolean().default(false).describe("Force new session"),
965
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
1344
+ allowedTools: z
1345
+ .array(z.string())
1346
+ .optional()
1347
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
966
1348
  disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
967
- dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
968
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
969
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
970
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
971
- strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
1349
+ dangerouslySkipPermissions: z
1350
+ .boolean()
1351
+ .default(false)
1352
+ .describe("Bypass permissions (sandbox only)"),
1353
+ approvalStrategy: z
1354
+ .enum(["legacy", "mcp_managed"])
1355
+ .default("legacy")
1356
+ .describe("Approval strategy"),
1357
+ approvalPolicy: z
1358
+ .enum(["strict", "balanced", "permissive"])
1359
+ .optional()
1360
+ .describe("Approval policy override"),
1361
+ mcpServers: z
1362
+ .array(MCP_SERVER_ENUM)
1363
+ .default(["sqry"])
1364
+ .describe("MCP servers exposed to Claude"),
1365
+ strictMcpConfig: z
1366
+ .boolean()
1367
+ .default(false)
1368
+ .describe("Restrict Claude to provided MCP config only"),
972
1369
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
973
1370
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
974
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
975
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs }) => {
1371
+ idleTimeoutMs: z
1372
+ .number()
1373
+ .int()
1374
+ .min(30_000)
1375
+ .max(3_600_000)
1376
+ .optional()
1377
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1378
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, }) => {
976
1379
  const prep = prepareClaudeRequest({
977
- prompt, model, outputFormat, allowedTools, disallowedTools, dangerouslySkipPermissions,
978
- approvalStrategy, approvalPolicy, mcpServers,
979
- strictMcpConfig, correlationId, optimizePrompt, operation: "claude_request_async"
1380
+ prompt,
1381
+ model,
1382
+ outputFormat,
1383
+ allowedTools,
1384
+ disallowedTools,
1385
+ dangerouslySkipPermissions,
1386
+ approvalStrategy,
1387
+ approvalPolicy,
1388
+ mcpServers,
1389
+ strictMcpConfig,
1390
+ correlationId,
1391
+ optimizePrompt,
1392
+ operation: "claude_request_async",
980
1393
  });
981
1394
  if (!("args" in prep))
982
1395
  return prep;
@@ -1007,9 +1420,7 @@ server.tool("claude_request_async", {
1007
1420
  }
1008
1421
  }
1009
1422
  // Idle timeout only for stream-json (text/json produce no output until done)
1010
- const effectiveIdleTimeout = outputFormat === "stream-json"
1011
- ? resolveIdleTimeout("claude", idleTimeoutMs)
1012
- : undefined;
1423
+ const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
1013
1424
  const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat);
1014
1425
  logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
1015
1426
  const asyncResponse = {
@@ -1020,17 +1431,19 @@ server.tool("claude_request_async", {
1020
1431
  mcpServers: {
1021
1432
  requested: requestedMcpServers,
1022
1433
  enabled: mcpConfig?.enabled,
1023
- missing: mcpConfig?.missing
1024
- }
1434
+ missing: mcpConfig?.missing,
1435
+ },
1025
1436
  };
1026
1437
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1027
1438
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1028
1439
  }
1029
1440
  return {
1030
- content: [{
1441
+ content: [
1442
+ {
1031
1443
  type: "text",
1032
- text: JSON.stringify(asyncResponse, null, 2)
1033
- }]
1444
+ text: JSON.stringify(asyncResponse, null, 2),
1445
+ },
1446
+ ],
1034
1447
  };
1035
1448
  }
1036
1449
  catch (error) {
@@ -1038,82 +1451,172 @@ server.tool("claude_request_async", {
1038
1451
  }
1039
1452
  });
1040
1453
  server.tool("codex_request_async", {
1041
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
1454
+ prompt: z
1455
+ .string()
1456
+ .min(1, "Prompt cannot be empty")
1457
+ .max(100000, "Prompt too long (max 100k chars)")
1458
+ .describe("Prompt text for Codex"),
1042
1459
  model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
1043
1460
  fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
1044
- dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
1045
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
1046
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
1047
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1461
+ dangerouslyBypassApprovalsAndSandbox: z
1462
+ .boolean()
1463
+ .default(false)
1464
+ .describe("Run Codex without approvals/sandbox"),
1465
+ approvalStrategy: z
1466
+ .enum(["legacy", "mcp_managed"])
1467
+ .default("legacy")
1468
+ .describe("Approval strategy"),
1469
+ approvalPolicy: z
1470
+ .enum(["strict", "balanced", "permissive"])
1471
+ .optional()
1472
+ .describe("Approval policy override"),
1473
+ mcpServers: z
1474
+ .array(MCP_SERVER_ENUM)
1475
+ .default(["sqry"])
1476
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
1048
1477
  sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
1049
1478
  createNewSession: z.boolean().default(false).describe("Force new session"),
1050
1479
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1051
1480
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1052
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
1053
- }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs }) => {
1054
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, { prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs });
1481
+ idleTimeoutMs: z
1482
+ .number()
1483
+ .int()
1484
+ .min(30_000)
1485
+ .max(3_600_000)
1486
+ .optional()
1487
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1488
+ }, async ({ prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, }) => {
1489
+ return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger }, {
1490
+ prompt,
1491
+ model,
1492
+ fullAuto,
1493
+ dangerouslyBypassApprovalsAndSandbox,
1494
+ approvalStrategy,
1495
+ approvalPolicy,
1496
+ mcpServers,
1497
+ sessionId,
1498
+ createNewSession,
1499
+ correlationId,
1500
+ optimizePrompt,
1501
+ idleTimeoutMs,
1502
+ });
1055
1503
  });
1056
1504
  server.tool("gemini_request_async", {
1057
- prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
1058
- model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1505
+ prompt: z
1506
+ .string()
1507
+ .min(1, "Prompt cannot be empty")
1508
+ .max(100000, "Prompt too long (max 100k chars)")
1509
+ .describe("Prompt text for Gemini"),
1510
+ model: z
1511
+ .string()
1512
+ .optional()
1513
+ .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
1059
1514
  sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
1060
1515
  resumeLatest: z.boolean().default(false).describe("Resume latest session"),
1061
1516
  createNewSession: z.boolean().default(false).describe("Force new session"),
1062
- approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
1063
- approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
1064
- approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
1065
- mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1066
- allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
1517
+ approvalMode: z
1518
+ .enum(["default", "auto_edit", "yolo"])
1519
+ .optional()
1520
+ .describe("Approval: default|auto_edit|yolo"),
1521
+ approvalStrategy: z
1522
+ .enum(["legacy", "mcp_managed"])
1523
+ .default("legacy")
1524
+ .describe("Approval strategy"),
1525
+ approvalPolicy: z
1526
+ .enum(["strict", "balanced", "permissive"])
1527
+ .optional()
1528
+ .describe("Approval policy override"),
1529
+ mcpServers: z
1530
+ .array(MCP_SERVER_ENUM)
1531
+ .default(["sqry"])
1532
+ .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
1533
+ allowedTools: z
1534
+ .array(z.string())
1535
+ .optional()
1536
+ .describe("Allowed tools (['Write','Edit','Bash'])"),
1067
1537
  includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
1068
1538
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
1069
1539
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
1070
- idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
1071
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs }) => {
1072
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs });
1540
+ idleTimeoutMs: z
1541
+ .number()
1542
+ .int()
1543
+ .min(30_000)
1544
+ .max(3_600_000)
1545
+ .optional()
1546
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
1547
+ }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, }) => {
1548
+ return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger }, {
1549
+ prompt,
1550
+ model,
1551
+ sessionId,
1552
+ resumeLatest,
1553
+ createNewSession,
1554
+ approvalMode,
1555
+ approvalStrategy,
1556
+ approvalPolicy,
1557
+ mcpServers,
1558
+ allowedTools,
1559
+ includeDirs,
1560
+ correlationId,
1561
+ optimizePrompt,
1562
+ idleTimeoutMs,
1563
+ });
1073
1564
  });
1074
1565
  server.tool("llm_job_status", {
1075
- jobId: z.string().describe("Async job ID from *_request_async")
1566
+ jobId: z.string().describe("Async job ID from *_request_async"),
1076
1567
  }, async ({ jobId }) => {
1077
1568
  const job = asyncJobManager.getJobSnapshot(jobId);
1078
1569
  if (!job) {
1079
1570
  return {
1080
- content: [{
1571
+ content: [
1572
+ {
1081
1573
  type: "text",
1082
1574
  text: JSON.stringify({
1083
1575
  success: false,
1084
1576
  error: "Job not found",
1085
- jobId
1086
- }, null, 2)
1087
- }],
1088
- isError: true
1577
+ jobId,
1578
+ }, null, 2),
1579
+ },
1580
+ ],
1581
+ isError: true,
1089
1582
  };
1090
1583
  }
1091
1584
  return {
1092
- content: [{
1585
+ content: [
1586
+ {
1093
1587
  type: "text",
1094
1588
  text: JSON.stringify({
1095
1589
  success: true,
1096
- job
1097
- }, null, 2)
1098
- }]
1590
+ job,
1591
+ }, null, 2),
1592
+ },
1593
+ ],
1099
1594
  };
1100
1595
  });
1101
1596
  server.tool("llm_job_result", {
1102
1597
  jobId: z.string().describe("Async job ID from *_request_async"),
1103
- maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream")
1598
+ maxChars: z
1599
+ .number()
1600
+ .int()
1601
+ .min(1000)
1602
+ .max(2000000)
1603
+ .default(200000)
1604
+ .describe("Max chars returned per stream"),
1104
1605
  }, async ({ jobId, maxChars }) => {
1105
1606
  const result = asyncJobManager.getJobResult(jobId, maxChars);
1106
1607
  if (!result) {
1107
1608
  return {
1108
- content: [{
1609
+ content: [
1610
+ {
1109
1611
  type: "text",
1110
1612
  text: JSON.stringify({
1111
1613
  success: false,
1112
1614
  error: "Job not found",
1113
- jobId
1114
- }, null, 2)
1115
- }],
1116
- isError: true
1615
+ jobId,
1616
+ }, null, 2),
1617
+ },
1618
+ ],
1619
+ isError: true,
1117
1620
  };
1118
1621
  }
1119
1622
  // Parse stream-json output for Claude async jobs
@@ -1123,50 +1626,68 @@ server.tool("llm_job_result", {
1123
1626
  parsed = parseStreamJson(result.stdout);
1124
1627
  }
1125
1628
  return {
1126
- content: [{
1629
+ content: [
1630
+ {
1127
1631
  type: "text",
1128
1632
  text: JSON.stringify({
1129
1633
  success: true,
1130
1634
  result,
1131
- ...(parsed ? { parsed: { text: parsed.text, costUsd: parsed.costUsd, usage: parsed.usage, model: parsed.model, numTurns: parsed.numTurns } } : {})
1132
- }, null, 2)
1133
- }]
1635
+ ...(parsed
1636
+ ? {
1637
+ parsed: {
1638
+ text: parsed.text,
1639
+ costUsd: parsed.costUsd,
1640
+ usage: parsed.usage,
1641
+ model: parsed.model,
1642
+ numTurns: parsed.numTurns,
1643
+ },
1644
+ }
1645
+ : {}),
1646
+ }, null, 2),
1647
+ },
1648
+ ],
1134
1649
  };
1135
1650
  });
1136
1651
  server.tool("llm_job_cancel", {
1137
- jobId: z.string().describe("Async job ID from *_request_async")
1652
+ jobId: z.string().describe("Async job ID from *_request_async"),
1138
1653
  }, async ({ jobId }) => {
1139
1654
  const cancel = asyncJobManager.cancelJob(jobId);
1140
1655
  if (!cancel.canceled) {
1141
1656
  return {
1142
- content: [{
1657
+ content: [
1658
+ {
1143
1659
  type: "text",
1144
1660
  text: JSON.stringify({
1145
1661
  success: false,
1146
1662
  jobId,
1147
- reason: cancel.reason || "Unable to cancel"
1148
- }, null, 2)
1149
- }],
1150
- isError: true
1663
+ reason: cancel.reason || "Unable to cancel",
1664
+ }, null, 2),
1665
+ },
1666
+ ],
1667
+ isError: true,
1151
1668
  };
1152
1669
  }
1153
1670
  return {
1154
- content: [{
1671
+ content: [
1672
+ {
1155
1673
  type: "text",
1156
1674
  text: JSON.stringify({
1157
1675
  success: true,
1158
- jobId
1159
- }, null, 2)
1160
- }]
1676
+ jobId,
1677
+ }, null, 2),
1678
+ },
1679
+ ],
1161
1680
  };
1162
1681
  });
1163
1682
  server.tool("llm_process_health", {}, async () => {
1164
1683
  const health = asyncJobManager.getJobHealth();
1165
1684
  return {
1166
- content: [{
1685
+ content: [
1686
+ {
1167
1687
  type: "text",
1168
- text: JSON.stringify({ success: true, ...health }, null, 2)
1169
- }]
1688
+ text: JSON.stringify({ success: true, ...health }, null, 2),
1689
+ },
1690
+ ],
1170
1691
  };
1171
1692
  });
1172
1693
  //──────────────────────────────────────────────────────────────────────────────
@@ -1174,25 +1695,29 @@ server.tool("llm_process_health", {}, async () => {
1174
1695
  //──────────────────────────────────────────────────────────────────────────────
1175
1696
  server.tool("approval_list", {
1176
1697
  limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
1177
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter")
1698
+ cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter"),
1178
1699
  }, async ({ limit, cli }) => {
1179
1700
  const approvals = approvalManager.list(limit, cli);
1180
1701
  return {
1181
- content: [{
1702
+ content: [
1703
+ {
1182
1704
  type: "text",
1183
1705
  text: JSON.stringify({
1184
1706
  success: true,
1185
1707
  count: approvals.length,
1186
- approvals
1187
- }, null, 2)
1188
- }]
1708
+ approvals,
1709
+ }, null, 2),
1710
+ },
1711
+ ],
1189
1712
  };
1190
1713
  });
1191
1714
  //──────────────────────────────────────────────────────────────────────────────
1192
1715
  // List Models Tool
1193
1716
  //──────────────────────────────────────────────────────────────────────────────
1194
1717
  server.tool("list_models", {
1195
- cli: z.preprocess((value) => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional()).describe("CLI filter (claude|codex|gemini)")
1718
+ cli: z
1719
+ .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini"]).optional())
1720
+ .describe("CLI filter (claude|codex|gemini)"),
1196
1721
  }, async ({ cli }) => {
1197
1722
  const cliInfo = getCliInfo();
1198
1723
  const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
@@ -1204,7 +1729,7 @@ server.tool("list_models", {
1204
1729
  server.tool("session_create", {
1205
1730
  cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
1206
1731
  description: z.string().optional().describe("Session description"),
1207
- setAsActive: z.boolean().default(true).describe("Set as active session")
1732
+ setAsActive: z.boolean().default(true).describe("Set as active session"),
1208
1733
  }, async ({ cli, description, setAsActive }) => {
1209
1734
  try {
1210
1735
  const session = await sessionManager.createSession(cli, description);
@@ -1213,7 +1738,8 @@ server.tool("session_create", {
1213
1738
  }
1214
1739
  logger.info(`Created new ${cli} session: ${session.id}`);
1215
1740
  return {
1216
- content: [{
1741
+ content: [
1742
+ {
1217
1743
  type: "text",
1218
1744
  text: JSON.stringify({
1219
1745
  success: true,
@@ -1222,10 +1748,11 @@ server.tool("session_create", {
1222
1748
  cli: session.cli,
1223
1749
  description: session.description,
1224
1750
  createdAt: session.createdAt,
1225
- isActive: setAsActive
1226
- }
1227
- }, null, 2)
1228
- }]
1751
+ isActive: setAsActive,
1752
+ },
1753
+ }, null, 2),
1754
+ },
1755
+ ],
1229
1756
  };
1230
1757
  }
1231
1758
  catch (error) {
@@ -1233,14 +1760,17 @@ server.tool("session_create", {
1233
1760
  }
1234
1761
  });
1235
1762
  server.tool("session_list", {
1236
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
1763
+ cli: z
1764
+ .enum(["claude", "codex", "gemini"])
1765
+ .optional()
1766
+ .describe("CLI filter (claude|codex|gemini)"),
1237
1767
  }, async ({ cli }) => {
1238
1768
  try {
1239
1769
  const sessions = await sessionManager.listSessions(cli);
1240
1770
  const activeSessions = {
1241
1771
  claude: await sessionManager.getActiveSession("claude"),
1242
1772
  codex: await sessionManager.getActiveSession("codex"),
1243
- gemini: await sessionManager.getActiveSession("gemini")
1773
+ gemini: await sessionManager.getActiveSession("gemini"),
1244
1774
  };
1245
1775
  const sessionList = sessions.map(s => ({
1246
1776
  id: s.id,
@@ -1248,10 +1778,11 @@ server.tool("session_list", {
1248
1778
  description: s.description,
1249
1779
  createdAt: s.createdAt,
1250
1780
  lastUsedAt: s.lastUsedAt,
1251
- isActive: activeSessions[s.cli]?.id === s.id
1781
+ isActive: activeSessions[s.cli]?.id === s.id,
1252
1782
  }));
1253
1783
  return {
1254
- content: [{
1784
+ content: [
1785
+ {
1255
1786
  type: "text",
1256
1787
  text: JSON.stringify({
1257
1788
  total: sessionList.length,
@@ -1259,10 +1790,11 @@ server.tool("session_list", {
1259
1790
  activeSessions: {
1260
1791
  claude: activeSessions.claude?.id || null,
1261
1792
  codex: activeSessions.codex?.id || null,
1262
- gemini: activeSessions.gemini?.id || null
1263
- }
1264
- }, null, 2)
1265
- }]
1793
+ gemini: activeSessions.gemini?.id || null,
1794
+ },
1795
+ }, null, 2),
1796
+ },
1797
+ ],
1266
1798
  };
1267
1799
  }
1268
1800
  catch (error) {
@@ -1271,32 +1803,36 @@ server.tool("session_list", {
1271
1803
  });
1272
1804
  server.tool("session_set_active", {
1273
1805
  cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
1274
- sessionId: z.string().nullable().describe("Session ID (null to clear)")
1806
+ sessionId: z.string().nullable().describe("Session ID (null to clear)"),
1275
1807
  }, async ({ cli, sessionId }) => {
1276
1808
  try {
1277
1809
  const success = await sessionManager.setActiveSession(cli, sessionId || null);
1278
1810
  if (!success) {
1279
1811
  return {
1280
- content: [{
1812
+ content: [
1813
+ {
1281
1814
  type: "text",
1282
1815
  text: JSON.stringify({
1283
1816
  success: false,
1284
- error: "Session not found or does not belong to the specified CLI"
1285
- }, null, 2)
1286
- }],
1287
- isError: true
1817
+ error: "Session not found or does not belong to the specified CLI",
1818
+ }, null, 2),
1819
+ },
1820
+ ],
1821
+ isError: true,
1288
1822
  };
1289
1823
  }
1290
1824
  logger.info(`Set active ${cli} session to: ${sessionId}`);
1291
1825
  return {
1292
- content: [{
1826
+ content: [
1827
+ {
1293
1828
  type: "text",
1294
1829
  text: JSON.stringify({
1295
1830
  success: true,
1296
1831
  cli,
1297
- activeSessionId: sessionId
1298
- }, null, 2)
1299
- }]
1832
+ activeSessionId: sessionId,
1833
+ }, null, 2),
1834
+ },
1835
+ ],
1300
1836
  };
1301
1837
  }
1302
1838
  catch (error) {
@@ -1304,36 +1840,40 @@ server.tool("session_set_active", {
1304
1840
  }
1305
1841
  });
1306
1842
  server.tool("session_delete", {
1307
- sessionId: z.string().describe("Session ID")
1843
+ sessionId: z.string().describe("Session ID"),
1308
1844
  }, async ({ sessionId }) => {
1309
1845
  try {
1310
1846
  const session = await sessionManager.getSession(sessionId);
1311
1847
  if (!session) {
1312
1848
  return {
1313
- content: [{
1849
+ content: [
1850
+ {
1314
1851
  type: "text",
1315
1852
  text: JSON.stringify({
1316
1853
  success: false,
1317
- error: "Session not found"
1318
- }, null, 2)
1319
- }],
1320
- isError: true
1854
+ error: "Session not found",
1855
+ }, null, 2),
1856
+ },
1857
+ ],
1858
+ isError: true,
1321
1859
  };
1322
1860
  }
1323
1861
  const success = await sessionManager.deleteSession(sessionId);
1324
1862
  logger.info(`Deleted session: ${sessionId}`);
1325
1863
  return {
1326
- content: [{
1864
+ content: [
1865
+ {
1327
1866
  type: "text",
1328
1867
  text: JSON.stringify({
1329
1868
  success,
1330
1869
  deletedSession: {
1331
1870
  id: session.id,
1332
1871
  cli: session.cli,
1333
- description: session.description
1334
- }
1335
- }, null, 2)
1336
- }]
1872
+ description: session.description,
1873
+ },
1874
+ }, null, 2),
1875
+ },
1876
+ ],
1337
1877
  };
1338
1878
  }
1339
1879
  catch (error) {
@@ -1341,34 +1881,38 @@ server.tool("session_delete", {
1341
1881
  }
1342
1882
  });
1343
1883
  server.tool("session_get", {
1344
- sessionId: z.string().describe("Session ID")
1884
+ sessionId: z.string().describe("Session ID"),
1345
1885
  }, async ({ sessionId }) => {
1346
1886
  try {
1347
1887
  const session = await sessionManager.getSession(sessionId);
1348
1888
  if (!session) {
1349
1889
  return {
1350
- content: [{
1890
+ content: [
1891
+ {
1351
1892
  type: "text",
1352
1893
  text: JSON.stringify({
1353
1894
  success: false,
1354
- error: "Session not found"
1355
- }, null, 2)
1356
- }],
1357
- isError: true
1895
+ error: "Session not found",
1896
+ }, null, 2),
1897
+ },
1898
+ ],
1899
+ isError: true,
1358
1900
  };
1359
1901
  }
1360
1902
  const activeSession = await sessionManager.getActiveSession(session.cli);
1361
1903
  return {
1362
- content: [{
1904
+ content: [
1905
+ {
1363
1906
  type: "text",
1364
1907
  text: JSON.stringify({
1365
1908
  success: true,
1366
1909
  session: {
1367
1910
  ...session,
1368
- isActive: activeSession?.id === session.id
1369
- }
1370
- }, null, 2)
1371
- }]
1911
+ isActive: activeSession?.id === session.id,
1912
+ },
1913
+ }, null, 2),
1914
+ },
1915
+ ],
1372
1916
  };
1373
1917
  }
1374
1918
  catch (error) {
@@ -1376,20 +1920,25 @@ server.tool("session_get", {
1376
1920
  }
1377
1921
  });
1378
1922
  server.tool("session_clear_all", {
1379
- cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
1923
+ cli: z
1924
+ .enum(["claude", "codex", "gemini"])
1925
+ .optional()
1926
+ .describe("CLI filter (claude|codex|gemini)"),
1380
1927
  }, async ({ cli }) => {
1381
1928
  try {
1382
1929
  const count = await sessionManager.clearAllSessions(cli);
1383
- logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ''}`);
1930
+ logger.info(`Cleared ${count} sessions${cli ? ` for ${cli}` : ""}`);
1384
1931
  return {
1385
- content: [{
1932
+ content: [
1933
+ {
1386
1934
  type: "text",
1387
1935
  text: JSON.stringify({
1388
1936
  success: true,
1389
1937
  deletedCount: count,
1390
- cli: cli || "all"
1391
- }, null, 2)
1392
- }]
1938
+ cli: cli || "all",
1939
+ }, null, 2),
1940
+ },
1941
+ ],
1393
1942
  };
1394
1943
  }
1395
1944
  catch (error) {
@@ -1423,15 +1972,17 @@ function registerHealthResource() {
1423
1972
  server.registerResource("health", "health://status", {
1424
1973
  title: "🏥 Health Status",
1425
1974
  description: "DB connectivity and latency",
1426
- mimeType: "application/json"
1975
+ mimeType: "application/json",
1427
1976
  }, async () => {
1428
1977
  const health = await checkHealth(db);
1429
1978
  return {
1430
- contents: [{
1979
+ contents: [
1980
+ {
1431
1981
  uri: "health://status",
1432
1982
  text: JSON.stringify(health, null, 2),
1433
- mimeType: "application/json"
1434
- }]
1983
+ mimeType: "application/json",
1984
+ },
1985
+ ],
1435
1986
  };
1436
1987
  });
1437
1988
  logger.info("Health check resource registered");
@@ -1440,15 +1991,17 @@ function registerHealthResource() {
1440
1991
  server.registerResource("process-health", "metrics://process-health", {
1441
1992
  title: "Process Health",
1442
1993
  description: "Async job health (CPU, memory, zombie detection)",
1443
- mimeType: "application/json"
1994
+ mimeType: "application/json",
1444
1995
  }, async (uri) => {
1445
1996
  const health = asyncJobManager.getJobHealth();
1446
1997
  return {
1447
- contents: [{
1998
+ contents: [
1999
+ {
1448
2000
  uri: uri.href,
1449
2001
  mimeType: "application/json",
1450
- text: JSON.stringify(health, null, 2)
1451
- }]
2002
+ text: JSON.stringify(health, null, 2),
2003
+ },
2004
+ ],
1452
2005
  };
1453
2006
  });
1454
2007
  logger.info("Process health resource registered");
@@ -1468,6 +2021,8 @@ async function shutdown(signal) {
1468
2021
  await db.disconnect();
1469
2022
  logger.info("Database connections closed");
1470
2023
  }
2024
+ flightRecorder.close();
2025
+ logger.info("Flight recorder closed");
1471
2026
  process.exit(0);
1472
2027
  }
1473
2028
  catch (error) {
@@ -1492,11 +2047,9 @@ async function main() {
1492
2047
  }
1493
2048
  // Guard: only auto-start when run directly (not imported for testing)
1494
2049
  // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
1495
- const __entryUrl = process.argv[1]
1496
- ? new URL(realpathSync(process.argv[1]), "file://").href
1497
- : "";
2050
+ const __entryUrl = process.argv[1] ? new URL(realpathSync(process.argv[1]), "file://").href : "";
1498
2051
  if (__entryUrl === import.meta.url) {
1499
- main().catch((error) => {
2052
+ main().catch(error => {
1500
2053
  logger.error("Fatal server error:", error);
1501
2054
  process.exit(1);
1502
2055
  });