llm-cli-gateway 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +541 -0
  2. package/LICENSE +21 -0
  3. package/README.md +545 -0
  4. package/dist/approval-manager.d.ts +43 -0
  5. package/dist/approval-manager.js +156 -0
  6. package/dist/async-job-manager.d.ts +57 -0
  7. package/dist/async-job-manager.js +334 -0
  8. package/dist/claude-mcp-config.d.ts +8 -0
  9. package/dist/claude-mcp-config.js +161 -0
  10. package/dist/config.d.ts +35 -0
  11. package/dist/config.js +56 -0
  12. package/dist/db.d.ts +48 -0
  13. package/dist/db.js +170 -0
  14. package/dist/executor.d.ts +30 -0
  15. package/dist/executor.js +315 -0
  16. package/dist/health.d.ts +20 -0
  17. package/dist/health.js +32 -0
  18. package/dist/index.d.ts +67 -0
  19. package/dist/index.js +1503 -0
  20. package/dist/logger.d.ts +6 -0
  21. package/dist/logger.js +5 -0
  22. package/dist/metrics.d.ts +23 -0
  23. package/dist/metrics.js +57 -0
  24. package/dist/migrate-sessions.d.ts +12 -0
  25. package/dist/migrate-sessions.js +145 -0
  26. package/dist/migrate.d.ts +2 -0
  27. package/dist/migrate.js +100 -0
  28. package/dist/model-registry.d.ts +10 -0
  29. package/dist/model-registry.js +346 -0
  30. package/dist/optimizer.d.ts +3 -0
  31. package/dist/optimizer.js +183 -0
  32. package/dist/process-monitor.d.ts +54 -0
  33. package/dist/process-monitor.js +146 -0
  34. package/dist/request-helpers.d.ts +25 -0
  35. package/dist/request-helpers.js +32 -0
  36. package/dist/resources.d.ts +26 -0
  37. package/dist/resources.js +201 -0
  38. package/dist/retry.d.ts +72 -0
  39. package/dist/retry.js +146 -0
  40. package/dist/review-integrity.d.ts +50 -0
  41. package/dist/review-integrity.js +283 -0
  42. package/dist/session-manager-pg.d.ts +76 -0
  43. package/dist/session-manager-pg.js +383 -0
  44. package/dist/session-manager.d.ts +62 -0
  45. package/dist/session-manager.js +223 -0
  46. package/dist/stream-json-parser.d.ts +35 -0
  47. package/dist/stream-json-parser.js +94 -0
  48. package/package.json +90 -0
package/dist/index.js ADDED
@@ -0,0 +1,1503 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { randomUUID } from "crypto";
5
+ import { readFileSync, readdirSync, realpathSync } from "fs";
6
+ import { dirname, join } from "path";
7
+ import { fileURLToPath } from "url";
8
+ import { z } from "zod";
9
+ import { executeCli, killAllProcessGroups } from "./executor.js";
10
+ import { parseStreamJson } from "./stream-json-parser.js";
11
+ import { createSessionManager } from "./session-manager.js";
12
+ import { ResourceProvider } from "./resources.js";
13
+ import { PerformanceMetrics } from "./metrics.js";
14
+ import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText } from "./optimizer.js";
15
+ import { loadConfig } from "./config.js";
16
+ import { checkHealth } from "./health.js";
17
+ import { getCliInfo, resolveModelAlias } from "./model-registry.js";
18
+ import { AsyncJobManager } from "./async-job-manager.js";
19
+ import { ApprovalManager } from "./approval-manager.js";
20
+ import { checkReviewIntegrity } from "./review-integrity.js";
21
+ import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES } from "./claude-mcp-config.js";
22
+ import { resolveSessionResumeArgs, GATEWAY_SESSION_PREFIX } from "./request-helpers.js";
23
// Minimal stderr logger. stdout carries the MCP protocol stream, so every
// diagnostic line is written with console.error instead.
const logger = {
    /** Writes an informational line, forwarding any extra values to console.error. */
    info(message, ...args) {
        const stamp = new Date().toISOString();
        console.error(`[INFO] ${stamp} - ${message}`, ...args);
    },
    /** Writes an error line, forwarding any extra values to console.error. */
    error(message, ...args) {
        const stamp = new Date().toISOString();
        console.error(`[ERROR] ${stamp} - ${message}`, ...args);
    },
    /** Writes a debug line, but only while the DEBUG environment variable is set. */
    debug(message, ...args) {
        if (!process.env.DEBUG) {
            return;
        }
        const stamp = new Date().toISOString();
        console.error(`[DEBUG] ${stamp} - ${message}`, ...args);
    }
};
37
/**
 * Logs the estimated token counts before and after an optimization pass,
 * together with the percentage saved.
 *
 * @param {string} kind - Label placed in the log line (callers pass "prompt" or "response").
 * @param {string} correlationId - Request correlation id used as the log prefix.
 * @param {string} original - Text before optimization.
 * @param {string} optimized - Text after optimization.
 */
function logOptimizationTokens(kind, correlationId, original, optimized) {
    const before = estimateTokens(original);
    const after = estimateTokens(optimized);
    // A zero-token original would divide by zero; report 0% instead.
    let reduction = 0;
    if (before !== 0) {
        reduction = ((before - after) / before) * 100;
    }
    logger.info(`[${correlationId}] ${kind} tokens ${before} → ${after} (${reduction.toFixed(1)}% reduction)`);
}
43
// Sync-to-async deadline. When a sync tool's CLI call is still running after
// this many milliseconds, the tool answers with a deferred async job reference
// rather than blocking until the MCP client's own tool-call timeout fires
// (~60s in many runtimes).
// Override with the SYNC_DEADLINE_MS env var; 0 disables deferral (pure sync).
const SYNC_DEADLINE_MS = (() => {
    // 45s default — safely under the 60s MCP client cap
    const fallbackMs = 45_000;
    const raw = process.env.SYNC_DEADLINE_MS;
    if (raw === undefined) {
        return fallbackMs;
    }
    const candidate = Number.parseInt(raw, 10);
    // Unparseable or negative values fall back to the default.
    return Number.isFinite(candidate) && candidate >= 0 ? candidate : fallbackMs;
})();
56
//──────────────────────────────────────────────────────────────────────────────
// Skills loader — reads .agents/skills/*/SKILL.md at startup
//──────────────────────────────────────────────────────────────────────────────
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");

/**
 * Extracts the `description` value from the YAML frontmatter that opens a
 * SKILL.md document.
 *
 * Only a frontmatter block at the very start of the file is considered, so a
 * `---` horizontal rule or a "description:" line in the markdown body is never
 * mistaken for metadata. The value must sit on the same line as the key.
 *
 * @param {string} content - Full text of the SKILL.md file.
 * @returns {string} The trimmed description (surrounding quotes removed), or
 *   "" when there is no frontmatter or no usable description.
 */
function extractSkillDescription(content) {
    const frontmatter = /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(content);
    if (!frontmatter) {
        return "";
    }
    // [ \t]* (not \s*) keeps the match on one line so an empty value cannot
    // swallow the next key.
    const field = /^description:[ \t]*(.+)$/m.exec(frontmatter[1]);
    if (!field) {
        return "";
    }
    const value = field[1].trim();
    // YAML quoted scalars: the quotes are syntax, not part of the value.
    const quoted = /^(["'])(.*)\1$/.exec(value);
    return quoted ? quoted[2].trim() : value;
}

/**
 * Loads every skill found under a skills directory.
 *
 * Each sub-directory containing a readable SKILL.md yields one entry. The
 * description comes from the file's frontmatter and falls back to the
 * directory name. A missing skills directory or missing/unreadable skill file
 * is deliberately non-fatal: the directory yields [], the file is skipped.
 *
 * @param {string} [skillsDir=SKILLS_DIR] - Directory to scan.
 * @returns {{name: string, content: string, description: string}[]}
 */
function loadSkills(skillsDir = SKILLS_DIR) {
    let entries;
    try {
        entries = readdirSync(skillsDir, { withFileTypes: true });
    }
    catch {
        // Skills directory missing — not fatal
        return [];
    }
    const skills = [];
    for (const entry of entries) {
        if (!entry.isDirectory()) {
            continue;
        }
        let content;
        try {
            content = readFileSync(join(skillsDir, entry.name, "SKILL.md"), "utf-8");
        }
        catch {
            // Skill file missing or unreadable — skip this directory
            continue;
        }
        const description = extractSkillDescription(content) || entry.name;
        skills.push({ name: entry.name, content, description });
    }
    return skills;
}
const loadedSkills = loadSkills();
86
// L1: compact server instructions (~200 tokens). The MCP client injects these
// into its system prompt at connection time; they summarize the key patterns
// and point at the L2 skill resources for full documentation.
const SERVER_INSTRUCTIONS = [
    "llm-cli-gateway: Multi-LLM orchestration via MCP.",
    "",
    "Tools: claude_request, codex_request, gemini_request (sync) | *_request_async (async)",
    "Jobs: llm_job_status, llm_job_result, llm_job_cancel",
    "Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all",
    "Other: list_models, approval_list, llm_process_health",
    "",
    "Key behaviors:",
    `- Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.`,
    "- Sessions: Claude --continue, Gemini --resume (real CLI continuity). Codex bookkeeping only.",
    '- Approval gates: opt-in via approvalStrategy:"mcp_managed".',
    "- Idle timeout kills stuck processes (default 10min, configurable via idleTimeoutMs).",
    "",
    "Skills (full docs via MCP resources):",
    // Kept as ONE element so an empty skill list still leaves the trailing newline.
    loadedSkills.map(skill => `- skills://${skill.name} — ${skill.description}`).join("\n")
].join("\n");
const server = new McpServer(
    { name: "llm-cli-gateway", version: "1.0.0" },
    { instructions: SERVER_INSTRUCTIONS }
);
// Global state (initialized asynchronously)
let sessionManager;
let db = null;
const performanceMetrics = new PerformanceMetrics();
let resourceProvider;
// The job manager receives a callback that forwards (cli, duration, success)
// to the shared performance metrics.
const asyncJobManager = new AsyncJobManager(logger, (jobCli, elapsedMs, succeeded) => {
    performanceMetrics.recordRequest(jobCli, elapsedMs, succeeded);
});
const approvalManager = new ApprovalManager(undefined, logger);
const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
114
// Per-CLI idle timeouts: a process is killed after this long without any
// stdout/stderr activity.
// The Claude value only applies in stream-json mode (with --include-partial-messages);
// in text/json mode Claude stays silent until it finishes, so an idle timeout
// would produce false positives.
const CLI_IDLE_TIMEOUTS = {
    claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
    codex: 600_000, // 10 minutes — Codex streams stderr progress
    gemini: 600_000 // 10 minutes — Gemini streams stdout in real-time
};
/**
 * Picks the idle timeout for a CLI.
 *
 * @param {string} cli - Key into CLI_IDLE_TIMEOUTS ("claude", "codex" or "gemini").
 * @param {number} [override] - Caller-supplied timeout; any value other than
 *   undefined (including 0) wins over the table.
 * @returns {number|undefined} The override, else the table entry for `cli`.
 */
function resolveIdleTimeout(cli, override) {
    return override === undefined ? CLI_IDLE_TIMEOUTS[cli] : override;
}
127
const SYNC_POLL_INTERVAL_MS = 1_000;
/**
 * Runs a CLI call as an async job and polls it until it finishes or the sync
 * deadline passes.
 *
 * With SYNC_DEADLINE_MS === 0 deferral is disabled and the CLI is executed
 * directly through executeCli.
 *
 * @returns {Promise<object>} Either `{ stdout, stderr, code }` for a job that
 *   finished in time (code falls back to 1 when the job has no exit code or
 *   its result is unavailable), or a deferral marker
 *   `{ deferred: true, jobId, cli, correlationId, message }`.
 */
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat) {
    // Disabled — fall through to direct execution
    if (SYNC_DEADLINE_MS === 0) {
        return executeCli(cli, args, { idleTimeout: idleTimeoutMs, logger });
    }
    const job = asyncJobManager.startJob(cli, args, corrId, undefined, idleTimeoutMs, outputFormat);
    const stopPollingAt = Date.now() + SYNC_DEADLINE_MS;
    while (Date.now() < stopPollingAt) {
        const snapshot = asyncJobManager.getJobSnapshot(job.id);
        const stillPending = !snapshot || snapshot.status === "running";
        if (stillPending) {
            await new Promise(wake => setTimeout(wake, SYNC_POLL_INTERVAL_MS));
            continue;
        }
        // Job finished within deadline — extract result
        const finished = asyncJobManager.getJobResult(job.id);
        return finished
            ? { stdout: finished.stdout, stderr: finished.stderr, code: finished.exitCode ?? 1 }
            : { stdout: "", stderr: "Job result unavailable", code: 1 };
    }
    // Deadline exceeded — hand the still-running job back as a deferral
    logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
    return {
        deferred: true,
        jobId: job.id,
        cli,
        correlationId: corrId,
        message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, fetch with llm_job_result.`
    };
}
165
/**
 * Tells whether a value is the deferral marker produced when a sync call
 * outlives the sync deadline.
 *
 * Uses optional chaining rather than the `in` operator so that null,
 * undefined and primitive values return false instead of throwing a TypeError.
 *
 * @param {unknown} result - CLI result or deferral marker.
 * @returns {boolean} true only when `result.deferred` is exactly `true`.
 */
function isDeferredResponse(result) {
    return result?.deferred === true;
}
168
/**
 * Wraps a deferral marker in an MCP tool response.
 *
 * The single text content item is pretty-printed JSON that carries the job
 * reference plus the names of the tools used to poll, fetch and cancel it.
 *
 * @param {{jobId: string, cli: string, correlationId: string, message: string}} deferred
 * @param {string} [sessionId] - Reported as null when falsy.
 * @returns {{content: {type: string, text: string}[]}}
 */
function buildDeferredToolResponse(deferred, sessionId) {
    const { jobId, cli, correlationId, message } = deferred;
    const payload = {
        status: "deferred",
        jobId,
        cli,
        correlationId,
        message,
        sessionId: sessionId || null,
        pollWith: "llm_job_status",
        fetchWith: "llm_job_result",
        cancelWith: "llm_job_cancel"
    };
    const text = JSON.stringify(payload, null, 2);
    return { content: [{ type: "text", text }] };
}
186
/**
 * Builds the standardized MCP error response for a failed CLI invocation and
 * logs the failure.
 *
 * @param {string} cli - CLI name used in the message ("claude", "codex", "gemini").
 * @param {number} code - Exit code; 124 is reported as a wall-clock timeout and
 *   125 as an idle-timeout kill.
 * @param {string} stderr - Captured stderr appended to exit-code messages.
 * @param {string} [correlationId] - Log prefix; "unknown" when absent.
 * @param {unknown} [error] - Spawn/runtime failure. When truthy it takes
 *   precedence over `code`. Any thrown value is accepted, not only Error
 *   instances, so the error path cannot itself throw on `error.message`.
 * @returns {{content: {type: string, text: string}[], isError: true}}
 */
function createErrorResponse(cli, code, stderr, correlationId, error) {
    const logPrefix = `[${correlationId || "unknown"}]`;
    const stderrText = stderr ?? "";
    let errorMessage = `Error executing ${cli} CLI`;
    if (error) {
        // Command not found or spawn error. Thrown strings/objects have no
        // usable .message, so fall back to their string form.
        const detail = typeof error.message === "string" ? error.message : String(error);
        errorMessage += `:\n${detail}`;
        // Node spawn failures expose the errno name on error.code; keep the
        // substring check for errors that only mention it in the message.
        if (error.code === "ENOENT" || detail.includes("ENOENT")) {
            errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
        }
        logger.error(`${logPrefix} ${cli} CLI execution failed:`, detail);
    }
    else if (code === 124) {
        // Wall-clock timeout
        errorMessage += `: Command timed out\n${stderrText}`;
        logger.error(`${logPrefix} ${cli} CLI timed out`);
    }
    else if (code === 125) {
        // Idle timeout (stuck process)
        errorMessage += `: Process killed due to inactivity\n${stderrText}`;
        logger.error(`${logPrefix} ${cli} CLI killed due to inactivity`);
    }
    else if (code !== 0) {
        // Other non-zero exit code
        errorMessage += ` (exit code ${code}):\n${stderrText}`;
        logger.error(`${logPrefix} ${cli} CLI failed with exit code ${code}`);
    }
    return {
        content: [{ type: "text", text: errorMessage }],
        isError: true
    };
}
217
/**
 * Builds the MCP error response returned when the MCP-managed approval policy
 * does not approve an operation.
 *
 * @param {string} operation - Tool name placed in the error text.
 * @param {object} decision - Approval decision, echoed under `approval`.
 * @returns {{content: {type: string, text: string}[], isError: true}}
 */
function createApprovalDeniedResponse(operation, decision) {
    const body = {
        success: false,
        error: `${operation} denied by MCP-managed approval policy`,
        approval: decision
    };
    const textItem = { type: "text", text: JSON.stringify(body, null, 2) };
    return { content: [textItem], isError: true };
}
230
/**
 * Normalizes the caller's MCP server selection.
 *
 * @param {string[]} [mcpServers] - Requested server names.
 * @returns {string[]} A new array with duplicates removed (first occurrence
 *   order kept), or ["sqry"] when nothing was requested.
 */
function normalizeMcpServers(mcpServers) {
    const nothingRequested = !mcpServers || mcpServers.length === 0;
    return nothingRequested ? ["sqry"] : Array.from(new Set(mcpServers));
}
236
/**
 * Builds the MCP error response for a failure to prepare the Claude MCP config.
 *
 * @param {string} operation - Tool name placed in the error text.
 * @param {string} correlationId - Request correlation id echoed in the payload.
 * @param {string[]} requested - Server names the caller asked for.
 * @param {string} message - Human-readable failure reason.
 * @param {string[]} [missing=[]] - Requested servers that are unavailable.
 * @returns {{content: {type: string, text: string}[], isError: true}}
 */
function createMcpConfigErrorResponse(operation, correlationId, requested, message, missing = []) {
    const report = {
        success: false,
        error: `${operation} failed to prepare Claude MCP config`,
        message,
        correlationId,
        mcpServers: { requested, missing }
    };
    const textItem = { type: "text", text: JSON.stringify(report, null, 2) };
    return { content: [textItem], isError: true };
}
254
/**
 * Builds the Claude MCP config for the requested servers.
 *
 * @param {string} operation - Tool name used in logs and error payloads.
 * @param {string} correlationId - Request correlation id.
 * @param {string[]} requestedMcpServers - Normalized server names.
 * @param {boolean} [strictMcpConfig] - When truthy, any unavailable requested
 *   server is an error instead of being tolerated.
 * @returns {{config: object} | {errorResponse: object}} The built config, or a
 *   ready-to-return MCP error response when building throws or strict mode
 *   finds missing servers.
 */
function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, strictMcpConfig) {
    let built;
    try {
        built = buildClaudeMcpConfig(requestedMcpServers);
    }
    catch (cause) {
        const reason = cause instanceof Error ? cause.message : String(cause);
        logger.error(`[${correlationId}] ${operation} failed to build Claude MCP config: ${reason}`);
        const errorResponse = createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, reason);
        return { errorResponse };
    }
    // Missing servers only matter in strict mode.
    if (!strictMcpConfig || !(built.missing.length > 0)) {
        return { config: built };
    }
    const unavailable = built.missing.join(", ");
    const errorResponse = createMcpConfigErrorResponse(operation, correlationId, requestedMcpServers, `strictMcpConfig=true but requested servers are unavailable: ${unavailable}`, built.missing);
    return { errorResponse };
}
274
//──────────────────────────────────────────────────────────────────────────────
// MCP Resources
//──────────────────────────────────────────────────────────────────────────────
// Skill resources (L2: full docs, read on demand). One markdown resource per
// loaded skill, served straight from the content read at startup.
for (const skill of loadedSkills) {
    const skillUri = `skills://${skill.name}`;
    const skillMeta = {
        title: skill.name,
        description: skill.description,
        mimeType: "text/markdown"
    };
    server.registerResource(`skill-${skill.name}`, skillUri, skillMeta, async () => ({
        contents: [{ uri: skillUri, mimeType: "text/markdown", text: skill.content }]
    }));
}
logger.info(`Registered ${loadedSkills.length} skill resources`);
// JSON resources backed by resourceProvider. Each row is
// [resource name, uri, title, description, label used in the debug log].
// Registration order matches the row order: sessions, then models, then metrics.
for (const [resourceName, resourceUri, title, description, logLabel] of [
    ["all-sessions", "sessions://all", "📋 All Sessions", "All conversation sessions across CLIs", "all sessions"],
    ["claude-sessions", "sessions://claude", "🤖 Claude Sessions", "Claude conversation sessions", "Claude sessions"],
    ["codex-sessions", "sessions://codex", "💻 Codex Sessions", "Codex conversation sessions", "Codex sessions"],
    ["gemini-sessions", "sessions://gemini", "✨ Gemini Sessions", "Gemini conversation sessions", "Gemini sessions"],
    ["claude-models", "models://claude", "🧠 Claude Models", "Claude models and capabilities", "Claude models"],
    ["codex-models", "models://codex", "🔧 Codex Models", "Codex models and capabilities", "Codex models"],
    ["gemini-models", "models://gemini", "🌟 Gemini Models", "Gemini models and capabilities", "Gemini models"],
    ["performance-metrics", "metrics://performance", "📈 Performance Metrics", "Request counts, latency, success/failure rates", "performance metrics"]
]) {
    server.registerResource(resourceName, resourceUri, { title, description, mimeType: "application/json" }, async (uri) => {
        logger.debug(`Reading ${logLabel} resource`);
        // resourceProvider is assigned elsewhere, so it is looked up at read time.
        const contents = await resourceProvider.readResource(uri.href);
        return { contents: contents ? [contents] : [] };
    });
}
372
/**
 * Validates a Claude request and turns it into CLI arguments.
 *
 * Steps, in order: resolve the model alias, run the review-integrity check on
 * the raw prompt, optionally optimize the prompt, resolve the MCP config, run
 * the MCP-managed approval gate (when requested), then assemble the argv.
 *
 * @param {object} params - Request fields (prompt, model, outputFormat,
 *   allowedTools, disallowedTools, mcpServers, strictMcpConfig,
 *   approvalStrategy, approvalPolicy, dangerouslySkipPermissions,
 *   optimizePrompt, correlationId, operation).
 * @returns {object} On success `{ corrId, effectivePrompt, resolvedModel,
 *   requestedMcpServers, mcpConfig, approvalDecision, reviewIntegrity, args }`;
 *   otherwise a ready-to-return MCP error response (MCP config failure or
 *   approval denial), which has no `args` property.
 */
function prepareClaudeRequest(params) {
    const { operation, allowedTools, disallowedTools, strictMcpConfig } = params;
    const managedApproval = params.approvalStrategy === "mcp_managed";
    const corrId = params.correlationId || randomUUID();
    const resolvedModel = resolveModelAlias("claude", params.model, getCliInfo());
    // Review integrity check on raw prompt (before optimization)
    const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools, disallowedTools });
    if (reviewIntegrity.violations.length > 0) {
        const violationTypes = reviewIntegrity.violations.map(v => v.type).join(", ");
        logger.info(`[${corrId}] Review integrity violations detected: ${violationTypes}`, {
            cli: "claude", operation, score: reviewIntegrity.totalScore
        });
    }
    let effectivePrompt = params.prompt;
    if (params.optimizePrompt) {
        const compacted = optimizePromptText(effectivePrompt);
        logOptimizationTokens("prompt", corrId, effectivePrompt, compacted);
        effectivePrompt = compacted;
    }
    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
    const resolution = resolveClaudeMcpConfig(operation, corrId, requestedMcpServers, strictMcpConfig);
    if ("errorResponse" in resolution) {
        return resolution.errorResponse;
    }
    const mcpConfig = resolution.config;
    let approvalDecision = null;
    if (managedApproval) {
        approvalDecision = approvalManager.decide({
            cli: "claude",
            operation,
            prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
            bypassRequested: params.dangerouslySkipPermissions,
            fullAuto: false,
            requestedMcpServers,
            allowedTools,
            disallowedTools,
            policy: params.approvalPolicy,
            metadata: { model: resolvedModel || "default", strictMcpConfig },
            reviewIntegrity
        });
        if (approvalDecision.status !== "approved") {
            return createApprovalDeniedResponse(operation, approvalDecision);
        }
    }
    const args = ["-p", effectivePrompt];
    if (resolvedModel) {
        args.push("--model", resolvedModel);
    }
    switch (params.outputFormat) {
        case "json":
            args.push("--output-format", "json");
            break;
        case "stream-json":
            args.push("--output-format", "stream-json", "--include-partial-messages");
            break;
        default:
            break;
    }
    if (allowedTools && allowedTools.length > 0) {
        args.push("--allowed-tools", ...allowedTools);
    }
    if (disallowedTools && disallowedTools.length > 0) {
        args.push("--disallowed-tools", ...disallowedTools);
    }
    // Both an approved MCP-managed request and an explicit skip request run
    // the CLI with permissions bypassed.
    if (managedApproval || params.dangerouslySkipPermissions) {
        args.push("--permission-mode", "bypassPermissions");
    }
    if (strictMcpConfig || mcpConfig.enabled.length > 0) {
        args.push("--mcp-config", mcpConfig.path);
        if (strictMcpConfig) {
            args.push("--strict-mcp-config");
        }
    }
    return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, mcpConfig, approvalDecision, reviewIntegrity, args };
}
443
/**
 * Validates a Codex request and turns it into `codex exec` arguments.
 *
 * Steps, in order: resolve the model alias, run the review-integrity check on
 * the raw prompt, optionally optimize the prompt, run the MCP-managed approval
 * gate (when requested), then assemble the argv with the prompt last.
 *
 * @param {object} params - Request fields (prompt, model, fullAuto,
 *   dangerouslyBypassApprovalsAndSandbox, mcpServers, approvalStrategy,
 *   approvalPolicy, optimizePrompt, correlationId, operation).
 * @returns {object} On success `{ corrId, effectivePrompt, resolvedModel,
 *   requestedMcpServers, approvalDecision, reviewIntegrity, args }`; otherwise
 *   the approval-denied MCP response, which has no `args` property.
 */
function prepareCodexRequest(params) {
    const { operation } = params;
    const corrId = params.correlationId || randomUUID();
    const resolvedModel = resolveModelAlias("codex", params.model, getCliInfo());
    // Review integrity check on raw prompt (before optimization)
    const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt });
    if (reviewIntegrity.violations.length > 0) {
        const violationTypes = reviewIntegrity.violations.map(v => v.type).join(", ");
        logger.info(`[${corrId}] Review integrity violations detected: ${violationTypes}`, {
            cli: "codex", operation, score: reviewIntegrity.totalScore
        });
    }
    let effectivePrompt = params.prompt;
    if (params.optimizePrompt) {
        const compacted = optimizePromptText(effectivePrompt);
        logOptimizationTokens("prompt", corrId, effectivePrompt, compacted);
        effectivePrompt = compacted;
    }
    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
    let approvalDecision = null;
    if (params.approvalStrategy === "mcp_managed") {
        approvalDecision = approvalManager.decide({
            cli: "codex",
            operation,
            prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
            bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
            fullAuto: params.fullAuto,
            requestedMcpServers,
            policy: params.approvalPolicy,
            metadata: { model: resolvedModel || "default" },
            reviewIntegrity
        });
        if (approvalDecision.status !== "approved") {
            return createApprovalDeniedResponse(operation, approvalDecision);
        }
    }
    // argv layout: exec [--model M] [--full-auto] [--dangerously-...] --skip-git-repo-check <prompt>
    const optionalFlags = [];
    if (resolvedModel) {
        optionalFlags.push("--model", resolvedModel);
    }
    if (params.fullAuto) {
        optionalFlags.push("--full-auto");
    }
    if (params.dangerouslyBypassApprovalsAndSandbox) {
        optionalFlags.push("--dangerously-bypass-approvals-and-sandbox");
    }
    const args = ["exec", ...optionalFlags, "--skip-git-repo-check", effectivePrompt];
    return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
}
489
/**
 * Validates a Gemini request and turns it into CLI arguments.
 *
 * Steps, in order: resolve the model alias, run the review-integrity check on
 * the raw prompt, optionally optimize the prompt, run the MCP-managed approval
 * gate (when requested), then assemble the argv with the prompt first.
 *
 * @param {object} params - Request fields (prompt, model, approvalMode,
 *   approvalStrategy, approvalPolicy, allowedTools, includeDirs, mcpServers,
 *   optimizePrompt, correlationId, operation).
 * @returns {object} On success `{ corrId, effectivePrompt, resolvedModel,
 *   requestedMcpServers, approvalDecision, reviewIntegrity, args }`; otherwise
 *   the approval-denied MCP response, which has no `args` property.
 */
function prepareGeminiRequest(params) {
    const { operation, allowedTools, includeDirs } = params;
    const managedApproval = params.approvalStrategy === "mcp_managed";
    const corrId = params.correlationId || randomUUID();
    const resolvedModel = resolveModelAlias("gemini", params.model, getCliInfo());
    // Review integrity check on raw prompt (before optimization)
    const reviewIntegrity = checkReviewIntegrity({ prompt: params.prompt, allowedTools });
    if (reviewIntegrity.violations.length > 0) {
        const violationTypes = reviewIntegrity.violations.map(v => v.type).join(", ");
        logger.info(`[${corrId}] Review integrity violations detected: ${violationTypes}`, {
            cli: "gemini", operation, score: reviewIntegrity.totalScore
        });
    }
    let effectivePrompt = params.prompt;
    if (params.optimizePrompt) {
        const compacted = optimizePromptText(effectivePrompt);
        logOptimizationTokens("prompt", corrId, effectivePrompt, compacted);
        effectivePrompt = compacted;
    }
    const requestedMcpServers = normalizeMcpServers(params.mcpServers);
    let approvalDecision = null;
    if (managedApproval) {
        approvalDecision = approvalManager.decide({
            cli: "gemini",
            operation,
            prompt: params.prompt, // Use raw prompt for review-context detection, not optimized
            bypassRequested: params.approvalMode === "yolo",
            fullAuto: false,
            requestedMcpServers,
            allowedTools,
            policy: params.approvalPolicy,
            metadata: { model: resolvedModel || "default" },
            reviewIntegrity
        });
        if (approvalDecision.status !== "approved") {
            return createApprovalDeniedResponse(operation, approvalDecision);
        }
    }
    // An approved MCP-managed request always runs the CLI in "yolo" mode.
    const effectiveApprovalMode = managedApproval ? "yolo" : params.approvalMode;
    const args = [effectivePrompt];
    if (resolvedModel) {
        args.push("--model", resolvedModel);
    }
    if (effectiveApprovalMode) {
        args.push("--approval-mode", effectiveApprovalMode);
    }
    // Repeatable flags: each value gets its own "--flag value" pair.
    if (allowedTools && allowedTools.length > 0) {
        for (const tool of allowedTools) {
            args.push("--allowed-tools", tool);
        }
    }
    if (requestedMcpServers.length > 0) {
        for (const serverName of requestedMcpServers) {
            args.push("--allowed-mcp-server-names", serverName);
        }
    }
    if (includeDirs && includeDirs.length > 0) {
        for (const dir of includeDirs) {
            args.push("--include-directories", dir);
        }
    }
    return { corrId, effectivePrompt, resolvedModel, requestedMcpServers, approvalDecision, reviewIntegrity, args };
}
542
/**
 * Builds the MCP tool response for a successful CLI call.
 *
 * @param {string} stdout - Raw CLI output.
 * @param {boolean} optimizeResponse - Run the response optimizer on the text.
 * @param {string} corrId - Correlation id used when logging token savings.
 * @param {string} [sessionId] - Included in the response when truthy.
 * @param {object} prep - Result of the matching prepare*Request call.
 * @param {boolean} [resumable] - Included whenever it is not undefined.
 * @param {string} [outputFormat] - With "json" the text is left untouched: no
 *   optimization and no appended warnings, so structured output stays parseable.
 * @returns {object} `{ content, mcpServers }` plus, when applicable,
 *   `sessionId`, `resumable`, `approval` and `reviewIntegrity`.
 */
function buildCliResponse(stdout, optimizeResponse, corrId, sessionId, prep, resumable, outputFormat) {
    const textMayChange = outputFormat !== "json";
    const integrity = prep.reviewIntegrity;
    const hasViolations = Boolean(integrity) && integrity.violations.length > 0;
    let text = stdout;
    if (optimizeResponse && textMayChange) {
        const compacted = optimizeResponseText(text);
        logOptimizationTokens("response", corrId, text, compacted);
        text = compacted;
    }
    if (hasViolations && textMayChange) {
        const bulletList = integrity.violations.map(v => `- [${v.type}] ${v.detail}`).join("\n");
        text += `\n\n⚠️ Review Integrity Warnings (score: ${integrity.totalScore}):\n${bulletList}`;
    }
    const mcpServers = prep.mcpConfig
        ? { requested: prep.requestedMcpServers, enabled: prep.mcpConfig.enabled, missing: prep.mcpConfig.missing }
        : { requested: prep.requestedMcpServers };
    // Optional fields are spread in a fixed order so the response key order is stable.
    return {
        content: [{ type: "text", text }],
        mcpServers,
        ...(sessionId ? { sessionId } : {}),
        ...(resumable !== undefined ? { resumable } : {}),
        ...(prep.approvalDecision ? { approval: prep.approvalDecision } : {}),
        ...(hasViolations ? { reviewIntegrity: integrity } : {})
    };
}
577
/**
 * Handles the synchronous `gemini_request` tool.
 *
 * Flow: prepare the request (an error or approval denial is returned as-is),
 * plan the session resume arguments, run the CLI through awaitJobOrDefer, and
 * then either return a deferred job reference, an error response, or the CLI
 * output wrapped by buildCliResponse. Session records are only touched after
 * the CLI succeeds, so a failed call leaves no phantom session behind.
 *
 * Metrics: one request is recorded per call in `finally`, EXCEPT when the call
 * was deferred. A deferred job is still running, so recording it here would
 * count it as a failed request. NOTE(review): the async job manager is
 * constructed with a metrics callback — presumably it records the job when it
 * finishes; confirm against AsyncJobManager.
 *
 * @param {{logger: object, sessionManager: object}} deps - Injected collaborators.
 * @param {object} params - Tool arguments (prompt, model, approvalMode,
 *   approvalStrategy, approvalPolicy, allowedTools, includeDirs, mcpServers,
 *   correlationId, optimizePrompt, optimizeResponse, sessionId, resumeLatest,
 *   createNewSession, idleTimeoutMs).
 * @returns {Promise<object>} An MCP tool response; failures are reported via
 *   an `isError` response rather than a rejection.
 */
export async function handleGeminiRequest(deps, params) {
    const startTime = Date.now();
    const prep = prepareGeminiRequest({
        prompt: params.prompt, model: params.model, approvalMode: params.approvalMode,
        approvalStrategy: params.approvalStrategy, approvalPolicy: params.approvalPolicy,
        allowedTools: params.allowedTools, includeDirs: params.includeDirs,
        mcpServers: params.mcpServers, correlationId: params.correlationId,
        optimizePrompt: params.optimizePrompt, operation: "gemini_request"
    });
    // A prepared request always carries `args`; anything else is an error response.
    if (!("args" in prep))
        return prep;
    const { corrId, args } = prep;
    let durationMs = 0;
    let wasSuccessful = false;
    let wasDeferred = false;
    deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${params.prompt.length}`);
    try {
        // Session arg planning (pure, no I/O)
        const sessionResult = resolveSessionResumeArgs({
            sessionId: params.sessionId, resumeLatest: params.resumeLatest, createNewSession: params.createNewSession
        });
        args.push(...sessionResult.resumeArgs);
        const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs));
        // Deferred — job still running, return async reference
        if (isDeferredResponse(result)) {
            wasDeferred = true;
            return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
        }
        const { stdout, stderr, code } = result;
        durationMs = Math.max(0, Date.now() - startTime);
        if (code !== 0) {
            deps.logger.info(`[${corrId}] gemini_request failed in ${durationMs}ms`);
            return createErrorResponse("gemini", code, stderr, corrId);
        }
        wasSuccessful = true;
        // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
        let effectiveSessionId = sessionResult.effectiveSessionId;
        if (sessionResult.userProvidedSession && effectiveSessionId) {
            const existing = await deps.sessionManager.getSession(effectiveSessionId);
            if (!existing) {
                try {
                    await deps.sessionManager.createSession("gemini", "Gemini Session", effectiveSessionId);
                }
                catch {
                    // Creation may fail because the session appeared in the
                    // meantime; only give up if it is still absent.
                    const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
                    if (!rechecked)
                        throw new Error(`Failed to create or find session ${effectiveSessionId}`);
                }
            }
            await deps.sessionManager.updateSessionUsage(effectiveSessionId);
        }
        else if (!params.createNewSession && !effectiveSessionId) {
            const newSession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
            effectiveSessionId = newSession.id;
        }
        deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
        return buildCliResponse(stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, sessionResult.userProvidedSession);
    }
    catch (error) {
        const elapsedMs = Math.max(0, Date.now() - startTime);
        deps.logger.info(`[${corrId}] gemini_request threw exception after ${elapsedMs}ms`);
        return createErrorResponse("gemini", 1, "", corrId, error);
    }
    finally {
        // Skip deferred calls: the job has not finished, so it is neither a
        // success nor a failure yet.
        if (!wasDeferred) {
            const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
            performanceMetrics.recordRequest("gemini", finalizedDurationMs, wasSuccessful);
        }
    }
}
643
/**
 * Launches a Gemini CLI run as a background job and reports the job handle.
 *
 * All session bookkeeping is finished before the job is started, so a failure
 * while reading or writing session records never leaves a running job behind.
 *
 * @param deps   Collaborators used here: `sessionManager`, `asyncJobManager`, `logger`.
 * @param params Tool arguments for `gemini_request_async`.
 * @returns Whatever `prepareGeminiRequest` produced when it carries no `args`;
 *          otherwise a text content payload holding the JSON-encoded job
 *          description, or the result of `createErrorResponse` when anything
 *          in the guarded section throws.
 */
export async function handleGeminiRequestAsync(deps, params) {
    const prep = prepareGeminiRequest({
        prompt: params.prompt,
        model: params.model,
        approvalMode: params.approvalMode,
        approvalStrategy: params.approvalStrategy,
        approvalPolicy: params.approvalPolicy,
        allowedTools: params.allowedTools,
        includeDirs: params.includeDirs,
        mcpServers: params.mcpServers,
        correlationId: params.correlationId,
        optimizePrompt: params.optimizePrompt,
        operation: "gemini_request_async"
    });
    // Preparation hands back a ready-made response (no `args`) when it cannot proceed.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args, requestedMcpServers, approvalDecision } = prep;
    try {
        // Work out the resume flags first; this step performs no I/O.
        const resumePlan = resolveSessionResumeArgs({
            sessionId: params.sessionId,
            resumeLatest: params.resumeLatest,
            createNewSession: params.createNewSession
        });
        args.push(...resumePlan.resumeArgs);
        let trackedSessionId = resumePlan.effectiveSessionId;
        if (resumePlan.userProvidedSession && trackedSessionId) {
            const knownSession = await deps.sessionManager.getSession(trackedSessionId);
            if (!knownSession) {
                try {
                    await deps.sessionManager.createSession("gemini", "Gemini Session", trackedSessionId);
                }
                catch {
                    // Creation failed: only give up if the record is still absent.
                    const afterFailure = await deps.sessionManager.getSession(trackedSessionId);
                    if (!afterFailure) {
                        throw new Error(`Failed to create or find session ${trackedSessionId}`);
                    }
                }
            }
            await deps.sessionManager.updateSessionUsage(trackedSessionId);
        }
        else if (!params.createNewSession && !trackedSessionId) {
            const gatewaySession = await deps.sessionManager.createSession("gemini", "Gemini Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
            trackedSessionId = gatewaySession.id;
        }
        // The job starts only once every session read/write above has succeeded.
        const idleTimeout = resolveIdleTimeout("gemini", params.idleTimeoutMs);
        const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, idleTimeout);
        deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
        const payload = {
            success: true,
            job,
            sessionId: trackedSessionId || null,
            resumable: resumePlan.userProvidedSession,
            approval: approvalDecision,
            mcpServers: { requested: requestedMcpServers }
        };
        // Review-integrity details are attached only when violations were recorded.
        const integrity = prep.reviewIntegrity;
        if (integrity && integrity.violations.length > 0) {
            payload.reviewIntegrity = integrity;
        }
        const text = JSON.stringify(payload, null, 2);
        return { content: [{ type: "text", text }] };
    }
    catch (error) {
        return createErrorResponse("gemini_request_async", 1, "", corrId, error);
    }
}
705
/**
 * Launches a Codex CLI run as a background job and reports the job handle.
 *
 * Session records are read/written before the job is started, so a session
 * failure never leaves a running job behind.
 *
 * @param deps   Collaborators used here: `sessionManager`, `asyncJobManager`, `logger`.
 * @param params Tool arguments for `codex_request_async`.
 * @returns Whatever `prepareCodexRequest` produced when it carries no `args`;
 *          otherwise a text content payload holding the JSON-encoded job
 *          description, or the result of `createErrorResponse` when anything
 *          in the guarded section throws.
 */
export async function handleCodexRequestAsync(deps, params) {
    const prep = prepareCodexRequest({
        prompt: params.prompt,
        model: params.model,
        fullAuto: params.fullAuto,
        dangerouslyBypassApprovalsAndSandbox: params.dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy: params.approvalStrategy,
        approvalPolicy: params.approvalPolicy,
        mcpServers: params.mcpServers,
        correlationId: params.correlationId,
        optimizePrompt: params.optimizePrompt,
        operation: "codex_request_async"
    });
    // Preparation hands back a ready-made response (no `args`) when it cannot proceed.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args, requestedMcpServers, approvalDecision } = prep;
    try {
        let trackedSessionId = params.sessionId;
        if (params.sessionId) {
            // Caller named a session: just record that it was used.
            await deps.sessionManager.updateSessionUsage(params.sessionId);
        }
        else if (params.createNewSession) {
            const forcedSession = await deps.sessionManager.createSession("codex", "Codex Session");
            trackedSessionId = forcedSession.id;
        }
        else {
            // Nothing requested: reuse the active session, creating one if there is none.
            const activeSession = await deps.sessionManager.getActiveSession("codex");
            const chosenSession = activeSession
                ? activeSession
                : await deps.sessionManager.createSession("codex", "Codex Session");
            trackedSessionId = chosenSession.id;
        }
        // The job starts only once every session read/write above has succeeded.
        const idleTimeout = resolveIdleTimeout("codex", params.idleTimeoutMs);
        const job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, idleTimeout);
        deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
        const payload = {
            success: true,
            job,
            sessionId: trackedSessionId || null,
            approval: approvalDecision,
            mcpServers: { requested: requestedMcpServers }
        };
        // Review-integrity details are attached only when violations were recorded.
        const integrity = prep.reviewIntegrity;
        if (integrity && integrity.violations.length > 0) {
            payload.reviewIntegrity = integrity;
        }
        const text = JSON.stringify(payload, null, 2);
        return { content: [{ type: "text", text }] };
    }
    catch (error) {
        return createErrorResponse("codex_request_async", 1, "", corrId, error);
    }
}
761
+ //──────────────────────────────────────────────────────────────────────────────
762
+ // Claude Code Tool
763
+ //──────────────────────────────────────────────────────────────────────────────
764
// Tool `claude_request`: runs the Claude CLI and waits for it, returning either
// the CLI output or a deferred job reference when the run is still in progress.
// Every invocation is recorded in `performanceMetrics`, whatever the outcome.
server.tool("claude_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
    model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
    outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
    continueSession: z.boolean().default(false).describe("Continue active session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
    dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
    strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async (toolArgs) => {
    const { prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs } = toolArgs;
    const startedAt = Date.now();
    const prep = prepareClaudeRequest({
        prompt,
        model,
        outputFormat,
        allowedTools,
        disallowedTools,
        dangerouslySkipPermissions,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        strictMcpConfig,
        correlationId,
        optimizePrompt,
        operation: "claude_request"
    });
    // Preparation hands back a ready-made response (no `args`) when it cannot proceed.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args } = prep;
    // Both are read by `finally`, so they live outside the try block.
    let measuredMs = 0;
    let succeeded = false;
    logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prompt.length}, sessionId=${sessionId}`);
    try {
        const activeSession = await sessionManager.getActiveSession("claude");
        // With no explicit session instruction, fall back to the active session.
        const adoptActive = !createNewSession && !continueSession && !sessionId && Boolean(activeSession);
        const trackedSessionId = adoptActive ? activeSession.id : sessionId;
        // Naming the active session explicitly is treated the same as continuing it.
        const targetsActive = Boolean(trackedSessionId) && activeSession?.id === trackedSessionId;
        if (continueSession || adoptActive || targetsActive) {
            args.push("--continue");
        }
        else if (trackedSessionId) {
            args.push("--session-id", trackedSessionId);
            await sessionManager.updateSessionUsage(trackedSessionId);
        }
        // Idle timeout applies to stream-json only (text/json emit nothing until done).
        let idleTimeout;
        if (outputFormat === "stream-json") {
            idleTimeout = resolveIdleTimeout("claude", idleTimeoutMs);
        }
        const outcome = await awaitJobOrDefer("claude", args, corrId, idleTimeout, outputFormat);
        // Still running: hand back an async reference instead of output.
        if (isDeferredResponse(outcome)) {
            return buildDeferredToolResponse(outcome, trackedSessionId);
        }
        const { stdout, stderr, code } = outcome;
        measuredMs = Math.max(0, Date.now() - startedAt);
        if (code !== 0) {
            logger.info(`[${corrId}] claude_request failed in ${measuredMs}ms`);
            return createErrorResponse("claude", code, stderr, corrId);
        }
        succeeded = true;
        // Start tracking a session id that was used but has no record yet.
        if (trackedSessionId && !(await sessionManager.getSession(trackedSessionId))) {
            await sessionManager.createSession("claude", "Claude Session", trackedSessionId);
        }
        logger.info(`[${corrId}] claude_request completed successfully in ${measuredMs}ms`);
        let responseText = stdout;
        if (outputFormat === "stream-json") {
            // stream-json stdout is NDJSON; only the extracted result text is returned.
            const parsed = parseStreamJson(stdout);
            if (parsed.costUsd !== null) {
                logger.debug(`[${corrId}] stream-json cost=$${parsed.costUsd}, model=${parsed.model}, turns=${parsed.numTurns}`);
            }
            responseText = parsed.text;
        }
        return buildCliResponse(responseText, optimizeResponse, corrId, trackedSessionId, prep, undefined, outputFormat);
    }
    catch (error) {
        const elapsedMs = Math.max(0, Date.now() - startedAt);
        logger.info(`[${corrId}] claude_request threw exception after ${elapsedMs}ms`);
        return createErrorResponse("claude", 1, "", corrId, error);
    }
    finally {
        // Use wall-clock time when no duration was captured (exception or deferral).
        const recordedMs = Math.max(0, measuredMs || Date.now() - startedAt);
        performanceMetrics.recordRequest("claude", recordedMs, succeeded);
    }
});
858
+ //──────────────────────────────────────────────────────────────────────────────
859
+ // Codex Tool
860
+ //──────────────────────────────────────────────────────────────────────────────
861
// Tool `codex_request`: runs the Codex CLI and waits for it, returning either
// the CLI output or a deferred job reference when the run is still in progress.
// Session records are touched only after the CLI exits with code 0.
server.tool("codex_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
    fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
    dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
    sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async (toolArgs) => {
    const { prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs } = toolArgs;
    const startedAt = Date.now();
    const prep = prepareCodexRequest({
        prompt,
        model,
        fullAuto,
        dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        correlationId,
        optimizePrompt,
        operation: "codex_request"
    });
    // Preparation hands back a ready-made response (no `args`) when it cannot proceed.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args } = prep;
    // Both are read by `finally`, so they live outside the try block.
    let measuredMs = 0;
    let succeeded = false;
    logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prompt.length}`);
    try {
        const idleTimeout = resolveIdleTimeout("codex", idleTimeoutMs);
        const outcome = await awaitJobOrDefer("codex", args, corrId, idleTimeout);
        // Still running: hand back an async reference instead of output.
        if (isDeferredResponse(outcome)) {
            return buildDeferredToolResponse(outcome, sessionId);
        }
        const { stdout, stderr, code } = outcome;
        measuredMs = Math.max(0, Date.now() - startedAt);
        if (code !== 0) {
            logger.info(`[${corrId}] codex_request failed in ${measuredMs}ms`);
            return createErrorResponse("codex", code, stderr, corrId);
        }
        succeeded = true;
        // Decide which session record this run is attributed to.
        let trackedSessionId = sessionId;
        if (sessionId) {
            await sessionManager.updateSessionUsage(sessionId);
        }
        else if (createNewSession) {
            const forcedSession = await sessionManager.createSession("codex", "Codex Session");
            trackedSessionId = forcedSession.id;
        }
        else {
            // Nothing requested: reuse the active session, creating one if there is none.
            const activeSession = await sessionManager.getActiveSession("codex");
            const chosenSession = activeSession
                ? activeSession
                : await sessionManager.createSession("codex", "Codex Session");
            trackedSessionId = chosenSession.id;
        }
        logger.info(`[${corrId}] codex_request completed successfully in ${measuredMs}ms`);
        return buildCliResponse(stdout, optimizeResponse, corrId, trackedSessionId, prep);
    }
    catch (error) {
        const elapsedMs = Math.max(0, Date.now() - startedAt);
        logger.info(`[${corrId}] codex_request threw exception after ${elapsedMs}ms`);
        return createErrorResponse("codex", 1, "", corrId, error);
    }
    finally {
        // Use wall-clock time when no duration was captured (exception or deferral).
        const recordedMs = Math.max(0, measuredMs || Date.now() - startedAt);
        performanceMetrics.recordRequest("codex", recordedMs, succeeded);
    }
});
933
+ //──────────────────────────────────────────────────────────────────────────────
934
+ // Gemini Tool
935
+ //──────────────────────────────────────────────────────────────────────────────
936
// Tool `gemini_request`: validates the arguments and forwards them, together
// with the session manager and logger, to `handleGeminiRequest`.
server.tool("gemini_request", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
    model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional().describe("Session ID or 'latest'"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async (toolArgs) => {
    const { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs } = toolArgs;
    const handlerDeps = { sessionManager, logger };
    // Forward only the declared fields, in a fresh object.
    const request = {
        prompt,
        model,
        sessionId,
        resumeLatest,
        createNewSession,
        approvalMode,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        allowedTools,
        includeDirs,
        correlationId,
        optimizePrompt,
        optimizeResponse,
        idleTimeoutMs
    };
    return handleGeminiRequest(handlerDeps, request);
});
955
+ //──────────────────────────────────────────────────────────────────────────────
956
+ // Async Long-Running Job Tools (No Time-Bound LLM Execution)
957
+ //──────────────────────────────────────────────────────────────────────────────
958
// Tool `claude_request_async`: starts the Claude CLI as a background job and
// returns the job handle immediately. All session reads/writes happen before
// the job is started, so a session failure never leaves a running job behind.
//
// Returns whatever `prepareClaudeRequest` produced when it carries no `args`;
// otherwise a text content payload holding the JSON-encoded job description,
// or the result of `createErrorResponse` when the guarded section throws.
server.tool("claude_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Claude"),
    model: z.string().optional().describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
    outputFormat: z.enum(["text", "json", "stream-json"]).default("text").describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
    sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
    continueSession: z.boolean().default(false).describe("Continue active session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
    dangerouslySkipPermissions: z.boolean().default(false).describe("Bypass permissions (sandbox only)"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP servers exposed to Claude"),
    strictMcpConfig: z.boolean().default(false).describe("Restrict Claude to provided MCP config only"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs }) => {
    const prep = prepareClaudeRequest({
        prompt, model, outputFormat, allowedTools, disallowedTools, dangerouslySkipPermissions,
        approvalStrategy, approvalPolicy, mcpServers,
        strictMcpConfig, correlationId, optimizePrompt, operation: "claude_request_async"
    });
    // Preparation hands back a ready-made response (no `args`) when it cannot proceed.
    if (!("args" in prep)) {
        return prep;
    }
    const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
    try {
        // Session management (before job start for async)
        let effectiveSessionId = sessionId;
        let useContinue = continueSession;
        const activeSession = await sessionManager.getActiveSession("claude");
        // With no explicit session instruction, fall back to the active session.
        if (!createNewSession && !continueSession && !sessionId && activeSession) {
            effectiveSessionId = activeSession.id;
            useContinue = true;
        }
        // Naming the active session explicitly is treated the same as continuing it.
        if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
            useContinue = true;
        }
        if (useContinue) {
            args.push("--continue");
        }
        else if (effectiveSessionId) {
            args.push("--session-id", effectiveSessionId);
        }
        if (effectiveSessionId) {
            // Make sure the record exists BEFORE touching its usage data, so the
            // usage update always targets a stored session (same order as the
            // Gemini async handler).
            const existingSession = await sessionManager.getSession(effectiveSessionId);
            if (!existingSession) {
                await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
            }
            // Usage is recorded only for runs addressed via --session-id.
            if (!useContinue) {
                await sessionManager.updateSessionUsage(effectiveSessionId);
            }
        }
        // Idle timeout only for stream-json (text/json produce no output until done)
        const effectiveIdleTimeout = outputFormat === "stream-json"
            ? resolveIdleTimeout("claude", idleTimeoutMs)
            : undefined;
        const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat);
        logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
        // Report the active session only when the run really continues it. With
        // createNewSession (and no --continue) the job runs in a fresh session,
        // so naming the active one would point callers at the wrong session.
        const continuedSessionId = useContinue ? activeSession?.id : undefined;
        const asyncResponse = {
            success: true,
            job,
            sessionId: effectiveSessionId || continuedSessionId || null,
            approval: approvalDecision,
            mcpServers: {
                requested: requestedMcpServers,
                enabled: mcpConfig?.enabled,
                missing: mcpConfig?.missing
            }
        };
        // Review-integrity details are attached only when violations were recorded.
        if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
            asyncResponse.reviewIntegrity = prep.reviewIntegrity;
        }
        return {
            content: [{
                    type: "text",
                    text: JSON.stringify(asyncResponse, null, 2)
                }]
        };
    }
    catch (error) {
        return createErrorResponse("claude_request_async", 1, "", corrId, error);
    }
});
1040
// Tool `codex_request_async`: validates the arguments and forwards them, with
// the session manager, job manager and logger, to `handleCodexRequestAsync`.
server.tool("codex_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Codex"),
    model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
    fullAuto: z.boolean().default(false).describe("Full-auto mode (sandboxed execution)"),
    dangerouslyBypassApprovalsAndSandbox: z.boolean().default(false).describe("Run Codex without approvals/sandbox"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
    sessionId: z.string().optional().describe("Session ID (Codex manages internally)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async (toolArgs) => {
    const { prompt, model, fullAuto, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, createNewSession, correlationId, optimizePrompt, idleTimeoutMs } = toolArgs;
    const handlerDeps = { sessionManager, asyncJobManager, logger };
    // Forward only the declared fields, in a fresh object.
    const request = {
        prompt,
        model,
        fullAuto,
        dangerouslyBypassApprovalsAndSandbox,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        sessionId,
        createNewSession,
        correlationId,
        optimizePrompt,
        idleTimeoutMs
    };
    return handleCodexRequestAsync(handlerDeps, request);
});
1056
// Tool `gemini_request_async`: validates the arguments and forwards them, with
// the session manager, job manager and logger, to `handleGeminiRequestAsync`.
server.tool("gemini_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
    model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(["default", "auto_edit", "yolo"]).optional().describe("Approval: default|auto_edit|yolo"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)")
}, async (toolArgs) => {
    const { prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs } = toolArgs;
    const handlerDeps = { sessionManager, asyncJobManager, logger };
    // Forward only the declared fields, in a fresh object.
    const request = {
        prompt,
        model,
        sessionId,
        resumeLatest,
        createNewSession,
        approvalMode,
        approvalStrategy,
        approvalPolicy,
        mcpServers,
        allowedTools,
        includeDirs,
        correlationId,
        optimizePrompt,
        idleTimeoutMs
    };
    return handleGeminiRequestAsync(handlerDeps, request);
});
1074
// Tool `llm_job_status`: returns the job manager's snapshot for one async job,
// or an error payload when the job manager has no snapshot for that id.
server.tool("llm_job_status", {
    jobId: z.string().describe("Async job ID from *_request_async")
}, async ({ jobId }) => {
    // Wraps a payload as a single pretty-printed JSON text item.
    const asTextContent = (payload) => [{ type: "text", text: JSON.stringify(payload, null, 2) }];
    const snapshot = asyncJobManager.getJobSnapshot(jobId);
    if (snapshot) {
        return { content: asTextContent({ success: true, job: snapshot }) };
    }
    return {
        content: asTextContent({ success: false, error: "Job not found", jobId }),
        isError: true
    };
});
1101
// Tool `llm_job_result`: returns the captured output of an async job. For jobs
// whose recorded output format is stream-json, a `parsed` summary extracted
// from stdout is added alongside the raw result.
server.tool("llm_job_result", {
    jobId: z.string().describe("Async job ID from *_request_async"),
    maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream")
}, async ({ jobId, maxChars }) => {
    // Wraps a payload as a single pretty-printed JSON text item.
    const asTextContent = (payload) => [{ type: "text", text: JSON.stringify(payload, null, 2) }];
    const jobResult = asyncJobManager.getJobResult(jobId, maxChars);
    if (!jobResult) {
        return {
            content: asTextContent({ success: false, error: "Job not found", jobId }),
            isError: true
        };
    }
    const payload = { success: true, result: jobResult };
    // Only stream-json jobs with non-empty stdout are parsed.
    const recordedFormat = asyncJobManager.getJobOutputFormat(jobId);
    if (recordedFormat === "stream-json" && jobResult.stdout) {
        const parsed = parseStreamJson(jobResult.stdout);
        if (parsed) {
            payload.parsed = {
                text: parsed.text,
                costUsd: parsed.costUsd,
                usage: parsed.usage,
                model: parsed.model,
                numTurns: parsed.numTurns
            };
        }
    }
    return { content: asTextContent(payload) };
});
1136
// Tool `llm_job_cancel`: asks the job manager to cancel an async job and
// reports whether it did, including the manager's reason when it did not.
server.tool("llm_job_cancel", {
    jobId: z.string().describe("Async job ID from *_request_async")
}, async ({ jobId }) => {
    // Wraps a payload as a single pretty-printed JSON text item.
    const asTextContent = (payload) => [{ type: "text", text: JSON.stringify(payload, null, 2) }];
    const outcome = asyncJobManager.cancelJob(jobId);
    if (outcome.canceled) {
        return { content: asTextContent({ success: true, jobId }) };
    }
    const reason = outcome.reason || "Unable to cancel";
    return {
        content: asTextContent({ success: false, jobId, reason }),
        isError: true
    };
});
1163
// Tool `llm_process_health`: returns the job manager's health report with a
// leading `success: true` flag.
server.tool("llm_process_health", {}, async () => {
    const report = { success: true, ...asyncJobManager.getJobHealth() };
    const text = JSON.stringify(report, null, 2);
    return { content: [{ type: "text", text }] };
});
1172
+ //──────────────────────────────────────────────────────────────────────────────
1173
+ // Approval Audit Tools
1174
+ //──────────────────────────────────────────────────────────────────────────────
1175
// Tool `approval_list`: returns approval records from the approval manager,
// limited to `limit` entries and optionally filtered by CLI.
server.tool("approval_list", {
    limit: z.number().int().min(1).max(500).default(50).describe("Max number of approval records"),
    cli: z.enum(["claude", "codex", "gemini"]).optional().describe("Optional CLI filter")
}, async ({ limit, cli }) => {
    const records = approvalManager.list(limit, cli);
    const payload = {
        success: true,
        count: records.length,
        approvals: records
    };
    const text = JSON.stringify(payload, null, 2);
    return { content: [{ type: "text", text }] };
});
1191
+ //──────────────────────────────────────────────────────────────────────────────
1192
+ // List Models Tool
1193
+ //──────────────────────────────────────────────────────────────────────────────
1194
// Tool `list_models`: returns the CLI info table, narrowed to a single CLI
// when a filter is given. An empty string or null filter counts as "no filter".
server.tool("list_models", {
    cli: z.preprocess((value) => {
        if (value === "" || value === null) {
            return undefined;
        }
        return value;
    }, z.enum(["claude", "codex", "gemini"]).optional()).describe("CLI filter (claude|codex|gemini)")
}, async ({ cli }) => {
    const allInfo = getCliInfo();
    let selection = allInfo;
    if (cli) {
        selection = { [cli]: allInfo[cli] };
    }
    const text = JSON.stringify(selection, null, 2);
    return { content: [{ type: "text", text }] };
});
1201
+ //──────────────────────────────────────────────────────────────────────────────
1202
+ // Session Management Tools
1203
+ //──────────────────────────────────────────────────────────────────────────────
1204
// Tool `session_create`: creates a session record for the given CLI and, by
// default, marks it as that CLI's active session. Failures are reported via
// `createErrorResponse`.
server.tool("session_create", {
    cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
    description: z.string().optional().describe("Session description"),
    setAsActive: z.boolean().default(true).describe("Set as active session")
}, async ({ cli, description, setAsActive }) => {
    try {
        const created = await sessionManager.createSession(cli, description);
        if (setAsActive) {
            await sessionManager.setActiveSession(cli, created.id);
        }
        logger.info(`Created new ${cli} session: ${created.id}`);
        // `isActive` echoes the request flag.
        const summary = {
            id: created.id,
            cli: created.cli,
            description: created.description,
            createdAt: created.createdAt,
            isActive: setAsActive
        };
        const text = JSON.stringify({ success: true, session: summary }, null, 2);
        return { content: [{ type: "text", text }] };
    }
    catch (error) {
        return createErrorResponse("session_create", 1, "", undefined, error);
    }
});
1235
// Tool "session_list": lists sessions (optionally filtered by CLI) and reports,
// per CLI, which session id is currently active (null when none).
server.tool("session_list", {
    cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
}, async ({ cli }) => {
    try {
        const sessions = await sessionManager.listSessions(cli);
        // Look up the active session of every CLI, one at a time, regardless of the filter.
        const activeByCli = {};
        for (const cliName of ["claude", "codex", "gemini"]) {
            activeByCli[cliName] = await sessionManager.getActiveSession(cliName);
        }
        const sessionList = sessions.map((entry) => {
            const activeForCli = activeByCli[entry.cli];
            return {
                id: entry.id,
                cli: entry.cli,
                description: entry.description,
                createdAt: entry.createdAt,
                lastUsedAt: entry.lastUsedAt,
                isActive: activeForCli?.id === entry.id
            };
        });
        const payload = {
            total: sessionList.length,
            sessions: sessionList,
            activeSessions: {
                claude: activeByCli.claude?.id || null,
                codex: activeByCli.codex?.id || null,
                gemini: activeByCli.gemini?.id || null
            }
        };
        return {
            content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
        };
    }
    catch (error) {
        return createErrorResponse("session_list", 1, "", undefined, error);
    }
});
1272
// Tool "session_set_active": sets (or, with a null/empty id, clears) the active
// session for a CLI. A falsy result from the session manager is reported as an
// error result; thrown errors go through createErrorResponse.
server.tool("session_set_active", {
    cli: z.enum(["claude", "codex", "gemini"]).describe("CLI type (claude|codex|gemini)"),
    sessionId: z.string().nullable().describe("Session ID (null to clear)")
}, async ({ cli, sessionId }) => {
    try {
        // Any falsy id (null or "") is forwarded as null.
        const updated = await sessionManager.setActiveSession(cli, sessionId || null);
        if (updated) {
            logger.info(`Set active ${cli} session to: ${sessionId}`);
            const okPayload = {
                success: true,
                cli,
                activeSessionId: sessionId
            };
            return {
                content: [{ type: "text", text: JSON.stringify(okPayload, null, 2) }]
            };
        }
        const failurePayload = {
            success: false,
            error: "Session not found or does not belong to the specified CLI"
        };
        return {
            content: [{ type: "text", text: JSON.stringify(failurePayload, null, 2) }],
            isError: true
        };
    }
    catch (error) {
        return createErrorResponse("session_set_active", 1, "", undefined, error);
    }
});
1306
// Tool "session_delete": deletes a session by id.
//
// Responses:
//   - unknown id                      -> { success: false, error: "Session not found" }, isError
//   - deleteSession() returned false  -> { success: false, error, session }, isError
//   - otherwise                       -> { success, deletedSession }
//   - thrown error                    -> createErrorResponse(...)
//
// Previously the handler logged "Deleted session" and returned a non-error
// result even when the session manager reported that nothing was deleted.
server.tool("session_delete", {
    sessionId: z.string().describe("Session ID")
}, async ({ sessionId }) => {
    try {
        // Fetch first so the response can describe what was removed.
        const session = await sessionManager.getSession(sessionId);
        if (!session) {
            return {
                content: [{
                        type: "text",
                        text: JSON.stringify({
                            success: false,
                            error: "Session not found"
                        }, null, 2)
                    }],
                isError: true
            };
        }
        const summary = {
            id: session.id,
            cli: session.cli,
            description: session.description
        };
        const success = await sessionManager.deleteSession(sessionId);
        // Only an explicit `false` is treated as failure, so a manager whose
        // deleteSession resolves to another value keeps the previous behaviour.
        // NOTE(review): assumes deleteSession resolves to a boolean - confirm.
        if (success === false) {
            logger.error(`Failed to delete session: ${sessionId}`);
            return {
                content: [{
                        type: "text",
                        text: JSON.stringify({
                            success: false,
                            error: "Session could not be deleted",
                            session: summary
                        }, null, 2)
                    }],
                isError: true
            };
        }
        logger.info(`Deleted session: ${sessionId}`);
        return {
            content: [{
                    type: "text",
                    text: JSON.stringify({
                        success,
                        deletedSession: summary
                    }, null, 2)
                }]
        };
    }
    catch (error) {
        return createErrorResponse("session_delete", 1, "", undefined, error);
    }
});
1343
// Tool "session_get": returns every stored field of one session plus an
// `isActive` flag telling whether it is the active session of its CLI.
server.tool("session_get", {
    sessionId: z.string().describe("Session ID")
}, async ({ sessionId }) => {
    try {
        const found = await sessionManager.getSession(sessionId);
        if (found) {
            const currentActive = await sessionManager.getActiveSession(found.cli);
            const payload = {
                success: true,
                session: {
                    ...found,
                    isActive: currentActive?.id === found.id
                }
            };
            return {
                content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
            };
        }
        // Unknown id: report as a tool-level error rather than throwing.
        const notFound = {
            success: false,
            error: "Session not found"
        };
        return {
            content: [{ type: "text", text: JSON.stringify(notFound, null, 2) }],
            isError: true
        };
    }
    catch (error) {
        return createErrorResponse("session_get", 1, "", undefined, error);
    }
});
1378
// Tool "session_clear_all": removes all sessions, or only those of one CLI when
// `cli` is given, and reports the count returned by the session manager.
server.tool("session_clear_all", {
    cli: z.enum(["claude", "codex", "gemini"]).optional().describe("CLI filter (claude|codex|gemini)")
}, async ({ cli }) => {
    try {
        const deletedCount = await sessionManager.clearAllSessions(cli);
        logger.info(`Cleared ${deletedCount} sessions${cli ? ` for ${cli}` : ''}`);
        const payload = {
            success: true,
            deletedCount,
            // "all" marks an unfiltered clear.
            cli: cli || "all"
        };
        return {
            content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
        };
    }
    catch (error) {
        return createErrorResponse("session_clear_all", 1, "", undefined, error);
    }
});
1399
+ //──────────────────────────────────────────────────────────────────────────────
1400
+ // Async Initialization
1401
+ //──────────────────────────────────────────────────────────────────────────────
1402
/**
 * Builds the module-level session manager and resource provider.
 *
 * When the loaded config has both `database` and `redis` sections, a database
 * connection is opened (via a lazy import of ./db.js) and handed to
 * createSessionManager; otherwise the manager is created without one.
 * Assigns the module-level `db` (database mode only), `sessionManager` and
 * `resourceProvider` bindings.
 *
 * @returns {Promise<void>}
 */
async function initializeSessionManager() {
    const config = loadConfig();
    const useDatabase = Boolean(config.database && config.redis);
    if (!useDatabase) {
        logger.info("Initializing file-based session manager");
        sessionManager = await createSessionManager(config, undefined, logger);
        logger.info("File-based session manager initialized");
    }
    else {
        logger.info("Initializing PostgreSQL + Redis session manager");
        // Loaded on demand so the db module is only imported in database mode.
        const dbModule = await import("./db.js");
        db = await dbModule.createDatabaseConnection(config, logger);
        sessionManager = await createSessionManager(config, db, logger);
        logger.info("PostgreSQL session manager initialized");
    }
    resourceProvider = new ResourceProvider(sessionManager, performanceMetrics);
}
1418
+ //──────────────────────────────────────────────────────────────────────────────
1419
+ // Health Check Resource (only if using PostgreSQL)
1420
+ //──────────────────────────────────────────────────────────────────────────────
1421
/**
 * Registers the health-related MCP resources on the server.
 *
 * - "health" (health://status) is registered only when a database connection
 *   exists; reading it returns the result of checkHealth(db).
 * - "process-health" (metrics://process-health) is always registered; reading
 *   it returns asyncJobManager.getJobHealth().
 */
function registerHealthResource() {
    const jsonMime = "application/json";
    if (db) {
        const dbHealthUri = "health://status";
        const dbHealthMeta = {
            title: "🏥 Health Status",
            description: "DB connectivity and latency",
            mimeType: jsonMime
        };
        server.registerResource("health", dbHealthUri, dbHealthMeta, async () => {
            const report = await checkHealth(db);
            return {
                contents: [{
                        uri: dbHealthUri,
                        text: JSON.stringify(report, null, 2),
                        mimeType: jsonMime
                    }]
            };
        });
        logger.info("Health check resource registered");
    }
    // Process health does not depend on the database, so it is registered unconditionally.
    const processHealthMeta = {
        title: "Process Health",
        description: "Async job health (CPU, memory, zombie detection)",
        mimeType: jsonMime
    };
    server.registerResource("process-health", "metrics://process-health", processHealthMeta, async (uri) => {
        const jobHealth = asyncJobManager.getJobHealth();
        return {
            contents: [{
                    uri: uri.href,
                    mimeType: jsonMime,
                    text: JSON.stringify(jobHealth, null, 2)
                }]
        };
    });
    logger.info("Process health resource registered");
}
1456
+ //──────────────────────────────────────────────────────────────────────────────
1457
+ // Graceful Shutdown
1458
+ //──────────────────────────────────────────────────────────────────────────────
1459
/**
 * Handles a termination signal: stops child process groups, closes the MCP
 * server, disconnects the database when one is open, then exits the process.
 * Exits with code 0 on success and 1 if any step throws.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown (used for logging).
 * @returns {Promise<void>}
 */
async function shutdown(signal) {
    logger.info(`Received ${signal}, shutting down gracefully...`);
    try {
        // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
        await killAllProcessGroups();
        logger.info("All process groups terminated");

        await server.close();
        logger.info("MCP server closed");

        // `db` is only set when the database-backed session manager is in use.
        if (db) {
            await db.disconnect();
            logger.info("Database connections closed");
        }
        process.exit(0);
    }
    catch (error) {
        logger.error("Error during shutdown:", error);
        process.exit(1);
    }
}
// Route both termination signals through the same shutdown path.
for (const signalName of ["SIGTERM", "SIGINT"]) {
    process.on(signalName, () => shutdown(signalName));
}
1480
+ //──────────────────────────────────────────────────────────────────────────────
1481
+ // Server Startup
1482
+ //──────────────────────────────────────────────────────────────────────────────
1483
/**
 * Server entry point: initializes the session manager, registers the health
 * resources, and connects the MCP server over a stdio transport.
 *
 * @returns {Promise<void>}
 */
async function main() {
    logger.info("Starting llm-cli-gateway MCP server");
    // The session manager must exist before resources are registered.
    await initializeSessionManager();
    // Called unconditionally; the function itself decides whether the DB health resource applies.
    registerHealthResource();
    const stdioTransport = new StdioServerTransport();
    await server.connect(stdioTransport);
    logger.info("llm-cli-gateway MCP server connected and ready");
}
1493
// Guard: only auto-start when run directly (not imported for testing).
// realpathSync resolves symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url.
//
// The path is converted with url.pathToFileURL rather than `new URL(path, "file://")`:
// the URL constructor treats `#` and `?` in a path as fragment/query delimiters,
// leaves `%` unescaped, and parses a Windows drive prefix such as `C:` as a URL
// scheme, so the comparison below could never match for such paths.
// The helper is loaded with a dynamic import so this block does not depend on
// the file's top-level import list.
const { pathToFileURL: __pathToFileURL } = await import("node:url");
const __entryUrl = process.argv[1]
    ? __pathToFileURL(realpathSync(process.argv[1])).href
    : "";
if (__entryUrl === import.meta.url) {
    main().catch((error) => {
        logger.error("Fatal server error:", error);
        process.exit(1);
    });
}