llm-cli-gateway 1.5.4 → 1.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { randomUUID } from "crypto";
5
- import { readFileSync, readdirSync, realpathSync } from "fs";
5
+ import { readFileSync, readdirSync } from "fs";
6
6
  import { dirname, join } from "path";
7
7
  import { fileURLToPath } from "url";
8
8
  import { z } from "zod";
@@ -14,11 +14,11 @@ import { createSessionManager } from "./session-manager.js";
14
14
  import { ResourceProvider } from "./resources.js";
15
15
  import { PerformanceMetrics } from "./metrics.js";
16
16
  import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
17
- import { loadConfig } from "./config.js";
17
+ import { loadConfig, loadPersistenceConfig } from "./config.js";
18
18
  import { checkHealth } from "./health.js";
19
19
  import { getCliInfo, resolveModelAlias } from "./model-registry.js";
20
20
  import { AsyncJobManager } from "./async-job-manager.js";
21
- import { JobStore, resolveJobStoreDbPath } from "./job-store.js";
21
+ import { createJobStore } from "./job-store.js";
22
22
  import { ApprovalManager } from "./approval-manager.js";
23
23
  import { checkReviewIntegrity } from "./review-integrity.js";
24
24
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
@@ -28,6 +28,8 @@ import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
28
28
  import { startHttpGateway } from "./http-transport.js";
29
29
  import { printDoctorJson } from "./doctor.js";
30
30
  import { registerValidationTools } from "./validation-tools.js";
31
+ import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
32
+ import { entrypointFileURL } from "./entrypoint-url.js";
31
33
  // Simple logger that writes to stderr (stdout is used for MCP protocol)
32
34
  const logger = {
33
35
  info: (message, ...args) => {
@@ -102,7 +104,7 @@ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_requ
102
104
  Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
103
105
  Jobs: llm_job_status, llm_job_result, llm_job_cancel
104
106
  Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
105
- Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
107
+ Other: list_models, cli_versions, upstream_contracts, cli_upgrade, approval_list, llm_process_health
106
108
 
107
109
  Key behaviors:
108
110
  - Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
@@ -121,20 +123,18 @@ let db = null;
121
123
  const performanceMetrics = new PerformanceMetrics();
122
124
  let resourceProvider;
123
125
  const flightRecorder = createFlightRecorder(logger);
124
- // Durable job store: persists every async job to ~/.llm-cli-gateway/logs.db so callers
125
- // can collect results across long polling gaps and gateway restarts, and so repeated
126
- // identical requests dedup onto the running/completed job instead of starting over.
126
+ // Resolved persistence config — single source of truth for the async-job backend.
127
+ // Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
128
+ // When backend = "none", the JobStore is null AND *_request_async tools are not
129
+ // registered (see createGatewayServer), making silent in-memory loss
130
+ // structurally impossible.
131
+ const persistenceConfig = loadPersistenceConfig(logger);
127
132
  const jobStore = (() => {
128
- const dbPath = resolveJobStoreDbPath();
129
- if (!dbPath) {
130
- logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
131
- return null;
132
- }
133
133
  try {
134
- return new JobStore(dbPath, logger);
134
+ return createJobStore(persistenceConfig, logger);
135
135
  }
136
136
  catch (err) {
137
- logger.error("Failed to open durable job store; continuing in-memory only", err);
137
+ logger.error("Failed to open durable job store; async tools will be unavailable", err);
138
138
  return null;
139
139
  }
140
140
  })();
@@ -178,6 +178,7 @@ function resolveGatewayServerRuntime(deps = {}, options = {}) {
178
178
  approvalManager: runtimeApprovalManager,
179
179
  flightRecorder: deps.flightRecorder ?? flightRecorder,
180
180
  logger: runtimeLogger,
181
+ persistence: deps.persistence ?? persistenceConfig,
181
182
  };
182
183
  }
183
184
  // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
@@ -217,6 +218,14 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
217
218
  runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
218
219
  }
219
220
  };
221
+ try {
222
+ assertUpstreamCliArgs(cli, args);
223
+ assertUpstreamCliEnv(cli, env);
224
+ }
225
+ catch (err) {
226
+ consumeOnComplete();
227
+ throw err;
228
+ }
220
229
  if (SYNC_DEADLINE_MS === 0) {
221
230
  // Disabled — fall through to direct execution.
222
231
  // Note: direct execution bypasses dedup. forceRefresh is implied.
@@ -269,7 +278,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
269
278
  }
270
279
  return {
271
280
  stdout: result.stdout,
272
- stderr: result.stderr,
281
+ stderr: result.stderr || result.error || "",
273
282
  code: result.exitCode ?? 1,
274
283
  };
275
284
  }
@@ -1362,6 +1371,8 @@ export async function handleGeminiRequestAsync(deps, params) {
1362
1371
  // Start job only after all session I/O succeeds. U23: forward outputFormat
1363
1372
  // so AsyncJobManager records it in the durable store (the manager also
1364
1373
  // surfaces it in the snapshot).
1374
+ assertUpstreamCliArgs("gemini", args);
1375
+ assertUpstreamCliEnv("gemini", undefined);
1365
1376
  const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1366
1377
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
1367
1378
  const asyncResponse = {
@@ -1556,6 +1567,8 @@ export async function handleGrokRequestAsync(deps, params) {
1556
1567
  effectiveSessionId = newSession.id;
1557
1568
  }
1558
1569
  // Start job only after all session I/O succeeds
1570
+ assertUpstreamCliArgs("grok", args);
1571
+ assertUpstreamCliEnv("grok", undefined);
1559
1572
  const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
1560
1573
  deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
1561
1574
  const asyncResponse = {
@@ -1742,6 +1755,8 @@ export async function handleMistralRequestAsync(deps, params) {
1742
1755
  const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
1743
1756
  effectiveSessionId = newSession.id;
1744
1757
  }
1758
+ assertUpstreamCliArgs("mistral", args);
1759
+ assertUpstreamCliEnv("mistral", mistralEnv);
1745
1760
  const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
1746
1761
  deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
1747
1762
  const asyncResponse = {
@@ -1840,6 +1855,8 @@ export async function handleCodexRequestAsync(deps, params) {
1840
1855
  }
1841
1856
  // Start job only after all session I/O succeeds. If startJob throws before
1842
1857
  // registering the record, ownership stays here and we run it in the catch.
1858
+ assertUpstreamCliArgs("codex", args);
1859
+ assertUpstreamCliEnv("codex", undefined);
1843
1860
  let job;
1844
1861
  try {
1845
1862
  job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
@@ -1886,7 +1903,24 @@ export async function handleCodexRequestAsync(deps, params) {
1886
1903
  //──────────────────────────────────────────────────────────────────────────────
1887
1904
  export function createGatewayServer(deps = {}) {
1888
1905
  const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
1889
- const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger } = runtime;
1906
+ const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
1907
+ // Structural invariant: tools register iff ALL THREE conditions hold:
1908
+ // (1) persistence.backend !== "none" — the operator/config has not
1909
+ // explicitly disabled durable persistence;
1910
+ // (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
1911
+ // agrees (loadPersistenceConfig sets this iff backend is one of
1912
+ // sqlite/postgres/memory);
1913
+ // (3) asyncJobManager.hasStore() === true — the runtime manager
1914
+ // actually has a store attached (isolate-mode runtimes use null).
1915
+ //
1916
+ // Each guard closes a distinct re-entry path for the silent-loss footgun:
1917
+ // - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
1918
+ // and re-advertise the async tools while reporting backend='none' in
1919
+ // llm_process_health — exactly contradicting SPEC CLAIM 4f.
1920
+ // - Without (2), config that opts out is ignored.
1921
+ // - Without (3), a null-store manager (isolate-mode / HTTP per-session)
1922
+ // accepts registrations that have nowhere to persist results.
1923
+ const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
1890
1924
  const server = newGatewayMcpServer();
1891
1925
  registerBaseResources(server, runtime);
1892
1926
  registerValidationTools(server, { asyncJobManager });
@@ -2736,686 +2770,712 @@ export function createGatewayServer(deps = {}) {
2736
2770
  });
2737
2771
  //──────────────────────────────────────────────────────────────────────────────
2738
2772
  // Async Long-Running Job Tools (No Time-Bound LLM Execution)
2773
+ //
2774
+ // STRUCTURAL INVARIANT: these tools are only registered when a real job
2775
+ // store is attached (`persistence.asyncJobsEnabled === true`). When the
2776
+ // operator has configured `[persistence].backend = "none"`, none of the
2777
+ // *_request_async / llm_job_* tools exist in the MCP tool list at all —
2778
+ // orchestrating agents get a clean "tool not found" signal at connect
2779
+ // time instead of silent in-memory loss after the 1-hour TTL.
2739
2780
  //──────────────────────────────────────────────────────────────────────────────
2740
- server.tool("claude_request_async", {
2741
- prompt: z
2742
- .string()
2743
- .min(1, "Prompt cannot be empty")
2744
- .max(100000, "Prompt too long (max 100k chars)")
2745
- .describe("Prompt text for Claude"),
2746
- model: z
2747
- .string()
2748
- .optional()
2749
- .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
2750
- outputFormat: z
2751
- .enum(["text", "json", "stream-json"])
2752
- .default("text")
2753
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
2754
- sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2755
- continueSession: z.boolean().default(false).describe("Continue active session"),
2756
- createNewSession: z.boolean().default(false).describe("Force new session"),
2757
- allowedTools: z
2758
- .array(z.string())
2759
- .optional()
2760
- .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
2761
- disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
2762
- dangerouslySkipPermissions: z
2763
- .boolean()
2764
- .default(false)
2765
- .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
2766
- permissionMode: z
2767
- .enum(CLAUDE_PERMISSION_MODES)
2768
- .optional()
2769
- .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
2770
- // U25 Claude high-impact features
2771
- agent: z
2772
- .string()
2773
- .optional()
2774
- .describe("Claude --agent: dispatch to a named single sub-agent."),
2775
- agents: z
2776
- .record(z.record(z.unknown()))
2777
- .optional()
2778
- .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
2779
- forkSession: z
2780
- .boolean()
2781
- .optional()
2782
- .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
2783
- systemPrompt: z
2784
- .string()
2785
- .optional()
2786
- .describe("Claude --system-prompt: replace the system prompt entirely."),
2787
- appendSystemPrompt: z
2788
- .string()
2789
- .optional()
2790
- .describe("Claude --append-system-prompt: append to the existing system prompt."),
2791
- maxBudgetUsd: z
2792
- .number()
2793
- .positive()
2794
- .optional()
2795
- .describe("Claude --max-budget-usd: spend cap for this request in USD."),
2796
- maxTurns: z
2797
- .number()
2798
- .int()
2799
- .positive()
2800
- .optional()
2801
- .describe("Claude --max-turns: cap on agent loop iterations."),
2802
- effort: z
2803
- .enum(CLAUDE_EFFORT_LEVELS)
2804
- .optional()
2805
- .describe("Claude --effort: low|medium|high|xhigh|max."),
2806
- excludeDynamicSystemPromptSections: z
2807
- .boolean()
2808
- .optional()
2809
- .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2810
- approvalStrategy: z
2811
- .enum(["legacy", "mcp_managed"])
2812
- .default("legacy")
2813
- .describe("Approval strategy"),
2814
- approvalPolicy: z
2815
- .enum(["strict", "balanced", "permissive"])
2816
- .optional()
2817
- .describe("Approval policy override"),
2818
- mcpServers: z
2819
- .array(MCP_SERVER_ENUM)
2820
- .default(["sqry"])
2821
- .describe("MCP servers exposed to Claude"),
2822
- strictMcpConfig: z
2823
- .boolean()
2824
- .default(false)
2825
- .describe("Restrict Claude to provided MCP config only"),
2826
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2827
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2828
- idleTimeoutMs: z
2829
- .number()
2830
- .int()
2831
- .min(30_000)
2832
- .max(3_600_000)
2833
- .optional()
2834
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2835
- forceRefresh: z
2836
- .boolean()
2837
- .default(false)
2838
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2839
- }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2840
- if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2841
- return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
2842
- }
2843
- const prep = prepareClaudeRequest({
2844
- prompt,
2845
- model,
2846
- outputFormat,
2847
- allowedTools,
2848
- disallowedTools,
2849
- dangerouslySkipPermissions,
2850
- permissionMode,
2851
- approvalStrategy,
2852
- approvalPolicy,
2853
- mcpServers,
2854
- strictMcpConfig,
2855
- correlationId,
2856
- optimizePrompt,
2857
- operation: "claude_request_async",
2858
- agent,
2859
- agents,
2860
- forkSession,
2861
- systemPrompt,
2862
- appendSystemPrompt,
2863
- maxBudgetUsd,
2864
- maxTurns,
2865
- effort,
2866
- excludeDynamicSystemPromptSections,
2867
- }, runtime);
2868
- if (!("args" in prep))
2869
- return prep;
2870
- const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
2871
- try {
2872
- // Session management (before job start for async)
2873
- let effectiveSessionId = sessionId;
2874
- let useContinue = continueSession;
2875
- const activeSession = await sessionManager.getActiveSession("claude");
2876
- if (!createNewSession && !continueSession && !sessionId && activeSession) {
2877
- effectiveSessionId = activeSession.id;
2878
- useContinue = true;
2781
+ if (asyncJobsEnabled) {
2782
+ server.tool("claude_request_async", {
2783
+ prompt: z
2784
+ .string()
2785
+ .min(1, "Prompt cannot be empty")
2786
+ .max(100000, "Prompt too long (max 100k chars)")
2787
+ .describe("Prompt text for Claude"),
2788
+ model: z
2789
+ .string()
2790
+ .optional()
2791
+ .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
2792
+ outputFormat: z
2793
+ .enum(["text", "json", "stream-json"])
2794
+ .default("text")
2795
+ .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
2796
+ sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2797
+ continueSession: z.boolean().default(false).describe("Continue active session"),
2798
+ createNewSession: z.boolean().default(false).describe("Force new session"),
2799
+ allowedTools: z
2800
+ .array(z.string())
2801
+ .optional()
2802
+ .describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
2803
+ disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
2804
+ dangerouslySkipPermissions: z
2805
+ .boolean()
2806
+ .default(false)
2807
+ .describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
2808
+ permissionMode: z
2809
+ .enum(CLAUDE_PERMISSION_MODES)
2810
+ .optional()
2811
+ .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
2812
+ // U25 — Claude high-impact features
2813
+ agent: z
2814
+ .string()
2815
+ .optional()
2816
+ .describe("Claude --agent: dispatch to a named single sub-agent."),
2817
+ agents: z
2818
+ .record(z.record(z.unknown()))
2819
+ .optional()
2820
+ .describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
2821
+ forkSession: z
2822
+ .boolean()
2823
+ .optional()
2824
+ .describe("Claude --fork-session: branch from an existing session into a fresh fork."),
2825
+ systemPrompt: z
2826
+ .string()
2827
+ .optional()
2828
+ .describe("Claude --system-prompt: replace the system prompt entirely."),
2829
+ appendSystemPrompt: z
2830
+ .string()
2831
+ .optional()
2832
+ .describe("Claude --append-system-prompt: append to the existing system prompt."),
2833
+ maxBudgetUsd: z
2834
+ .number()
2835
+ .positive()
2836
+ .optional()
2837
+ .describe("Claude --max-budget-usd: spend cap for this request in USD."),
2838
+ maxTurns: z
2839
+ .number()
2840
+ .int()
2841
+ .positive()
2842
+ .optional()
2843
+ .describe("Claude --max-turns: cap on agent loop iterations."),
2844
+ effort: z
2845
+ .enum(CLAUDE_EFFORT_LEVELS)
2846
+ .optional()
2847
+ .describe("Claude --effort: low|medium|high|xhigh|max."),
2848
+ excludeDynamicSystemPromptSections: z
2849
+ .boolean()
2850
+ .optional()
2851
+ .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2852
+ approvalStrategy: z
2853
+ .enum(["legacy", "mcp_managed"])
2854
+ .default("legacy")
2855
+ .describe("Approval strategy"),
2856
+ approvalPolicy: z
2857
+ .enum(["strict", "balanced", "permissive"])
2858
+ .optional()
2859
+ .describe("Approval policy override"),
2860
+ mcpServers: z
2861
+ .array(MCP_SERVER_ENUM)
2862
+ .default(["sqry"])
2863
+ .describe("MCP servers exposed to Claude"),
2864
+ strictMcpConfig: z
2865
+ .boolean()
2866
+ .default(false)
2867
+ .describe("Restrict Claude to provided MCP config only"),
2868
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2869
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2870
+ idleTimeoutMs: z
2871
+ .number()
2872
+ .int()
2873
+ .min(30_000)
2874
+ .max(3_600_000)
2875
+ .optional()
2876
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2877
+ forceRefresh: z
2878
+ .boolean()
2879
+ .default(false)
2880
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2881
+ }, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
2882
+ if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2883
+ return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
2879
2884
  }
2880
- if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2881
- useContinue = true;
2885
+ const prep = prepareClaudeRequest({
2886
+ prompt,
2887
+ model,
2888
+ outputFormat,
2889
+ allowedTools,
2890
+ disallowedTools,
2891
+ dangerouslySkipPermissions,
2892
+ permissionMode,
2893
+ approvalStrategy,
2894
+ approvalPolicy,
2895
+ mcpServers,
2896
+ strictMcpConfig,
2897
+ correlationId,
2898
+ optimizePrompt,
2899
+ operation: "claude_request_async",
2900
+ agent,
2901
+ agents,
2902
+ forkSession,
2903
+ systemPrompt,
2904
+ appendSystemPrompt,
2905
+ maxBudgetUsd,
2906
+ maxTurns,
2907
+ effort,
2908
+ excludeDynamicSystemPromptSections,
2909
+ }, runtime);
2910
+ if (!("args" in prep))
2911
+ return prep;
2912
+ const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
2913
+ try {
2914
+ // Session management (before job start for async)
2915
+ let effectiveSessionId = sessionId;
2916
+ let useContinue = continueSession;
2917
+ const activeSession = await sessionManager.getActiveSession("claude");
2918
+ if (!createNewSession && !continueSession && !sessionId && activeSession) {
2919
+ effectiveSessionId = activeSession.id;
2920
+ useContinue = true;
2921
+ }
2922
+ if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
2923
+ useContinue = true;
2924
+ }
2925
+ if (useContinue) {
2926
+ args.push("--continue");
2927
+ }
2928
+ else if (effectiveSessionId) {
2929
+ args.push("--session-id", effectiveSessionId);
2930
+ await sessionManager.updateSessionUsage(effectiveSessionId);
2931
+ }
2932
+ if (effectiveSessionId) {
2933
+ const existingSession = await sessionManager.getSession(effectiveSessionId);
2934
+ if (!existingSession) {
2935
+ await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
2936
+ }
2937
+ }
2938
+ // Idle timeout only for stream-json (text/json produce no output until done)
2939
+ const effectiveIdleTimeout = outputFormat === "stream-json"
2940
+ ? resolveIdleTimeout("claude", idleTimeoutMs)
2941
+ : undefined;
2942
+ assertUpstreamCliArgs("claude", args);
2943
+ assertUpstreamCliEnv("claude", undefined);
2944
+ const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
2945
+ logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
2946
+ const asyncResponse = {
2947
+ success: true,
2948
+ job,
2949
+ sessionId: effectiveSessionId || activeSession?.id || null,
2950
+ approval: approvalDecision,
2951
+ mcpServers: {
2952
+ requested: requestedMcpServers,
2953
+ enabled: mcpConfig?.enabled,
2954
+ missing: mcpConfig?.missing,
2955
+ },
2956
+ };
2957
+ if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
2958
+ asyncResponse.reviewIntegrity = prep.reviewIntegrity;
2959
+ }
2960
+ return {
2961
+ content: [
2962
+ {
2963
+ type: "text",
2964
+ text: JSON.stringify(asyncResponse, null, 2),
2965
+ },
2966
+ ],
2967
+ };
2882
2968
  }
2883
- if (useContinue) {
2884
- args.push("--continue");
2969
+ catch (error) {
2970
+ return createErrorResponse("claude_request_async", 1, "", corrId, error);
2885
2971
  }
2886
- else if (effectiveSessionId) {
2887
- args.push("--session-id", effectiveSessionId);
2888
- await sessionManager.updateSessionUsage(effectiveSessionId);
2972
+ });
2973
+ server.tool("codex_request_async", {
2974
+ prompt: z
2975
+ .string()
2976
+ .min(1, "Prompt cannot be empty")
2977
+ .max(100000, "Prompt too long (max 100k chars)")
2978
+ .describe("Prompt text for Codex"),
2979
+ model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
2980
+ fullAuto: z
2981
+ .boolean()
2982
+ .default(false)
2983
+ .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
2984
+ sandboxMode: z
2985
+ .enum(CODEX_SANDBOX_MODES)
2986
+ .optional()
2987
+ .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2988
+ askForApproval: z
2989
+ .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2990
+ .optional()
2991
+ .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2992
+ useLegacyFullAutoFlag: z
2993
+ .boolean()
2994
+ .default(false)
2995
+ .describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
2996
+ dangerouslyBypassApprovalsAndSandbox: z
2997
+ .boolean()
2998
+ .default(false)
2999
+ .describe("Run Codex without approvals/sandbox"),
3000
+ approvalStrategy: z
3001
+ .enum(["legacy", "mcp_managed"])
3002
+ .default("legacy")
3003
+ .describe("Approval strategy"),
3004
+ approvalPolicy: z
3005
+ .enum(["strict", "balanced", "permissive"])
3006
+ .optional()
3007
+ .describe("Approval policy override"),
3008
+ mcpServers: z
3009
+ .array(MCP_SERVER_ENUM)
3010
+ .default(["sqry"])
3011
+ .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
3012
+ sessionId: z
3013
+ .string()
3014
+ .optional()
3015
+ .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
3016
+ resumeLatest: z
3017
+ .boolean()
3018
+ .default(false)
3019
+ .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
3020
+ createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
3021
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3022
+ optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3023
+ idleTimeoutMs: z
3024
+ .number()
3025
+ .int()
3026
+ .min(30_000)
3027
+ .max(3_600_000)
3028
+ .optional()
3029
+ .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3030
+ forceRefresh: z
3031
+ .boolean()
3032
+ .default(false)
3033
+ .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3034
+ // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
3035
+ outputFormat: z
3036
+ .enum(["text", "json"])
3037
+ .default("text")
3038
+ .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
3039
+ // U26: high-impact feature flags. All optional.
3040
+ outputSchema: z
3041
+ .union([z.string(), z.record(z.unknown())])
3042
+ .optional()
3043
+ .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
3044
+ search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
3045
+ profile: z.string().optional().describe("Codex --profile <name>."),
3046
+ configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
3047
+ ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
3048
+ images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
3049
+ ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
3050
+ ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
3051
+ }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
3052
+ return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3053
+ prompt,
3054
+ model,
3055
+ fullAuto,
3056
+ sandboxMode,
3057
+ askForApproval,
3058
+ useLegacyFullAutoFlag,
3059
+ dangerouslyBypassApprovalsAndSandbox,
3060
+ approvalStrategy,
3061
+ approvalPolicy,
3062
+ mcpServers,
3063
+ sessionId,
3064
+ resumeLatest,
3065
+ createNewSession,
3066
+ correlationId,
3067
+ optimizePrompt,
3068
+ idleTimeoutMs,
3069
+ forceRefresh,
3070
+ outputFormat,
3071
+ outputSchema,
3072
+ search,
3073
+ profile,
3074
+ configOverrides,
3075
+ ephemeral,
3076
+ images,
3077
+ ignoreUserConfig,
3078
+ ignoreRules,
3079
+ });
3080
+ });
3081
// Registers the `gemini_request_async` tool. The object literal is the zod
// input shape; the handler copies each validated field, in schema order, into
// the request passed to handleGeminiRequestAsync alongside the gateway deps.
server.tool("gemini_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Gemini"),
    model: z.string().optional().describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume latest session"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    approvalMode: z.enum(GEMINI_APPROVAL_MODES).optional().describe("Approval: default|auto_edit|yolo|plan"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
    allowedTools: z.array(z.string()).optional().describe("Allowed tools (['Write','Edit','Bash'])"),
    includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    // Integer milliseconds, bounded to 30 s .. 1 h; may be omitted.
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    // U23: `json` emits `-o json` so token usage can be extracted via
    // parseGeminiJson. The default stays `text` so existing callers see no
    // behavior change.
    outputFormat: z.enum(["text", "json"]).default("text").describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
    // The next four validators are reused from GEMINI_HIGH_IMPACT_PARAMS_SCHEMA;
    // only the descriptions are supplied here.
    sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
    policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
    adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
    attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
}, async (request) => {
    const gatewayDeps = { sessionManager, asyncJobManager, logger, runtime };
    // Explicit field-by-field copy: every key is always present on the
    // forwarded object, even when its value is undefined.
    return handleGeminiRequestAsync(gatewayDeps, {
        prompt: request.prompt,
        model: request.model,
        sessionId: request.sessionId,
        resumeLatest: request.resumeLatest,
        createNewSession: request.createNewSession,
        approvalMode: request.approvalMode,
        approvalStrategy: request.approvalStrategy,
        approvalPolicy: request.approvalPolicy,
        mcpServers: request.mcpServers,
        allowedTools: request.allowedTools,
        includeDirs: request.includeDirs,
        correlationId: request.correlationId,
        optimizePrompt: request.optimizePrompt,
        idleTimeoutMs: request.idleTimeoutMs,
        forceRefresh: request.forceRefresh,
        outputFormat: request.outputFormat,
        sandbox: request.sandbox,
        policyFiles: request.policyFiles,
        adminPolicyFiles: request.adminPolicyFiles,
        attachments: request.attachments,
    });
});
3165
// Registers the `grok_request_async` tool. The object literal is the zod input
// shape; the handler copies each validated field, in schema order, into the
// request passed to handleGrokRequestAsync alongside the gateway deps.
server.tool("grok_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Grok"),
    model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
    outputFormat: z.enum(["plain", "json", "streaming-json"]).optional().describe("Output format (plain|json|streaming-json). Grok default is plain."),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume)"),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Grok session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    alwaysApprove: z.boolean().default(false).describe("Auto-approve all tool executions (--always-approve)"),
    permissionMode: z.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"]).optional().describe("Grok permission mode"),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Grok effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowed built-in tools (passed as --tools comma list)"),
    disallowedTools: z.array(z.string()).optional().describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    // Integer milliseconds, bounded to 30 s .. 1 h; may be omitted.
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (request) => {
    const gatewayDeps = { sessionManager, asyncJobManager, logger, runtime };
    // Explicit field-by-field copy: every key is always present on the
    // forwarded object, even when its value is undefined.
    return handleGrokRequestAsync(gatewayDeps, {
        prompt: request.prompt,
        model: request.model,
        outputFormat: request.outputFormat,
        sessionId: request.sessionId,
        resumeLatest: request.resumeLatest,
        createNewSession: request.createNewSession,
        alwaysApprove: request.alwaysApprove,
        permissionMode: request.permissionMode,
        effort: request.effort,
        reasoningEffort: request.reasoningEffort,
        approvalStrategy: request.approvalStrategy,
        approvalPolicy: request.approvalPolicy,
        mcpServers: request.mcpServers,
        allowedTools: request.allowedTools,
        disallowedTools: request.disallowedTools,
        correlationId: request.correlationId,
        optimizePrompt: request.optimizePrompt,
        idleTimeoutMs: request.idleTimeoutMs,
        forceRefresh: request.forceRefresh,
    });
});
3254
// Registers the `mistral_request_async` tool. The object literal is the zod
// input shape; the handler copies each validated field, in schema order, into
// the request passed to handleMistralRequestAsync alongside the gateway deps.
server.tool("mistral_request_async", {
    prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").describe("Prompt text for Mistral Vibe"),
    model: z.string().optional().describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
    outputFormat: z.enum(["plain", "json", "stream-json"]).optional().describe("Output format (plain|json|stream-json). Vibe default is plain."),
    sessionId: z.string().optional().describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
    resumeLatest: z.boolean().default(false).describe("Resume most recent Vibe session in cwd (--continue)"),
    createNewSession: z.boolean().default(false).describe("Force new session"),
    permissionMode: z.enum(MISTRAL_AGENT_MODES).optional().describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
    effort: z.enum(["low", "medium", "high", "xhigh", "max"]).optional().describe("Vibe effort level"),
    reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
    approvalStrategy: z.enum(["legacy", "mcp_managed"]).default("legacy").describe("Approval strategy"),
    approvalPolicy: z.enum(["strict", "balanced", "permissive"]).optional().describe("Approval policy override"),
    mcpServers: z.array(MCP_SERVER_ENUM).default(["sqry"]).describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
    allowedTools: z.array(z.string()).optional().describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
    disallowedTools: z.array(z.string()).optional().describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
    correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
    optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
    // Integer milliseconds, bounded to 30 s .. 1 h; may be omitted.
    idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
    forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
}, async (request) => {
    const gatewayDeps = { sessionManager, asyncJobManager, logger, runtime };
    // Explicit field-by-field copy: every key is always present on the
    // forwarded object, even when its value is undefined.
    return handleMistralRequestAsync(gatewayDeps, {
        prompt: request.prompt,
        model: request.model,
        outputFormat: request.outputFormat,
        sessionId: request.sessionId,
        resumeLatest: request.resumeLatest,
        createNewSession: request.createNewSession,
        permissionMode: request.permissionMode,
        effort: request.effort,
        reasoningEffort: request.reasoningEffort,
        approvalStrategy: request.approvalStrategy,
        approvalPolicy: request.approvalPolicy,
        mcpServers: request.mcpServers,
        allowedTools: request.allowedTools,
        disallowedTools: request.disallowedTools,
        correlationId: request.correlationId,
        optimizePrompt: request.optimizePrompt,
        idleTimeoutMs: request.idleTimeoutMs,
        forceRefresh: request.forceRefresh,
    });
});
3341
// Registers the `llm_job_status` tool: asks asyncJobManager for a snapshot of
// the job with the given ID and returns it as pretty-printed JSON. When no
// snapshot comes back, the reply carries a "Job not found" payload and is
// flagged with isError.
server.tool("llm_job_status", {
    jobId: z.string().describe("Async job ID from *_request_async"),
}, async ({ jobId }) => {
    const snapshot = asyncJobManager.getJobSnapshot(jobId);
    const missing = !snapshot;
    const payload = missing
        ? { success: false, error: "Job not found", jobId }
        : { success: true, job: snapshot };
    const reply = {
        content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    };
    if (missing) {
        reply.isError = true;
    }
    return reply;
});
3372
// Registers the `llm_job_result` tool: fetches a job's result from
// asyncJobManager (passing maxChars through) and returns it as pretty-printed
// JSON. When the job's recorded output format is "stream-json" and stdout is
// non-empty, a `parsed` summary produced by parseStreamJson is added.
server.tool("llm_job_result", {
    jobId: z.string().describe("Async job ID from *_request_async"),
    maxChars: z.number().int().min(1000).max(2000000).default(200000).describe("Max chars returned per stream"),
}, async ({ jobId, maxChars }) => {
    // Wraps a payload as a single pretty-printed text content item.
    const toTextItem = payload => ({ type: "text", text: JSON.stringify(payload, null, 2) });
    const result = asyncJobManager.getJobResult(jobId, maxChars);
    if (!result) {
        return {
            content: [toTextItem({ success: false, error: "Job not found", jobId })],
            isError: true,
        };
    }
    const payload = { success: true, result };
    // Parse stream-json output for Claude async jobs.
    const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
    if (outputFormat === "stream-json" && result.stdout) {
        const parsed = parseStreamJson(result.stdout);
        // Only attach the summary when the parser returned something truthy.
        if (parsed) {
            const { text, costUsd, usage, model, numTurns } = parsed;
            payload.parsed = { text, costUsd, usage, model, numTurns };
        }
    }
    return { content: [toTextItem(payload)] };
});
3427
+ server.tool("llm_job_cancel", {
3428
+ jobId: z.string().describe("Async job ID from *_request_async"),
3429
+ }, async ({ jobId }) => {
3430
+ const cancel = asyncJobManager.cancelJob(jobId);
3431
+ if (!cancel.canceled) {
3432
+ return {
3433
+ content: [
3434
+ {
3435
+ type: "text",
3436
+ text: JSON.stringify({
3437
+ success: false,
3438
+ jobId,
3439
+ reason: cancel.reason || "Unable to cancel",
3440
+ }, null, 2),
3441
+ },
3442
+ ],
3443
+ isError: true,
3444
+ };
2913
3445
  }
2914
3446
  return {
2915
3447
  content: [
2916
3448
  {
2917
3449
  type: "text",
2918
- text: JSON.stringify(asyncResponse, null, 2),
3450
+ text: JSON.stringify({
3451
+ success: true,
3452
+ jobId,
3453
+ }, null, 2),
2919
3454
  },
2920
3455
  ],
2921
3456
  };
2922
- }
2923
- catch (error) {
2924
- return createErrorResponse("claude_request_async", 1, "", corrId, error);
2925
- }
2926
- });
2927
- server.tool("codex_request_async", {
2928
- prompt: z
2929
- .string()
2930
- .min(1, "Prompt cannot be empty")
2931
- .max(100000, "Prompt too long (max 100k chars)")
2932
- .describe("Prompt text for Codex"),
2933
- model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
2934
- fullAuto: z
2935
- .boolean()
2936
- .default(false)
2937
- .describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
2938
- sandboxMode: z
2939
- .enum(CODEX_SANDBOX_MODES)
2940
- .optional()
2941
- .describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
2942
- askForApproval: z
2943
- .enum(CODEX_ASK_FOR_APPROVAL_MODES)
2944
- .optional()
2945
- .describe("Codex --ask-for-approval: untrusted|on-request|never."),
2946
- useLegacyFullAutoFlag: z
2947
- .boolean()
2948
- .default(false)
2949
- .describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
2950
- dangerouslyBypassApprovalsAndSandbox: z
2951
- .boolean()
2952
- .default(false)
2953
- .describe("Run Codex without approvals/sandbox"),
2954
- approvalStrategy: z
2955
- .enum(["legacy", "mcp_managed"])
2956
- .default("legacy")
2957
- .describe("Approval strategy"),
2958
- approvalPolicy: z
2959
- .enum(["strict", "balanced", "permissive"])
2960
- .optional()
2961
- .describe("Approval policy override"),
2962
- mcpServers: z
2963
- .array(MCP_SERVER_ENUM)
2964
- .default(["sqry"])
2965
- .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
2966
- sessionId: z
2967
- .string()
2968
- .optional()
2969
- .describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
2970
- resumeLatest: z
2971
- .boolean()
2972
- .default(false)
2973
- .describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
2974
- createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
2975
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2976
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
2977
- idleTimeoutMs: z
2978
- .number()
2979
- .int()
2980
- .min(30_000)
2981
- .max(3_600_000)
2982
- .optional()
2983
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
2984
- forceRefresh: z
2985
- .boolean()
2986
- .default(false)
2987
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2988
- // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
2989
- outputFormat: z
2990
- .enum(["text", "json"])
2991
- .default("text")
2992
- .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
2993
- // U26: high-impact feature flags. All optional.
2994
- outputSchema: z
2995
- .union([z.string(), z.record(z.unknown())])
2996
- .optional()
2997
- .describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
2998
- search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
2999
- profile: z.string().optional().describe("Codex --profile <name>."),
3000
- configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
3001
- ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
3002
- images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
3003
- ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
3004
- ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
3005
- }, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
3006
- return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3007
- prompt,
3008
- model,
3009
- fullAuto,
3010
- sandboxMode,
3011
- askForApproval,
3012
- useLegacyFullAutoFlag,
3013
- dangerouslyBypassApprovalsAndSandbox,
3014
- approvalStrategy,
3015
- approvalPolicy,
3016
- mcpServers,
3017
- sessionId,
3018
- resumeLatest,
3019
- createNewSession,
3020
- correlationId,
3021
- optimizePrompt,
3022
- idleTimeoutMs,
3023
- forceRefresh,
3024
- outputFormat,
3025
- outputSchema,
3026
- search,
3027
- profile,
3028
- configOverrides,
3029
- ephemeral,
3030
- images,
3031
- ignoreUserConfig,
3032
- ignoreRules,
3033
- });
3034
- });
3035
- server.tool("gemini_request_async", {
3036
- prompt: z
3037
- .string()
3038
- .min(1, "Prompt cannot be empty")
3039
- .max(100000, "Prompt too long (max 100k chars)")
3040
- .describe("Prompt text for Gemini"),
3041
- model: z
3042
- .string()
3043
- .optional()
3044
- .describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
3045
- sessionId: z
3046
- .string()
3047
- .optional()
3048
- .describe("Session ID (user-provided CLI handle for --resume)"),
3049
- resumeLatest: z.boolean().default(false).describe("Resume latest session"),
3050
- createNewSession: z.boolean().default(false).describe("Force new session"),
3051
- approvalMode: z
3052
- .enum(GEMINI_APPROVAL_MODES)
3053
- .optional()
3054
- .describe("Approval: default|auto_edit|yolo|plan"),
3055
- approvalStrategy: z
3056
- .enum(["legacy", "mcp_managed"])
3057
- .default("legacy")
3058
- .describe("Approval strategy"),
3059
- approvalPolicy: z
3060
- .enum(["strict", "balanced", "permissive"])
3061
- .optional()
3062
- .describe("Approval policy override"),
3063
- mcpServers: z
3064
- .array(MCP_SERVER_ENUM)
3065
- .default(["sqry"])
3066
- .describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
3067
- allowedTools: z
3068
- .array(z.string())
3069
- .optional()
3070
- .describe("Allowed tools (['Write','Edit','Bash'])"),
3071
- includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
3072
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3073
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3074
- idleTimeoutMs: z
3075
- .number()
3076
- .int()
3077
- .min(30_000)
3078
- .max(3_600_000)
3079
- .optional()
3080
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3081
- forceRefresh: z
3082
- .boolean()
3083
- .default(false)
3084
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3085
- // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3086
- // remains text so existing callers see no behavior change.
3087
- outputFormat: z
3088
- .enum(["text", "json"])
3089
- .default("text")
3090
- .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
3091
- sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
3092
- policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
3093
- adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
3094
- attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
3095
- }, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
3096
- return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3097
- prompt,
3098
- model,
3099
- sessionId,
3100
- resumeLatest,
3101
- createNewSession,
3102
- approvalMode,
3103
- approvalStrategy,
3104
- approvalPolicy,
3105
- mcpServers,
3106
- allowedTools,
3107
- includeDirs,
3108
- correlationId,
3109
- optimizePrompt,
3110
- idleTimeoutMs,
3111
- forceRefresh,
3112
- outputFormat,
3113
- sandbox,
3114
- policyFiles,
3115
- adminPolicyFiles,
3116
- attachments,
3117
- });
3118
- });
3119
- server.tool("grok_request_async", {
3120
- prompt: z
3121
- .string()
3122
- .min(1, "Prompt cannot be empty")
3123
- .max(100000, "Prompt too long (max 100k chars)")
3124
- .describe("Prompt text for Grok"),
3125
- model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
3126
- outputFormat: z
3127
- .enum(["plain", "json", "streaming-json"])
3128
- .optional()
3129
- .describe("Output format (plain|json|streaming-json). Grok default is plain."),
3130
- sessionId: z
3131
- .string()
3132
- .optional()
3133
- .describe("Session ID (user-provided CLI handle for --resume)"),
3134
- resumeLatest: z
3135
- .boolean()
3136
- .default(false)
3137
- .describe("Resume most recent Grok session in cwd (--continue)"),
3138
- createNewSession: z.boolean().default(false).describe("Force new session"),
3139
- alwaysApprove: z
3140
- .boolean()
3141
- .default(false)
3142
- .describe("Auto-approve all tool executions (--always-approve)"),
3143
- permissionMode: z
3144
- .enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
3145
- .optional()
3146
- .describe("Grok permission mode"),
3147
- effort: z
3148
- .enum(["low", "medium", "high", "xhigh", "max"])
3149
- .optional()
3150
- .describe("Grok effort level"),
3151
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
3152
- approvalStrategy: z
3153
- .enum(["legacy", "mcp_managed"])
3154
- .default("legacy")
3155
- .describe("Approval strategy"),
3156
- approvalPolicy: z
3157
- .enum(["strict", "balanced", "permissive"])
3158
- .optional()
3159
- .describe("Approval policy override"),
3160
- mcpServers: z
3161
- .array(MCP_SERVER_ENUM)
3162
- .default(["sqry"])
3163
- .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
3164
- allowedTools: z
3165
- .array(z.string())
3166
- .optional()
3167
- .describe("Allowed built-in tools (passed as --tools comma list)"),
3168
- disallowedTools: z
3169
- .array(z.string())
3170
- .optional()
3171
- .describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
3172
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3173
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3174
- idleTimeoutMs: z
3175
- .number()
3176
- .int()
3177
- .min(30_000)
3178
- .max(3_600_000)
3179
- .optional()
3180
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3181
- forceRefresh: z
3182
- .boolean()
3183
- .default(false)
3184
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3185
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3186
- return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3187
- prompt,
3188
- model,
3189
- outputFormat,
3190
- sessionId,
3191
- resumeLatest,
3192
- createNewSession,
3193
- alwaysApprove,
3194
- permissionMode,
3195
- effort,
3196
- reasoningEffort,
3197
- approvalStrategy,
3198
- approvalPolicy,
3199
- mcpServers,
3200
- allowedTools,
3201
- disallowedTools,
3202
- correlationId,
3203
- optimizePrompt,
3204
- idleTimeoutMs,
3205
- forceRefresh,
3206
- });
3207
- });
3208
- server.tool("mistral_request_async", {
3209
- prompt: z
3210
- .string()
3211
- .min(1, "Prompt cannot be empty")
3212
- .max(100000, "Prompt too long (max 100k chars)")
3213
- .describe("Prompt text for Mistral Vibe"),
3214
- model: z
3215
- .string()
3216
- .optional()
3217
- .describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
3218
- outputFormat: z
3219
- .enum(["plain", "json", "stream-json"])
3220
- .optional()
3221
- .describe("Output format (plain|json|stream-json). Vibe default is plain."),
3222
- sessionId: z
3223
- .string()
3224
- .optional()
3225
- .describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
3226
- resumeLatest: z
3227
- .boolean()
3228
- .default(false)
3229
- .describe("Resume most recent Vibe session in cwd (--continue)"),
3230
- createNewSession: z.boolean().default(false).describe("Force new session"),
3231
- permissionMode: z
3232
- .enum(MISTRAL_AGENT_MODES)
3233
- .optional()
3234
- .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
3235
- effort: z
3236
- .enum(["low", "medium", "high", "xhigh", "max"])
3237
- .optional()
3238
- .describe("Vibe effort level"),
3239
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
3240
- approvalStrategy: z
3241
- .enum(["legacy", "mcp_managed"])
3242
- .default("legacy")
3243
- .describe("Approval strategy"),
3244
- approvalPolicy: z
3245
- .enum(["strict", "balanced", "permissive"])
3246
- .optional()
3247
- .describe("Approval policy override"),
3248
- mcpServers: z
3249
- .array(MCP_SERVER_ENUM)
3250
- .default(["sqry"])
3251
- .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
3252
- allowedTools: z
3253
- .array(z.string())
3254
- .optional()
3255
- .describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
3256
- disallowedTools: z
3257
- .array(z.string())
3258
- .optional()
3259
- .describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
3260
- correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
3261
- optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
3262
- idleTimeoutMs: z
3263
- .number()
3264
- .int()
3265
- .min(30_000)
3266
- .max(3_600_000)
3267
- .optional()
3268
- .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
3269
- forceRefresh: z
3270
- .boolean()
3271
- .default(false)
3272
- .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3273
- }, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3274
- return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3275
- prompt,
3276
- model,
3277
- outputFormat,
3278
- sessionId,
3279
- resumeLatest,
3280
- createNewSession,
3281
- permissionMode,
3282
- effort,
3283
- reasoningEffort,
3284
- approvalStrategy,
3285
- approvalPolicy,
3286
- mcpServers,
3287
- allowedTools,
3288
- disallowedTools,
3289
- correlationId,
3290
- optimizePrompt,
3291
- idleTimeoutMs,
3292
- forceRefresh,
3293
3457
  });
3294
- });
3295
- server.tool("llm_job_status", {
3296
- jobId: z.string().describe("Async job ID from *_request_async"),
3297
- }, async ({ jobId }) => {
3298
- const job = asyncJobManager.getJobSnapshot(jobId);
3299
- if (!job) {
3300
- return {
3301
- content: [
3302
- {
3303
- type: "text",
3304
- text: JSON.stringify({
3305
- success: false,
3306
- error: "Job not found",
3307
- jobId,
3308
- }, null, 2),
3309
- },
3310
- ],
3311
- isError: true,
3312
- };
3313
- }
3314
- return {
3315
- content: [
3316
- {
3317
- type: "text",
3318
- text: JSON.stringify({
3319
- success: true,
3320
- job,
3321
- }, null, 2),
3322
- },
3323
- ],
3324
- };
3325
- });
3326
- server.tool("llm_job_result", {
3327
- jobId: z.string().describe("Async job ID from *_request_async"),
3328
- maxChars: z
3329
- .number()
3330
- .int()
3331
- .min(1000)
3332
- .max(2000000)
3333
- .default(200000)
3334
- .describe("Max chars returned per stream"),
3335
- }, async ({ jobId, maxChars }) => {
3336
- const result = asyncJobManager.getJobResult(jobId, maxChars);
3337
- if (!result) {
3338
- return {
3339
- content: [
3340
- {
3341
- type: "text",
3342
- text: JSON.stringify({
3343
- success: false,
3344
- error: "Job not found",
3345
- jobId,
3346
- }, null, 2),
3347
- },
3348
- ],
3349
- isError: true,
3350
- };
3351
- }
3352
- // Parse stream-json output for Claude async jobs
3353
- const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
3354
- let parsed;
3355
- if (outputFormat === "stream-json" && result.stdout) {
3356
- parsed = parseStreamJson(result.stdout);
3357
- }
3358
- return {
3359
- content: [
3360
- {
3361
- type: "text",
3362
- text: JSON.stringify({
3363
- success: true,
3364
- result,
3365
- ...(parsed
3366
- ? {
3367
- parsed: {
3368
- text: parsed.text,
3369
- costUsd: parsed.costUsd,
3370
- usage: parsed.usage,
3371
- model: parsed.model,
3372
- numTurns: parsed.numTurns,
3373
- },
3374
- }
3375
- : {}),
3376
- }, null, 2),
3377
- },
3378
- ],
3379
- };
3380
- });
3381
- server.tool("llm_job_cancel", {
3382
- jobId: z.string().describe("Async job ID from *_request_async"),
3383
- }, async ({ jobId }) => {
3384
- const cancel = asyncJobManager.cancelJob(jobId);
3385
- if (!cancel.canceled) {
3386
- return {
3387
- content: [
3388
- {
3389
- type: "text",
3390
- text: JSON.stringify({
3391
- success: false,
3392
- jobId,
3393
- reason: cancel.reason || "Unable to cancel",
3394
- }, null, 2),
3395
- },
3396
- ],
3397
- isError: true,
3398
- };
3399
- }
3400
- return {
3401
- content: [
3402
- {
3403
- type: "text",
3404
- text: JSON.stringify({
3405
- success: true,
3406
- jobId,
3407
- }, null, 2),
3408
- },
3409
- ],
3410
- };
3411
- });
3458
+ } // end if (asyncJobsEnabled)
3412
3459
  server.tool("llm_process_health", {}, async () => {
3413
3460
  const health = asyncJobManager.getJobHealth();
3461
+ const persistenceBlock = {
3462
+ backend: persistence.backend,
3463
+ dbPath: persistence.path,
3464
+ dsn: persistence.dsn ? "[redacted]" : null,
3465
+ retentionDays: persistence.retentionDays,
3466
+ dedupWindowMs: persistence.dedupWindowMs,
3467
+ asyncJobsEnabled: persistence.asyncJobsEnabled,
3468
+ acknowledgeEphemeral: persistence.acknowledgeEphemeral,
3469
+ sources: persistence.sources,
3470
+ warning: persistence.asyncJobsEnabled
3471
+ ? null
3472
+ : "Async job persistence is disabled (backend = 'none'). *_request_async tools are NOT registered on this gateway. Set [persistence].backend = 'sqlite' (or 'memory' + acknowledgeEphemeral = true) to enable them.",
3473
+ };
3414
3474
  return {
3415
3475
  content: [
3416
3476
  {
3417
3477
  type: "text",
3418
- text: JSON.stringify({ success: true, ...health }, null, 2),
3478
+ text: JSON.stringify({ success: true, ...health, persistence: persistenceBlock }, null, 2),
3419
3479
  },
3420
3480
  ],
3421
3481
  };
@@ -3470,6 +3530,18 @@ export function createGatewayServer(deps = {}) {
3470
3530
  const versions = await getCliVersions(cli);
3471
3531
  return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
3472
3532
  });
3533
+ server.tool("upstream_contracts", {
3534
+ cli: z
3535
+ .preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional())
3536
+ .describe("CLI filter (claude|codex|gemini|grok|mistral)"),
3537
+ probeInstalled: z
3538
+ .boolean()
3539
+ .default(false)
3540
+ .describe("When true, run local --help probes and compare advertised flags"),
3541
+ }, async ({ cli, probeInstalled }) => {
3542
+ const report = buildUpstreamContractReport({ cli, probeInstalled });
3543
+ return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
3544
+ });
3473
3545
  server.tool("cli_upgrade", {
3474
3546
  cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
3475
3547
  target: z
@@ -3849,6 +3921,23 @@ async function main() {
3849
3921
  process.stderr.write("Only doctor --json is supported in this layer.\n");
3850
3922
  process.exit(2);
3851
3923
  }
3924
+ if (args[0] === "contracts") {
3925
+ if (args.includes("--json")) {
3926
+ const cliArg = args.find(arg => arg.startsWith("--cli="))?.split("=")[1];
3927
+ const cli = SESSION_PROVIDER_VALUES.includes(cliArg)
3928
+ ? cliArg
3929
+ : undefined;
3930
+ if (cliArg && !cli) {
3931
+ process.stderr.write(`Unsupported --cli value: ${cliArg}\n`);
3932
+ process.exit(2);
3933
+ }
3934
+ const probeInstalled = args.includes("--probe-installed");
3935
+ process.stdout.write(JSON.stringify(buildUpstreamContractReport({ cli, probeInstalled }), null, 2) + "\n");
3936
+ return;
3937
+ }
3938
+ process.stderr.write("Usage: llm-cli-gateway contracts --json [--cli=claude|codex|gemini|grok|mistral] [--probe-installed]\n");
3939
+ process.exit(2);
3940
+ }
3852
3941
  const transportArg = args.find(arg => arg.startsWith("--transport="));
3853
3942
  const transportMode = transportArg?.split("=")[1] ||
3854
3943
  process.env.LLM_GATEWAY_TRANSPORT ||
@@ -3890,7 +3979,7 @@ async function main() {
3890
3979
  }
3891
3980
  // Guard: only auto-start when run directly (not imported for testing)
3892
3981
  // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
3893
- const __entryUrl = process.argv[1] ? new URL(realpathSync(process.argv[1]), "file://").href : "";
3982
+ const __entryUrl = entrypointFileURL(process.argv[1]);
3894
3983
  if (__entryUrl === import.meta.url) {
3895
3984
  main().catch(error => {
3896
3985
  logger.error("Fatal server error:", error);