llm-cli-gateway 1.5.4 → 1.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +68 -0
- package/README.md +249 -9
- package/dist/async-job-manager.d.ts +8 -0
- package/dist/async-job-manager.js +31 -10
- package/dist/config.d.ts +30 -0
- package/dist/config.js +167 -0
- package/dist/entrypoint-url.d.ts +1 -0
- package/dist/entrypoint-url.js +5 -0
- package/dist/executor.d.ts +7 -1
- package/dist/executor.js +50 -15
- package/dist/index.d.ts +3 -0
- package/dist/index.js +764 -675
- package/dist/job-store.d.ts +118 -2
- package/dist/job-store.js +176 -5
- package/dist/upstream-contracts.d.ts +62 -0
- package/dist/upstream-contracts.js +620 -0
- package/package.json +11 -6
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
4
|
import { randomUUID } from "crypto";
|
|
5
|
-
import { readFileSync, readdirSync
|
|
5
|
+
import { readFileSync, readdirSync } from "fs";
|
|
6
6
|
import { dirname, join } from "path";
|
|
7
7
|
import { fileURLToPath } from "url";
|
|
8
8
|
import { z } from "zod";
|
|
@@ -14,11 +14,11 @@ import { createSessionManager } from "./session-manager.js";
|
|
|
14
14
|
import { ResourceProvider } from "./resources.js";
|
|
15
15
|
import { PerformanceMetrics } from "./metrics.js";
|
|
16
16
|
import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
|
|
17
|
-
import { loadConfig } from "./config.js";
|
|
17
|
+
import { loadConfig, loadPersistenceConfig } from "./config.js";
|
|
18
18
|
import { checkHealth } from "./health.js";
|
|
19
19
|
import { getCliInfo, resolveModelAlias } from "./model-registry.js";
|
|
20
20
|
import { AsyncJobManager } from "./async-job-manager.js";
|
|
21
|
-
import {
|
|
21
|
+
import { createJobStore } from "./job-store.js";
|
|
22
22
|
import { ApprovalManager } from "./approval-manager.js";
|
|
23
23
|
import { checkReviewIntegrity } from "./review-integrity.js";
|
|
24
24
|
import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
|
|
@@ -28,6 +28,8 @@ import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
|
|
|
28
28
|
import { startHttpGateway } from "./http-transport.js";
|
|
29
29
|
import { printDoctorJson } from "./doctor.js";
|
|
30
30
|
import { registerValidationTools } from "./validation-tools.js";
|
|
31
|
+
import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
|
|
32
|
+
import { entrypointFileURL } from "./entrypoint-url.js";
|
|
31
33
|
// Simple logger that writes to stderr (stdout is used for MCP protocol)
|
|
32
34
|
const logger = {
|
|
33
35
|
info: (message, ...args) => {
|
|
@@ -102,7 +104,7 @@ Tools: claude_request, codex_request, gemini_request, grok_request, mistral_requ
|
|
|
102
104
|
Validation: validate_with_models, second_opinion, compare_answers, red_team_review, consensus_check, ask_model, synthesize_validation
|
|
103
105
|
Jobs: llm_job_status, llm_job_result, llm_job_cancel
|
|
104
106
|
Sessions: session_create, session_list, session_set_active, session_get, session_delete, session_clear_all
|
|
105
|
-
Other: list_models, cli_versions, cli_upgrade, approval_list, llm_process_health
|
|
107
|
+
Other: list_models, cli_versions, upstream_contracts, cli_upgrade, approval_list, llm_process_health
|
|
106
108
|
|
|
107
109
|
Key behaviors:
|
|
108
110
|
- Sync auto-defers at ${SYNC_DEADLINE_MS}ms. Poll deferred jobs via llm_job_status/llm_job_result.
|
|
@@ -121,20 +123,18 @@ let db = null;
|
|
|
121
123
|
const performanceMetrics = new PerformanceMetrics();
|
|
122
124
|
let resourceProvider;
|
|
123
125
|
const flightRecorder = createFlightRecorder(logger);
|
|
124
|
-
//
|
|
125
|
-
//
|
|
126
|
-
//
|
|
126
|
+
// Resolved persistence config — single source of truth for the async-job backend.
|
|
127
|
+
// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
|
|
128
|
+
// When backend = "none", the JobStore is null AND *_request_async tools are not
|
|
129
|
+
// registered (see createGatewayServer), making silent in-memory loss
|
|
130
|
+
// structurally impossible.
|
|
131
|
+
const persistenceConfig = loadPersistenceConfig(logger);
|
|
127
132
|
const jobStore = (() => {
|
|
128
|
-
const dbPath = resolveJobStoreDbPath();
|
|
129
|
-
if (!dbPath) {
|
|
130
|
-
logger.info("Durable job store disabled (LLM_GATEWAY_LOGS_DB=none)");
|
|
131
|
-
return null;
|
|
132
|
-
}
|
|
133
133
|
try {
|
|
134
|
-
return
|
|
134
|
+
return createJobStore(persistenceConfig, logger);
|
|
135
135
|
}
|
|
136
136
|
catch (err) {
|
|
137
|
-
logger.error("Failed to open durable job store;
|
|
137
|
+
logger.error("Failed to open durable job store; async tools will be unavailable", err);
|
|
138
138
|
return null;
|
|
139
139
|
}
|
|
140
140
|
})();
|
|
@@ -178,6 +178,7 @@ function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
178
178
|
approvalManager: runtimeApprovalManager,
|
|
179
179
|
flightRecorder: deps.flightRecorder ?? flightRecorder,
|
|
180
180
|
logger: runtimeLogger,
|
|
181
|
+
persistence: deps.persistence ?? persistenceConfig,
|
|
181
182
|
};
|
|
182
183
|
}
|
|
183
184
|
// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
|
|
@@ -217,6 +218,14 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
|
|
|
217
218
|
runtime.logger.error(`awaitJobOrDefer onComplete (${cli}) threw`, err);
|
|
218
219
|
}
|
|
219
220
|
};
|
|
221
|
+
try {
|
|
222
|
+
assertUpstreamCliArgs(cli, args);
|
|
223
|
+
assertUpstreamCliEnv(cli, env);
|
|
224
|
+
}
|
|
225
|
+
catch (err) {
|
|
226
|
+
consumeOnComplete();
|
|
227
|
+
throw err;
|
|
228
|
+
}
|
|
220
229
|
if (SYNC_DEADLINE_MS === 0) {
|
|
221
230
|
// Disabled — fall through to direct execution.
|
|
222
231
|
// Note: direct execution bypasses dedup. forceRefresh is implied.
|
|
@@ -269,7 +278,7 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
|
|
|
269
278
|
}
|
|
270
279
|
return {
|
|
271
280
|
stdout: result.stdout,
|
|
272
|
-
stderr: result.stderr,
|
|
281
|
+
stderr: result.stderr || result.error || "",
|
|
273
282
|
code: result.exitCode ?? 1,
|
|
274
283
|
};
|
|
275
284
|
}
|
|
@@ -1362,6 +1371,8 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1362
1371
|
// Start job only after all session I/O succeeds. U23: forward outputFormat
|
|
1363
1372
|
// so AsyncJobManager records it in the durable store (the manager also
|
|
1364
1373
|
// surfaces it in the snapshot).
|
|
1374
|
+
assertUpstreamCliArgs("gemini", args);
|
|
1375
|
+
assertUpstreamCliEnv("gemini", undefined);
|
|
1365
1376
|
const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
|
|
1366
1377
|
deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
|
|
1367
1378
|
const asyncResponse = {
|
|
@@ -1556,6 +1567,8 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
1556
1567
|
effectiveSessionId = newSession.id;
|
|
1557
1568
|
}
|
|
1558
1569
|
// Start job only after all session I/O succeeds
|
|
1570
|
+
assertUpstreamCliArgs("grok", args);
|
|
1571
|
+
assertUpstreamCliEnv("grok", undefined);
|
|
1559
1572
|
const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh);
|
|
1560
1573
|
deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
|
|
1561
1574
|
const asyncResponse = {
|
|
@@ -1742,6 +1755,8 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
1742
1755
|
const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
|
|
1743
1756
|
effectiveSessionId = newSession.id;
|
|
1744
1757
|
}
|
|
1758
|
+
assertUpstreamCliArgs("mistral", args);
|
|
1759
|
+
assertUpstreamCliEnv("mistral", mistralEnv);
|
|
1745
1760
|
const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv);
|
|
1746
1761
|
deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
|
|
1747
1762
|
const asyncResponse = {
|
|
@@ -1840,6 +1855,8 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
1840
1855
|
}
|
|
1841
1856
|
// Start job only after all session I/O succeeds. If startJob throws before
|
|
1842
1857
|
// registering the record, ownership stays here and we run it in the catch.
|
|
1858
|
+
assertUpstreamCliArgs("codex", args);
|
|
1859
|
+
assertUpstreamCliEnv("codex", undefined);
|
|
1843
1860
|
let job;
|
|
1844
1861
|
try {
|
|
1845
1862
|
job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
|
|
@@ -1886,7 +1903,24 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
1886
1903
|
//──────────────────────────────────────────────────────────────────────────────
|
|
1887
1904
|
export function createGatewayServer(deps = {}) {
|
|
1888
1905
|
const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
|
|
1889
|
-
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger } = runtime;
|
|
1906
|
+
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
|
|
1907
|
+
// Structural invariant: tools register iff ALL THREE conditions hold:
|
|
1908
|
+
// (1) persistence.backend !== "none" — the operator/config has not
|
|
1909
|
+
// explicitly disabled durable persistence;
|
|
1910
|
+
// (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
|
|
1911
|
+
// agrees (loadPersistenceConfig sets this iff backend is one of
|
|
1912
|
+
// sqlite/postgres/memory);
|
|
1913
|
+
// (3) asyncJobManager.hasStore() === true — the runtime manager
|
|
1914
|
+
// actually has a store attached (isolate-mode runtimes use null).
|
|
1915
|
+
//
|
|
1916
|
+
// Each guard closes a distinct re-entry path for the silent-loss footgun:
|
|
1917
|
+
// - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
|
|
1918
|
+
// and re-advertise the async tools while reporting backend='none' in
|
|
1919
|
+
// llm_process_health — exactly contradicting SPEC CLAIM 4f.
|
|
1920
|
+
// - Without (2), config that opts out is ignored.
|
|
1921
|
+
// - Without (3), a null-store manager (isolate-mode / HTTP per-session)
|
|
1922
|
+
// accepts registrations that have nowhere to persist results.
|
|
1923
|
+
const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
|
|
1890
1924
|
const server = newGatewayMcpServer();
|
|
1891
1925
|
registerBaseResources(server, runtime);
|
|
1892
1926
|
registerValidationTools(server, { asyncJobManager });
|
|
@@ -2736,686 +2770,712 @@ export function createGatewayServer(deps = {}) {
|
|
|
2736
2770
|
});
|
|
2737
2771
|
//──────────────────────────────────────────────────────────────────────────────
|
|
2738
2772
|
// Async Long-Running Job Tools (No Time-Bound LLM Execution)
|
|
2773
|
+
//
|
|
2774
|
+
// STRUCTURAL INVARIANT: these tools are only registered when a real job
|
|
2775
|
+
// store is attached (`persistence.asyncJobsEnabled === true`). When the
|
|
2776
|
+
// operator has configured `[persistence].backend = "none"`, none of the
|
|
2777
|
+
// *_request_async / llm_job_* tools exist in the MCP tool list at all —
|
|
2778
|
+
// orchestrating agents get a clean "tool not found" signal at connect
|
|
2779
|
+
// time instead of silent in-memory loss after the 1-hour TTL.
|
|
2739
2780
|
//──────────────────────────────────────────────────────────────────────────────
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
const prep = prepareClaudeRequest({
|
|
2844
|
-
prompt,
|
|
2845
|
-
model,
|
|
2846
|
-
outputFormat,
|
|
2847
|
-
allowedTools,
|
|
2848
|
-
disallowedTools,
|
|
2849
|
-
dangerouslySkipPermissions,
|
|
2850
|
-
permissionMode,
|
|
2851
|
-
approvalStrategy,
|
|
2852
|
-
approvalPolicy,
|
|
2853
|
-
mcpServers,
|
|
2854
|
-
strictMcpConfig,
|
|
2855
|
-
correlationId,
|
|
2856
|
-
optimizePrompt,
|
|
2857
|
-
operation: "claude_request_async",
|
|
2858
|
-
agent,
|
|
2859
|
-
agents,
|
|
2860
|
-
forkSession,
|
|
2861
|
-
systemPrompt,
|
|
2862
|
-
appendSystemPrompt,
|
|
2863
|
-
maxBudgetUsd,
|
|
2864
|
-
maxTurns,
|
|
2865
|
-
effort,
|
|
2866
|
-
excludeDynamicSystemPromptSections,
|
|
2867
|
-
}, runtime);
|
|
2868
|
-
if (!("args" in prep))
|
|
2869
|
-
return prep;
|
|
2870
|
-
const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
|
|
2871
|
-
try {
|
|
2872
|
-
// Session management (before job start for async)
|
|
2873
|
-
let effectiveSessionId = sessionId;
|
|
2874
|
-
let useContinue = continueSession;
|
|
2875
|
-
const activeSession = await sessionManager.getActiveSession("claude");
|
|
2876
|
-
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2877
|
-
effectiveSessionId = activeSession.id;
|
|
2878
|
-
useContinue = true;
|
|
2781
|
+
if (asyncJobsEnabled) {
|
|
2782
|
+
server.tool("claude_request_async", {
|
|
2783
|
+
prompt: z
|
|
2784
|
+
.string()
|
|
2785
|
+
.min(1, "Prompt cannot be empty")
|
|
2786
|
+
.max(100000, "Prompt too long (max 100k chars)")
|
|
2787
|
+
.describe("Prompt text for Claude"),
|
|
2788
|
+
model: z
|
|
2789
|
+
.string()
|
|
2790
|
+
.optional()
|
|
2791
|
+
.describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
|
|
2792
|
+
outputFormat: z
|
|
2793
|
+
.enum(["text", "json", "stream-json"])
|
|
2794
|
+
.default("text")
|
|
2795
|
+
.describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
|
|
2796
|
+
sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
|
|
2797
|
+
continueSession: z.boolean().default(false).describe("Continue active session"),
|
|
2798
|
+
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
2799
|
+
allowedTools: z
|
|
2800
|
+
.array(z.string())
|
|
2801
|
+
.optional()
|
|
2802
|
+
.describe("Allowed tools (['Bash(git:*)','Edit','Write'])"),
|
|
2803
|
+
disallowedTools: z.array(z.string()).optional().describe("Disallowed tools"),
|
|
2804
|
+
dangerouslySkipPermissions: z
|
|
2805
|
+
.boolean()
|
|
2806
|
+
.default(false)
|
|
2807
|
+
.describe('DEPRECATED: prefer `permissionMode: "bypassPermissions"`. Maps to it when `permissionMode` is unset.'),
|
|
2808
|
+
permissionMode: z
|
|
2809
|
+
.enum(CLAUDE_PERMISSION_MODES)
|
|
2810
|
+
.optional()
|
|
2811
|
+
.describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
|
|
2812
|
+
// U25 — Claude high-impact features
|
|
2813
|
+
agent: z
|
|
2814
|
+
.string()
|
|
2815
|
+
.optional()
|
|
2816
|
+
.describe("Claude --agent: dispatch to a named single sub-agent."),
|
|
2817
|
+
agents: z
|
|
2818
|
+
.record(z.record(z.unknown()))
|
|
2819
|
+
.optional()
|
|
2820
|
+
.describe("Claude --agents: inline JSON map of agent name → { description, prompt, tools?, model? }."),
|
|
2821
|
+
forkSession: z
|
|
2822
|
+
.boolean()
|
|
2823
|
+
.optional()
|
|
2824
|
+
.describe("Claude --fork-session: branch from an existing session into a fresh fork."),
|
|
2825
|
+
systemPrompt: z
|
|
2826
|
+
.string()
|
|
2827
|
+
.optional()
|
|
2828
|
+
.describe("Claude --system-prompt: replace the system prompt entirely."),
|
|
2829
|
+
appendSystemPrompt: z
|
|
2830
|
+
.string()
|
|
2831
|
+
.optional()
|
|
2832
|
+
.describe("Claude --append-system-prompt: append to the existing system prompt."),
|
|
2833
|
+
maxBudgetUsd: z
|
|
2834
|
+
.number()
|
|
2835
|
+
.positive()
|
|
2836
|
+
.optional()
|
|
2837
|
+
.describe("Claude --max-budget-usd: spend cap for this request in USD."),
|
|
2838
|
+
maxTurns: z
|
|
2839
|
+
.number()
|
|
2840
|
+
.int()
|
|
2841
|
+
.positive()
|
|
2842
|
+
.optional()
|
|
2843
|
+
.describe("Claude --max-turns: cap on agent loop iterations."),
|
|
2844
|
+
effort: z
|
|
2845
|
+
.enum(CLAUDE_EFFORT_LEVELS)
|
|
2846
|
+
.optional()
|
|
2847
|
+
.describe("Claude --effort: low|medium|high|xhigh|max."),
|
|
2848
|
+
excludeDynamicSystemPromptSections: z
|
|
2849
|
+
.boolean()
|
|
2850
|
+
.optional()
|
|
2851
|
+
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
2852
|
+
approvalStrategy: z
|
|
2853
|
+
.enum(["legacy", "mcp_managed"])
|
|
2854
|
+
.default("legacy")
|
|
2855
|
+
.describe("Approval strategy"),
|
|
2856
|
+
approvalPolicy: z
|
|
2857
|
+
.enum(["strict", "balanced", "permissive"])
|
|
2858
|
+
.optional()
|
|
2859
|
+
.describe("Approval policy override"),
|
|
2860
|
+
mcpServers: z
|
|
2861
|
+
.array(MCP_SERVER_ENUM)
|
|
2862
|
+
.default(["sqry"])
|
|
2863
|
+
.describe("MCP servers exposed to Claude"),
|
|
2864
|
+
strictMcpConfig: z
|
|
2865
|
+
.boolean()
|
|
2866
|
+
.default(false)
|
|
2867
|
+
.describe("Restrict Claude to provided MCP config only"),
|
|
2868
|
+
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
2869
|
+
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
2870
|
+
idleTimeoutMs: z
|
|
2871
|
+
.number()
|
|
2872
|
+
.int()
|
|
2873
|
+
.min(30_000)
|
|
2874
|
+
.max(3_600_000)
|
|
2875
|
+
.optional()
|
|
2876
|
+
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
2877
|
+
forceRefresh: z
|
|
2878
|
+
.boolean()
|
|
2879
|
+
.default(false)
|
|
2880
|
+
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2881
|
+
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
2882
|
+
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2883
|
+
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
2879
2884
|
}
|
|
2880
|
-
|
|
2881
|
-
|
|
2885
|
+
const prep = prepareClaudeRequest({
|
|
2886
|
+
prompt,
|
|
2887
|
+
model,
|
|
2888
|
+
outputFormat,
|
|
2889
|
+
allowedTools,
|
|
2890
|
+
disallowedTools,
|
|
2891
|
+
dangerouslySkipPermissions,
|
|
2892
|
+
permissionMode,
|
|
2893
|
+
approvalStrategy,
|
|
2894
|
+
approvalPolicy,
|
|
2895
|
+
mcpServers,
|
|
2896
|
+
strictMcpConfig,
|
|
2897
|
+
correlationId,
|
|
2898
|
+
optimizePrompt,
|
|
2899
|
+
operation: "claude_request_async",
|
|
2900
|
+
agent,
|
|
2901
|
+
agents,
|
|
2902
|
+
forkSession,
|
|
2903
|
+
systemPrompt,
|
|
2904
|
+
appendSystemPrompt,
|
|
2905
|
+
maxBudgetUsd,
|
|
2906
|
+
maxTurns,
|
|
2907
|
+
effort,
|
|
2908
|
+
excludeDynamicSystemPromptSections,
|
|
2909
|
+
}, runtime);
|
|
2910
|
+
if (!("args" in prep))
|
|
2911
|
+
return prep;
|
|
2912
|
+
const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
|
|
2913
|
+
try {
|
|
2914
|
+
// Session management (before job start for async)
|
|
2915
|
+
let effectiveSessionId = sessionId;
|
|
2916
|
+
let useContinue = continueSession;
|
|
2917
|
+
const activeSession = await sessionManager.getActiveSession("claude");
|
|
2918
|
+
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2919
|
+
effectiveSessionId = activeSession.id;
|
|
2920
|
+
useContinue = true;
|
|
2921
|
+
}
|
|
2922
|
+
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
2923
|
+
useContinue = true;
|
|
2924
|
+
}
|
|
2925
|
+
if (useContinue) {
|
|
2926
|
+
args.push("--continue");
|
|
2927
|
+
}
|
|
2928
|
+
else if (effectiveSessionId) {
|
|
2929
|
+
args.push("--session-id", effectiveSessionId);
|
|
2930
|
+
await sessionManager.updateSessionUsage(effectiveSessionId);
|
|
2931
|
+
}
|
|
2932
|
+
if (effectiveSessionId) {
|
|
2933
|
+
const existingSession = await sessionManager.getSession(effectiveSessionId);
|
|
2934
|
+
if (!existingSession) {
|
|
2935
|
+
await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
|
|
2936
|
+
}
|
|
2937
|
+
}
|
|
2938
|
+
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
2939
|
+
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
2940
|
+
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
2941
|
+
: undefined;
|
|
2942
|
+
assertUpstreamCliArgs("claude", args);
|
|
2943
|
+
assertUpstreamCliEnv("claude", undefined);
|
|
2944
|
+
const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh);
|
|
2945
|
+
logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
|
|
2946
|
+
const asyncResponse = {
|
|
2947
|
+
success: true,
|
|
2948
|
+
job,
|
|
2949
|
+
sessionId: effectiveSessionId || activeSession?.id || null,
|
|
2950
|
+
approval: approvalDecision,
|
|
2951
|
+
mcpServers: {
|
|
2952
|
+
requested: requestedMcpServers,
|
|
2953
|
+
enabled: mcpConfig?.enabled,
|
|
2954
|
+
missing: mcpConfig?.missing,
|
|
2955
|
+
},
|
|
2956
|
+
};
|
|
2957
|
+
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
2958
|
+
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
2959
|
+
}
|
|
2960
|
+
return {
|
|
2961
|
+
content: [
|
|
2962
|
+
{
|
|
2963
|
+
type: "text",
|
|
2964
|
+
text: JSON.stringify(asyncResponse, null, 2),
|
|
2965
|
+
},
|
|
2966
|
+
],
|
|
2967
|
+
};
|
|
2882
2968
|
}
|
|
2883
|
-
|
|
2884
|
-
|
|
2969
|
+
catch (error) {
|
|
2970
|
+
return createErrorResponse("claude_request_async", 1, "", corrId, error);
|
|
2885
2971
|
}
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2972
|
+
});
|
|
2973
|
+
server.tool("codex_request_async", {
|
|
2974
|
+
prompt: z
|
|
2975
|
+
.string()
|
|
2976
|
+
.min(1, "Prompt cannot be empty")
|
|
2977
|
+
.max(100000, "Prompt too long (max 100k chars)")
|
|
2978
|
+
.describe("Prompt text for Codex"),
|
|
2979
|
+
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
2980
|
+
fullAuto: z
|
|
2981
|
+
.boolean()
|
|
2982
|
+
.default(false)
|
|
2983
|
+
.describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
|
|
2984
|
+
sandboxMode: z
|
|
2985
|
+
.enum(CODEX_SANDBOX_MODES)
|
|
2986
|
+
.optional()
|
|
2987
|
+
.describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
|
|
2988
|
+
askForApproval: z
|
|
2989
|
+
.enum(CODEX_ASK_FOR_APPROVAL_MODES)
|
|
2990
|
+
.optional()
|
|
2991
|
+
.describe("Codex --ask-for-approval: untrusted|on-request|never."),
|
|
2992
|
+
useLegacyFullAutoFlag: z
|
|
2993
|
+
.boolean()
|
|
2994
|
+
.default(false)
|
|
2995
|
+
.describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
|
|
2996
|
+
dangerouslyBypassApprovalsAndSandbox: z
|
|
2997
|
+
.boolean()
|
|
2998
|
+
.default(false)
|
|
2999
|
+
.describe("Run Codex without approvals/sandbox"),
|
|
3000
|
+
approvalStrategy: z
|
|
3001
|
+
.enum(["legacy", "mcp_managed"])
|
|
3002
|
+
.default("legacy")
|
|
3003
|
+
.describe("Approval strategy"),
|
|
3004
|
+
approvalPolicy: z
|
|
3005
|
+
.enum(["strict", "balanced", "permissive"])
|
|
3006
|
+
.optional()
|
|
3007
|
+
.describe("Approval policy override"),
|
|
3008
|
+
mcpServers: z
|
|
3009
|
+
.array(MCP_SERVER_ENUM)
|
|
3010
|
+
.default(["sqry"])
|
|
3011
|
+
.describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
|
|
3012
|
+
sessionId: z
|
|
3013
|
+
.string()
|
|
3014
|
+
.optional()
|
|
3015
|
+
.describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
|
|
3016
|
+
resumeLatest: z
|
|
3017
|
+
.boolean()
|
|
3018
|
+
.default(false)
|
|
3019
|
+
.describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
|
|
3020
|
+
createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
|
|
3021
|
+
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3022
|
+
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3023
|
+
idleTimeoutMs: z
|
|
3024
|
+
.number()
|
|
3025
|
+
.int()
|
|
3026
|
+
.min(30_000)
|
|
3027
|
+
.max(3_600_000)
|
|
3028
|
+
.optional()
|
|
3029
|
+
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3030
|
+
forceRefresh: z
|
|
3031
|
+
.boolean()
|
|
3032
|
+
.default(false)
|
|
3033
|
+
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3034
|
+
// U23: emit `--json` to enable JSONL event-stream parsing for token usage.
|
|
3035
|
+
outputFormat: z
|
|
3036
|
+
.enum(["text", "json"])
|
|
3037
|
+
.default("text")
|
|
3038
|
+
.describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
|
|
3039
|
+
// U26: high-impact feature flags. All optional.
|
|
3040
|
+
outputSchema: z
|
|
3041
|
+
.union([z.string(), z.record(z.unknown())])
|
|
3042
|
+
.optional()
|
|
3043
|
+
.describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
|
|
3044
|
+
search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
|
|
3045
|
+
profile: z.string().optional().describe("Codex --profile <name>."),
|
|
3046
|
+
configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
|
|
3047
|
+
ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
|
|
3048
|
+
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
3049
|
+
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
3050
|
+
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
3051
|
+
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3052
|
+
return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3053
|
+
prompt,
|
|
3054
|
+
model,
|
|
3055
|
+
fullAuto,
|
|
3056
|
+
sandboxMode,
|
|
3057
|
+
askForApproval,
|
|
3058
|
+
useLegacyFullAutoFlag,
|
|
3059
|
+
dangerouslyBypassApprovalsAndSandbox,
|
|
3060
|
+
approvalStrategy,
|
|
3061
|
+
approvalPolicy,
|
|
3062
|
+
mcpServers,
|
|
3063
|
+
sessionId,
|
|
3064
|
+
resumeLatest,
|
|
3065
|
+
createNewSession,
|
|
3066
|
+
correlationId,
|
|
3067
|
+
optimizePrompt,
|
|
3068
|
+
idleTimeoutMs,
|
|
3069
|
+
forceRefresh,
|
|
3070
|
+
outputFormat,
|
|
3071
|
+
outputSchema,
|
|
3072
|
+
search,
|
|
3073
|
+
profile,
|
|
3074
|
+
configOverrides,
|
|
3075
|
+
ephemeral,
|
|
3076
|
+
images,
|
|
3077
|
+
ignoreUserConfig,
|
|
3078
|
+
ignoreRules,
|
|
3079
|
+
});
|
|
3080
|
+
});
|
|
3081
|
+
server.tool("gemini_request_async", {
|
|
3082
|
+
prompt: z
|
|
3083
|
+
.string()
|
|
3084
|
+
.min(1, "Prompt cannot be empty")
|
|
3085
|
+
.max(100000, "Prompt too long (max 100k chars)")
|
|
3086
|
+
.describe("Prompt text for Gemini"),
|
|
3087
|
+
model: z
|
|
3088
|
+
.string()
|
|
3089
|
+
.optional()
|
|
3090
|
+
.describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
|
|
3091
|
+
sessionId: z
|
|
3092
|
+
.string()
|
|
3093
|
+
.optional()
|
|
3094
|
+
.describe("Session ID (user-provided CLI handle for --resume)"),
|
|
3095
|
+
resumeLatest: z.boolean().default(false).describe("Resume latest session"),
|
|
3096
|
+
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3097
|
+
approvalMode: z
|
|
3098
|
+
.enum(GEMINI_APPROVAL_MODES)
|
|
3099
|
+
.optional()
|
|
3100
|
+
.describe("Approval: default|auto_edit|yolo|plan"),
|
|
3101
|
+
approvalStrategy: z
|
|
3102
|
+
.enum(["legacy", "mcp_managed"])
|
|
3103
|
+
.default("legacy")
|
|
3104
|
+
.describe("Approval strategy"),
|
|
3105
|
+
approvalPolicy: z
|
|
3106
|
+
.enum(["strict", "balanced", "permissive"])
|
|
3107
|
+
.optional()
|
|
3108
|
+
.describe("Approval policy override"),
|
|
3109
|
+
mcpServers: z
|
|
3110
|
+
.array(MCP_SERVER_ENUM)
|
|
3111
|
+
.default(["sqry"])
|
|
3112
|
+
.describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
|
|
3113
|
+
allowedTools: z
|
|
3114
|
+
.array(z.string())
|
|
3115
|
+
.optional()
|
|
3116
|
+
.describe("Allowed tools (['Write','Edit','Bash'])"),
|
|
3117
|
+
includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
|
|
3118
|
+
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3119
|
+
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3120
|
+
idleTimeoutMs: z
|
|
3121
|
+
.number()
|
|
3122
|
+
.int()
|
|
3123
|
+
.min(30_000)
|
|
3124
|
+
.max(3_600_000)
|
|
3125
|
+
.optional()
|
|
3126
|
+
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3127
|
+
forceRefresh: z
|
|
3128
|
+
.boolean()
|
|
3129
|
+
.default(false)
|
|
3130
|
+
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3131
|
+
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3132
|
+
// remains text so existing callers see no behavior change.
|
|
3133
|
+
outputFormat: z
|
|
3134
|
+
.enum(["text", "json"])
|
|
3135
|
+
.default("text")
|
|
3136
|
+
.describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
|
|
3137
|
+
sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
|
|
3138
|
+
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3139
|
+
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3140
|
+
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3141
|
+
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3142
|
+
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3143
|
+
prompt,
|
|
3144
|
+
model,
|
|
3145
|
+
sessionId,
|
|
3146
|
+
resumeLatest,
|
|
3147
|
+
createNewSession,
|
|
3148
|
+
approvalMode,
|
|
3149
|
+
approvalStrategy,
|
|
3150
|
+
approvalPolicy,
|
|
3151
|
+
mcpServers,
|
|
3152
|
+
allowedTools,
|
|
3153
|
+
includeDirs,
|
|
3154
|
+
correlationId,
|
|
3155
|
+
optimizePrompt,
|
|
3156
|
+
idleTimeoutMs,
|
|
3157
|
+
forceRefresh,
|
|
3158
|
+
outputFormat,
|
|
3159
|
+
sandbox,
|
|
3160
|
+
policyFiles,
|
|
3161
|
+
adminPolicyFiles,
|
|
3162
|
+
attachments,
|
|
3163
|
+
});
|
|
3164
|
+
});
|
|
3165
|
+
server.tool("grok_request_async", {
|
|
3166
|
+
prompt: z
|
|
3167
|
+
.string()
|
|
3168
|
+
.min(1, "Prompt cannot be empty")
|
|
3169
|
+
.max(100000, "Prompt too long (max 100k chars)")
|
|
3170
|
+
.describe("Prompt text for Grok"),
|
|
3171
|
+
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
3172
|
+
outputFormat: z
|
|
3173
|
+
.enum(["plain", "json", "streaming-json"])
|
|
3174
|
+
.optional()
|
|
3175
|
+
.describe("Output format (plain|json|streaming-json). Grok default is plain."),
|
|
3176
|
+
sessionId: z
|
|
3177
|
+
.string()
|
|
3178
|
+
.optional()
|
|
3179
|
+
.describe("Session ID (user-provided CLI handle for --resume)"),
|
|
3180
|
+
resumeLatest: z
|
|
3181
|
+
.boolean()
|
|
3182
|
+
.default(false)
|
|
3183
|
+
.describe("Resume most recent Grok session in cwd (--continue)"),
|
|
3184
|
+
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3185
|
+
alwaysApprove: z
|
|
3186
|
+
.boolean()
|
|
3187
|
+
.default(false)
|
|
3188
|
+
.describe("Auto-approve all tool executions (--always-approve)"),
|
|
3189
|
+
permissionMode: z
|
|
3190
|
+
.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
|
|
3191
|
+
.optional()
|
|
3192
|
+
.describe("Grok permission mode"),
|
|
3193
|
+
effort: z
|
|
3194
|
+
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
3195
|
+
.optional()
|
|
3196
|
+
.describe("Grok effort level"),
|
|
3197
|
+
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
3198
|
+
approvalStrategy: z
|
|
3199
|
+
.enum(["legacy", "mcp_managed"])
|
|
3200
|
+
.default("legacy")
|
|
3201
|
+
.describe("Approval strategy"),
|
|
3202
|
+
approvalPolicy: z
|
|
3203
|
+
.enum(["strict", "balanced", "permissive"])
|
|
3204
|
+
.optional()
|
|
3205
|
+
.describe("Approval policy override"),
|
|
3206
|
+
mcpServers: z
|
|
3207
|
+
.array(MCP_SERVER_ENUM)
|
|
3208
|
+
.default(["sqry"])
|
|
3209
|
+
.describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
|
|
3210
|
+
allowedTools: z
|
|
3211
|
+
.array(z.string())
|
|
3212
|
+
.optional()
|
|
3213
|
+
.describe("Allowed built-in tools (passed as --tools comma list)"),
|
|
3214
|
+
disallowedTools: z
|
|
3215
|
+
.array(z.string())
|
|
3216
|
+
.optional()
|
|
3217
|
+
.describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
|
|
3218
|
+
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3219
|
+
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3220
|
+
idleTimeoutMs: z
|
|
3221
|
+
.number()
|
|
3222
|
+
.int()
|
|
3223
|
+
.min(30_000)
|
|
3224
|
+
.max(3_600_000)
|
|
3225
|
+
.optional()
|
|
3226
|
+
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3227
|
+
forceRefresh: z
|
|
3228
|
+
.boolean()
|
|
3229
|
+
.default(false)
|
|
3230
|
+
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3231
|
+
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3232
|
+
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3233
|
+
prompt,
|
|
3234
|
+
model,
|
|
3235
|
+
outputFormat,
|
|
3236
|
+
sessionId,
|
|
3237
|
+
resumeLatest,
|
|
3238
|
+
createNewSession,
|
|
3239
|
+
alwaysApprove,
|
|
3240
|
+
permissionMode,
|
|
3241
|
+
effort,
|
|
3242
|
+
reasoningEffort,
|
|
3243
|
+
approvalStrategy,
|
|
3244
|
+
approvalPolicy,
|
|
3245
|
+
mcpServers,
|
|
3246
|
+
allowedTools,
|
|
3247
|
+
disallowedTools,
|
|
3248
|
+
correlationId,
|
|
3249
|
+
optimizePrompt,
|
|
3250
|
+
idleTimeoutMs,
|
|
3251
|
+
forceRefresh,
|
|
3252
|
+
});
|
|
3253
|
+
});
|
|
3254
|
+
server.tool("mistral_request_async", {
|
|
3255
|
+
prompt: z
|
|
3256
|
+
.string()
|
|
3257
|
+
.min(1, "Prompt cannot be empty")
|
|
3258
|
+
.max(100000, "Prompt too long (max 100k chars)")
|
|
3259
|
+
.describe("Prompt text for Mistral Vibe"),
|
|
3260
|
+
model: z
|
|
3261
|
+
.string()
|
|
3262
|
+
.optional()
|
|
3263
|
+
.describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
|
|
3264
|
+
outputFormat: z
|
|
3265
|
+
.enum(["plain", "json", "stream-json"])
|
|
3266
|
+
.optional()
|
|
3267
|
+
.describe("Output format (plain|json|stream-json). Vibe default is plain."),
|
|
3268
|
+
sessionId: z
|
|
3269
|
+
.string()
|
|
3270
|
+
.optional()
|
|
3271
|
+
.describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
|
|
3272
|
+
resumeLatest: z
|
|
3273
|
+
.boolean()
|
|
3274
|
+
.default(false)
|
|
3275
|
+
.describe("Resume most recent Vibe session in cwd (--continue)"),
|
|
3276
|
+
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3277
|
+
permissionMode: z
|
|
3278
|
+
.enum(MISTRAL_AGENT_MODES)
|
|
3279
|
+
.optional()
|
|
3280
|
+
.describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
|
|
3281
|
+
effort: z
|
|
3282
|
+
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
3283
|
+
.optional()
|
|
3284
|
+
.describe("Vibe effort level"),
|
|
3285
|
+
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
3286
|
+
approvalStrategy: z
|
|
3287
|
+
.enum(["legacy", "mcp_managed"])
|
|
3288
|
+
.default("legacy")
|
|
3289
|
+
.describe("Approval strategy"),
|
|
3290
|
+
approvalPolicy: z
|
|
3291
|
+
.enum(["strict", "balanced", "permissive"])
|
|
3292
|
+
.optional()
|
|
3293
|
+
.describe("Approval policy override"),
|
|
3294
|
+
mcpServers: z
|
|
3295
|
+
.array(MCP_SERVER_ENUM)
|
|
3296
|
+
.default(["sqry"])
|
|
3297
|
+
.describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
|
|
3298
|
+
allowedTools: z
|
|
3299
|
+
.array(z.string())
|
|
3300
|
+
.optional()
|
|
3301
|
+
.describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
|
|
3302
|
+
disallowedTools: z
|
|
3303
|
+
.array(z.string())
|
|
3304
|
+
.optional()
|
|
3305
|
+
.describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
|
|
3306
|
+
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3307
|
+
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3308
|
+
idleTimeoutMs: z
|
|
3309
|
+
.number()
|
|
3310
|
+
.int()
|
|
3311
|
+
.min(30_000)
|
|
3312
|
+
.max(3_600_000)
|
|
3313
|
+
.optional()
|
|
3314
|
+
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3315
|
+
forceRefresh: z
|
|
3316
|
+
.boolean()
|
|
3317
|
+
.default(false)
|
|
3318
|
+
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3319
|
+
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3320
|
+
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3321
|
+
prompt,
|
|
3322
|
+
model,
|
|
3323
|
+
outputFormat,
|
|
3324
|
+
sessionId,
|
|
3325
|
+
resumeLatest,
|
|
3326
|
+
createNewSession,
|
|
3327
|
+
permissionMode,
|
|
3328
|
+
effort,
|
|
3329
|
+
reasoningEffort,
|
|
3330
|
+
approvalStrategy,
|
|
3331
|
+
approvalPolicy,
|
|
3332
|
+
mcpServers,
|
|
3333
|
+
allowedTools,
|
|
3334
|
+
disallowedTools,
|
|
3335
|
+
correlationId,
|
|
3336
|
+
optimizePrompt,
|
|
3337
|
+
idleTimeoutMs,
|
|
3338
|
+
forceRefresh,
|
|
3339
|
+
});
|
|
3340
|
+
});
|
|
3341
|
+
server.tool("llm_job_status", {
|
|
3342
|
+
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3343
|
+
}, async ({ jobId }) => {
|
|
3344
|
+
const job = asyncJobManager.getJobSnapshot(jobId);
|
|
3345
|
+
if (!job) {
|
|
3346
|
+
return {
|
|
3347
|
+
content: [
|
|
3348
|
+
{
|
|
3349
|
+
type: "text",
|
|
3350
|
+
text: JSON.stringify({
|
|
3351
|
+
success: false,
|
|
3352
|
+
error: "Job not found",
|
|
3353
|
+
jobId,
|
|
3354
|
+
}, null, 2),
|
|
3355
|
+
},
|
|
3356
|
+
],
|
|
3357
|
+
isError: true,
|
|
3358
|
+
};
|
|
2889
3359
|
}
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
|
|
2893
|
-
|
|
2894
|
-
|
|
3360
|
+
return {
|
|
3361
|
+
content: [
|
|
3362
|
+
{
|
|
3363
|
+
type: "text",
|
|
3364
|
+
text: JSON.stringify({
|
|
3365
|
+
success: true,
|
|
3366
|
+
job,
|
|
3367
|
+
}, null, 2),
|
|
3368
|
+
},
|
|
3369
|
+
],
|
|
3370
|
+
};
|
|
3371
|
+
});
|
|
3372
|
+
server.tool("llm_job_result", {
|
|
3373
|
+
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3374
|
+
maxChars: z
|
|
3375
|
+
.number()
|
|
3376
|
+
.int()
|
|
3377
|
+
.min(1000)
|
|
3378
|
+
.max(2000000)
|
|
3379
|
+
.default(200000)
|
|
3380
|
+
.describe("Max chars returned per stream"),
|
|
3381
|
+
}, async ({ jobId, maxChars }) => {
|
|
3382
|
+
const result = asyncJobManager.getJobResult(jobId, maxChars);
|
|
3383
|
+
if (!result) {
|
|
3384
|
+
return {
|
|
3385
|
+
content: [
|
|
3386
|
+
{
|
|
3387
|
+
type: "text",
|
|
3388
|
+
text: JSON.stringify({
|
|
3389
|
+
success: false,
|
|
3390
|
+
error: "Job not found",
|
|
3391
|
+
jobId,
|
|
3392
|
+
}, null, 2),
|
|
3393
|
+
},
|
|
3394
|
+
],
|
|
3395
|
+
isError: true,
|
|
3396
|
+
};
|
|
2895
3397
|
}
|
|
2896
|
-
//
|
|
2897
|
-
const
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
|
|
3398
|
+
// Parse stream-json output for Claude async jobs
|
|
3399
|
+
const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
|
|
3400
|
+
let parsed;
|
|
3401
|
+
if (outputFormat === "stream-json" && result.stdout) {
|
|
3402
|
+
parsed = parseStreamJson(result.stdout);
|
|
3403
|
+
}
|
|
3404
|
+
return {
|
|
3405
|
+
content: [
|
|
3406
|
+
{
|
|
3407
|
+
type: "text",
|
|
3408
|
+
text: JSON.stringify({
|
|
3409
|
+
success: true,
|
|
3410
|
+
result,
|
|
3411
|
+
...(parsed
|
|
3412
|
+
? {
|
|
3413
|
+
parsed: {
|
|
3414
|
+
text: parsed.text,
|
|
3415
|
+
costUsd: parsed.costUsd,
|
|
3416
|
+
usage: parsed.usage,
|
|
3417
|
+
model: parsed.model,
|
|
3418
|
+
numTurns: parsed.numTurns,
|
|
3419
|
+
},
|
|
3420
|
+
}
|
|
3421
|
+
: {}),
|
|
3422
|
+
}, null, 2),
|
|
3423
|
+
},
|
|
3424
|
+
],
|
|
2910
3425
|
};
|
|
2911
|
-
|
|
2912
|
-
|
|
3426
|
+
});
|
|
3427
|
+
server.tool("llm_job_cancel", {
|
|
3428
|
+
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3429
|
+
}, async ({ jobId }) => {
|
|
3430
|
+
const cancel = asyncJobManager.cancelJob(jobId);
|
|
3431
|
+
if (!cancel.canceled) {
|
|
3432
|
+
return {
|
|
3433
|
+
content: [
|
|
3434
|
+
{
|
|
3435
|
+
type: "text",
|
|
3436
|
+
text: JSON.stringify({
|
|
3437
|
+
success: false,
|
|
3438
|
+
jobId,
|
|
3439
|
+
reason: cancel.reason || "Unable to cancel",
|
|
3440
|
+
}, null, 2),
|
|
3441
|
+
},
|
|
3442
|
+
],
|
|
3443
|
+
isError: true,
|
|
3444
|
+
};
|
|
2913
3445
|
}
|
|
2914
3446
|
return {
|
|
2915
3447
|
content: [
|
|
2916
3448
|
{
|
|
2917
3449
|
type: "text",
|
|
2918
|
-
text: JSON.stringify(
|
|
3450
|
+
text: JSON.stringify({
|
|
3451
|
+
success: true,
|
|
3452
|
+
jobId,
|
|
3453
|
+
}, null, 2),
|
|
2919
3454
|
},
|
|
2920
3455
|
],
|
|
2921
3456
|
};
|
|
2922
|
-
}
|
|
2923
|
-
catch (error) {
|
|
2924
|
-
return createErrorResponse("claude_request_async", 1, "", corrId, error);
|
|
2925
|
-
}
|
|
2926
|
-
});
|
|
2927
|
-
server.tool("codex_request_async", {
|
|
2928
|
-
prompt: z
|
|
2929
|
-
.string()
|
|
2930
|
-
.min(1, "Prompt cannot be empty")
|
|
2931
|
-
.max(100000, "Prompt too long (max 100k chars)")
|
|
2932
|
-
.describe("Prompt text for Codex"),
|
|
2933
|
-
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
2934
|
-
fullAuto: z
|
|
2935
|
-
.boolean()
|
|
2936
|
-
.default(false)
|
|
2937
|
-
.describe("DEPRECATED: prefer `sandboxMode` + `askForApproval`. Expands to `--sandbox workspace-write --ask-for-approval never`."),
|
|
2938
|
-
sandboxMode: z
|
|
2939
|
-
.enum(CODEX_SANDBOX_MODES)
|
|
2940
|
-
.optional()
|
|
2941
|
-
.describe("Codex --sandbox: read-only|workspace-write|danger-full-access."),
|
|
2942
|
-
askForApproval: z
|
|
2943
|
-
.enum(CODEX_ASK_FOR_APPROVAL_MODES)
|
|
2944
|
-
.optional()
|
|
2945
|
-
.describe("Codex --ask-for-approval: untrusted|on-request|never."),
|
|
2946
|
-
useLegacyFullAutoFlag: z
|
|
2947
|
-
.boolean()
|
|
2948
|
-
.default(false)
|
|
2949
|
-
.describe("Escape hatch: emit `--full-auto` directly (deprecated)."),
|
|
2950
|
-
dangerouslyBypassApprovalsAndSandbox: z
|
|
2951
|
-
.boolean()
|
|
2952
|
-
.default(false)
|
|
2953
|
-
.describe("Run Codex without approvals/sandbox"),
|
|
2954
|
-
approvalStrategy: z
|
|
2955
|
-
.enum(["legacy", "mcp_managed"])
|
|
2956
|
-
.default("legacy")
|
|
2957
|
-
.describe("Approval strategy"),
|
|
2958
|
-
approvalPolicy: z
|
|
2959
|
-
.enum(["strict", "balanced", "permissive"])
|
|
2960
|
-
.optional()
|
|
2961
|
-
.describe("Approval policy override"),
|
|
2962
|
-
mcpServers: z
|
|
2963
|
-
.array(MCP_SERVER_ENUM)
|
|
2964
|
-
.default(["sqry"])
|
|
2965
|
-
.describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
|
|
2966
|
-
sessionId: z
|
|
2967
|
-
.string()
|
|
2968
|
-
.optional()
|
|
2969
|
-
.describe("Codex session UUID to resume via `codex exec resume <ID>`. Must be a real Codex session ID (from `~/.codex/sessions/` or the `codex resume` picker). Gateway-generated `gw-*` IDs are rejected."),
|
|
2970
|
-
resumeLatest: z
|
|
2971
|
-
.boolean()
|
|
2972
|
-
.default(false)
|
|
2973
|
-
.describe("Resume the most recent Codex session in the current cwd via `codex exec resume --last`. Ignored if sessionId is set."),
|
|
2974
|
-
createNewSession: z.boolean().default(false).describe("Force a fresh session (no resume)"),
|
|
2975
|
-
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
2976
|
-
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
2977
|
-
idleTimeoutMs: z
|
|
2978
|
-
.number()
|
|
2979
|
-
.int()
|
|
2980
|
-
.min(30_000)
|
|
2981
|
-
.max(3_600_000)
|
|
2982
|
-
.optional()
|
|
2983
|
-
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
2984
|
-
forceRefresh: z
|
|
2985
|
-
.boolean()
|
|
2986
|
-
.default(false)
|
|
2987
|
-
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2988
|
-
// U23: emit `--json` to enable JSONL event-stream parsing for token usage.
|
|
2989
|
-
outputFormat: z
|
|
2990
|
-
.enum(["text", "json"])
|
|
2991
|
-
.default("text")
|
|
2992
|
-
.describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
|
|
2993
|
-
// U26: high-impact feature flags. All optional.
|
|
2994
|
-
outputSchema: z
|
|
2995
|
-
.union([z.string(), z.record(z.unknown())])
|
|
2996
|
-
.optional()
|
|
2997
|
-
.describe("Codex --output-schema. Pass a path (string) or an inline JSON Schema object."),
|
|
2998
|
-
search: z.boolean().optional().describe("Emit Codex --search to enable web search."),
|
|
2999
|
-
profile: z.string().optional().describe("Codex --profile <name>."),
|
|
3000
|
-
configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA.describe("Codex -c key=value overrides. Keys: /^[a-zA-Z0-9._]+$/. Values: no CR/LF."),
|
|
3001
|
-
ephemeral: z.boolean().optional().describe("Codex --ephemeral."),
|
|
3002
|
-
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
3003
|
-
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
3004
|
-
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
3005
|
-
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3006
|
-
return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3007
|
-
prompt,
|
|
3008
|
-
model,
|
|
3009
|
-
fullAuto,
|
|
3010
|
-
sandboxMode,
|
|
3011
|
-
askForApproval,
|
|
3012
|
-
useLegacyFullAutoFlag,
|
|
3013
|
-
dangerouslyBypassApprovalsAndSandbox,
|
|
3014
|
-
approvalStrategy,
|
|
3015
|
-
approvalPolicy,
|
|
3016
|
-
mcpServers,
|
|
3017
|
-
sessionId,
|
|
3018
|
-
resumeLatest,
|
|
3019
|
-
createNewSession,
|
|
3020
|
-
correlationId,
|
|
3021
|
-
optimizePrompt,
|
|
3022
|
-
idleTimeoutMs,
|
|
3023
|
-
forceRefresh,
|
|
3024
|
-
outputFormat,
|
|
3025
|
-
outputSchema,
|
|
3026
|
-
search,
|
|
3027
|
-
profile,
|
|
3028
|
-
configOverrides,
|
|
3029
|
-
ephemeral,
|
|
3030
|
-
images,
|
|
3031
|
-
ignoreUserConfig,
|
|
3032
|
-
ignoreRules,
|
|
3033
|
-
});
|
|
3034
|
-
});
|
|
3035
|
-
server.tool("gemini_request_async", {
|
|
3036
|
-
prompt: z
|
|
3037
|
-
.string()
|
|
3038
|
-
.min(1, "Prompt cannot be empty")
|
|
3039
|
-
.max(100000, "Prompt too long (max 100k chars)")
|
|
3040
|
-
.describe("Prompt text for Gemini"),
|
|
3041
|
-
model: z
|
|
3042
|
-
.string()
|
|
3043
|
-
.optional()
|
|
3044
|
-
.describe("Model name or alias (e.g. gemini-3-pro-preview, gemini-2.5-flash, pro, flash, latest)"),
|
|
3045
|
-
sessionId: z
|
|
3046
|
-
.string()
|
|
3047
|
-
.optional()
|
|
3048
|
-
.describe("Session ID (user-provided CLI handle for --resume)"),
|
|
3049
|
-
resumeLatest: z.boolean().default(false).describe("Resume latest session"),
|
|
3050
|
-
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3051
|
-
approvalMode: z
|
|
3052
|
-
.enum(GEMINI_APPROVAL_MODES)
|
|
3053
|
-
.optional()
|
|
3054
|
-
.describe("Approval: default|auto_edit|yolo|plan"),
|
|
3055
|
-
approvalStrategy: z
|
|
3056
|
-
.enum(["legacy", "mcp_managed"])
|
|
3057
|
-
.default("legacy")
|
|
3058
|
-
.describe("Approval strategy"),
|
|
3059
|
-
approvalPolicy: z
|
|
3060
|
-
.enum(["strict", "balanced", "permissive"])
|
|
3061
|
-
.optional()
|
|
3062
|
-
.describe("Approval policy override"),
|
|
3063
|
-
mcpServers: z
|
|
3064
|
-
.array(MCP_SERVER_ENUM)
|
|
3065
|
-
.default(["sqry"])
|
|
3066
|
-
.describe("MCP server names passed to Gemini as --allowed-mcp-server-names"),
|
|
3067
|
-
allowedTools: z
|
|
3068
|
-
.array(z.string())
|
|
3069
|
-
.optional()
|
|
3070
|
-
.describe("Allowed tools (['Write','Edit','Bash'])"),
|
|
3071
|
-
includeDirs: z.array(z.string()).optional().describe("Additional workspace directories"),
|
|
3072
|
-
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3073
|
-
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3074
|
-
idleTimeoutMs: z
|
|
3075
|
-
.number()
|
|
3076
|
-
.int()
|
|
3077
|
-
.min(30_000)
|
|
3078
|
-
.max(3_600_000)
|
|
3079
|
-
.optional()
|
|
3080
|
-
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3081
|
-
forceRefresh: z
|
|
3082
|
-
.boolean()
|
|
3083
|
-
.default(false)
|
|
3084
|
-
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3085
|
-
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3086
|
-
// remains text so existing callers see no behavior change.
|
|
3087
|
-
outputFormat: z
|
|
3088
|
-
.enum(["text", "json"])
|
|
3089
|
-
.default("text")
|
|
3090
|
-
.describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
|
|
3091
|
-
sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
|
|
3092
|
-
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3093
|
-
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3094
|
-
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3095
|
-
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3096
|
-
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3097
|
-
prompt,
|
|
3098
|
-
model,
|
|
3099
|
-
sessionId,
|
|
3100
|
-
resumeLatest,
|
|
3101
|
-
createNewSession,
|
|
3102
|
-
approvalMode,
|
|
3103
|
-
approvalStrategy,
|
|
3104
|
-
approvalPolicy,
|
|
3105
|
-
mcpServers,
|
|
3106
|
-
allowedTools,
|
|
3107
|
-
includeDirs,
|
|
3108
|
-
correlationId,
|
|
3109
|
-
optimizePrompt,
|
|
3110
|
-
idleTimeoutMs,
|
|
3111
|
-
forceRefresh,
|
|
3112
|
-
outputFormat,
|
|
3113
|
-
sandbox,
|
|
3114
|
-
policyFiles,
|
|
3115
|
-
adminPolicyFiles,
|
|
3116
|
-
attachments,
|
|
3117
|
-
});
|
|
3118
|
-
});
|
|
3119
|
-
server.tool("grok_request_async", {
|
|
3120
|
-
prompt: z
|
|
3121
|
-
.string()
|
|
3122
|
-
.min(1, "Prompt cannot be empty")
|
|
3123
|
-
.max(100000, "Prompt too long (max 100k chars)")
|
|
3124
|
-
.describe("Prompt text for Grok"),
|
|
3125
|
-
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
3126
|
-
outputFormat: z
|
|
3127
|
-
.enum(["plain", "json", "streaming-json"])
|
|
3128
|
-
.optional()
|
|
3129
|
-
.describe("Output format (plain|json|streaming-json). Grok default is plain."),
|
|
3130
|
-
sessionId: z
|
|
3131
|
-
.string()
|
|
3132
|
-
.optional()
|
|
3133
|
-
.describe("Session ID (user-provided CLI handle for --resume)"),
|
|
3134
|
-
resumeLatest: z
|
|
3135
|
-
.boolean()
|
|
3136
|
-
.default(false)
|
|
3137
|
-
.describe("Resume most recent Grok session in cwd (--continue)"),
|
|
3138
|
-
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3139
|
-
alwaysApprove: z
|
|
3140
|
-
.boolean()
|
|
3141
|
-
.default(false)
|
|
3142
|
-
.describe("Auto-approve all tool executions (--always-approve)"),
|
|
3143
|
-
permissionMode: z
|
|
3144
|
-
.enum(["default", "acceptEdits", "auto", "dontAsk", "bypassPermissions", "plan"])
|
|
3145
|
-
.optional()
|
|
3146
|
-
.describe("Grok permission mode"),
|
|
3147
|
-
effort: z
|
|
3148
|
-
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
3149
|
-
.optional()
|
|
3150
|
-
.describe("Grok effort level"),
|
|
3151
|
-
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
3152
|
-
approvalStrategy: z
|
|
3153
|
-
.enum(["legacy", "mcp_managed"])
|
|
3154
|
-
.default("legacy")
|
|
3155
|
-
.describe("Approval strategy"),
|
|
3156
|
-
approvalPolicy: z
|
|
3157
|
-
.enum(["strict", "balanced", "permissive"])
|
|
3158
|
-
.optional()
|
|
3159
|
-
.describe("Approval policy override"),
|
|
3160
|
-
mcpServers: z
|
|
3161
|
-
.array(MCP_SERVER_ENUM)
|
|
3162
|
-
.default(["sqry"])
|
|
3163
|
-
.describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
|
|
3164
|
-
allowedTools: z
|
|
3165
|
-
.array(z.string())
|
|
3166
|
-
.optional()
|
|
3167
|
-
.describe("Allowed built-in tools (passed as --tools comma list)"),
|
|
3168
|
-
disallowedTools: z
|
|
3169
|
-
.array(z.string())
|
|
3170
|
-
.optional()
|
|
3171
|
-
.describe("Disallowed built-in tools (passed as --disallowed-tools comma list)"),
|
|
3172
|
-
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3173
|
-
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3174
|
-
idleTimeoutMs: z
|
|
3175
|
-
.number()
|
|
3176
|
-
.int()
|
|
3177
|
-
.min(30_000)
|
|
3178
|
-
.max(3_600_000)
|
|
3179
|
-
.optional()
|
|
3180
|
-
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3181
|
-
forceRefresh: z
|
|
3182
|
-
.boolean()
|
|
3183
|
-
.default(false)
|
|
3184
|
-
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3185
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3186
|
-
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3187
|
-
prompt,
|
|
3188
|
-
model,
|
|
3189
|
-
outputFormat,
|
|
3190
|
-
sessionId,
|
|
3191
|
-
resumeLatest,
|
|
3192
|
-
createNewSession,
|
|
3193
|
-
alwaysApprove,
|
|
3194
|
-
permissionMode,
|
|
3195
|
-
effort,
|
|
3196
|
-
reasoningEffort,
|
|
3197
|
-
approvalStrategy,
|
|
3198
|
-
approvalPolicy,
|
|
3199
|
-
mcpServers,
|
|
3200
|
-
allowedTools,
|
|
3201
|
-
disallowedTools,
|
|
3202
|
-
correlationId,
|
|
3203
|
-
optimizePrompt,
|
|
3204
|
-
idleTimeoutMs,
|
|
3205
|
-
forceRefresh,
|
|
3206
|
-
});
|
|
3207
|
-
});
|
|
3208
|
-
server.tool("mistral_request_async", {
|
|
3209
|
-
prompt: z
|
|
3210
|
-
.string()
|
|
3211
|
-
.min(1, "Prompt cannot be empty")
|
|
3212
|
-
.max(100000, "Prompt too long (max 100k chars)")
|
|
3213
|
-
.describe("Prompt text for Mistral Vibe"),
|
|
3214
|
-
model: z
|
|
3215
|
-
.string()
|
|
3216
|
-
.optional()
|
|
3217
|
-
.describe("Model alias (resolved into VIBE_ACTIVE_MODEL env var — Vibe has no --model flag)"),
|
|
3218
|
-
outputFormat: z
|
|
3219
|
-
.enum(["plain", "json", "stream-json"])
|
|
3220
|
-
.optional()
|
|
3221
|
-
.describe("Output format (plain|json|stream-json). Vibe default is plain."),
|
|
3222
|
-
sessionId: z
|
|
3223
|
-
.string()
|
|
3224
|
-
.optional()
|
|
3225
|
-
.describe("Session ID (user-provided CLI handle for --resume). Requires [session_logging] enabled = true in ~/.vibe/config.toml."),
|
|
3226
|
-
resumeLatest: z
|
|
3227
|
-
.boolean()
|
|
3228
|
-
.default(false)
|
|
3229
|
-
.describe("Resume most recent Vibe session in cwd (--continue)"),
|
|
3230
|
-
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
3231
|
-
permissionMode: z
|
|
3232
|
-
.enum(MISTRAL_AGENT_MODES)
|
|
3233
|
-
.optional()
|
|
3234
|
-
.describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
|
|
3235
|
-
effort: z
|
|
3236
|
-
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
3237
|
-
.optional()
|
|
3238
|
-
.describe("Vibe effort level"),
|
|
3239
|
-
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
3240
|
-
approvalStrategy: z
|
|
3241
|
-
.enum(["legacy", "mcp_managed"])
|
|
3242
|
-
.default("legacy")
|
|
3243
|
-
.describe("Approval strategy"),
|
|
3244
|
-
approvalPolicy: z
|
|
3245
|
-
.enum(["strict", "balanced", "permissive"])
|
|
3246
|
-
.optional()
|
|
3247
|
-
.describe("Approval policy override"),
|
|
3248
|
-
mcpServers: z
|
|
3249
|
-
.array(MCP_SERVER_ENUM)
|
|
3250
|
-
.default(["sqry"])
|
|
3251
|
-
.describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
|
|
3252
|
-
allowedTools: z
|
|
3253
|
-
.array(z.string())
|
|
3254
|
-
.optional()
|
|
3255
|
-
.describe("Allowlist of built-in tools — each emitted as a separate --enabled-tools <tool> flag"),
|
|
3256
|
-
disallowedTools: z
|
|
3257
|
-
.array(z.string())
|
|
3258
|
-
.optional()
|
|
3259
|
-
.describe("Accepted for caller parity; Vibe has no deny-list flag, so values are ignored (a warning is logged)."),
|
|
3260
|
-
correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
|
|
3261
|
-
optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
|
|
3262
|
-
idleTimeoutMs: z
|
|
3263
|
-
.number()
|
|
3264
|
-
.int()
|
|
3265
|
-
.min(30_000)
|
|
3266
|
-
.max(3_600_000)
|
|
3267
|
-
.optional()
|
|
3268
|
-
.describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
|
|
3269
|
-
forceRefresh: z
|
|
3270
|
-
.boolean()
|
|
3271
|
-
.default(false)
|
|
3272
|
-
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3273
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3274
|
-
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3275
|
-
prompt,
|
|
3276
|
-
model,
|
|
3277
|
-
outputFormat,
|
|
3278
|
-
sessionId,
|
|
3279
|
-
resumeLatest,
|
|
3280
|
-
createNewSession,
|
|
3281
|
-
permissionMode,
|
|
3282
|
-
effort,
|
|
3283
|
-
reasoningEffort,
|
|
3284
|
-
approvalStrategy,
|
|
3285
|
-
approvalPolicy,
|
|
3286
|
-
mcpServers,
|
|
3287
|
-
allowedTools,
|
|
3288
|
-
disallowedTools,
|
|
3289
|
-
correlationId,
|
|
3290
|
-
optimizePrompt,
|
|
3291
|
-
idleTimeoutMs,
|
|
3292
|
-
forceRefresh,
|
|
3293
3457
|
});
|
|
3294
|
-
})
|
|
3295
|
-
server.tool("llm_job_status", {
|
|
3296
|
-
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3297
|
-
}, async ({ jobId }) => {
|
|
3298
|
-
const job = asyncJobManager.getJobSnapshot(jobId);
|
|
3299
|
-
if (!job) {
|
|
3300
|
-
return {
|
|
3301
|
-
content: [
|
|
3302
|
-
{
|
|
3303
|
-
type: "text",
|
|
3304
|
-
text: JSON.stringify({
|
|
3305
|
-
success: false,
|
|
3306
|
-
error: "Job not found",
|
|
3307
|
-
jobId,
|
|
3308
|
-
}, null, 2),
|
|
3309
|
-
},
|
|
3310
|
-
],
|
|
3311
|
-
isError: true,
|
|
3312
|
-
};
|
|
3313
|
-
}
|
|
3314
|
-
return {
|
|
3315
|
-
content: [
|
|
3316
|
-
{
|
|
3317
|
-
type: "text",
|
|
3318
|
-
text: JSON.stringify({
|
|
3319
|
-
success: true,
|
|
3320
|
-
job,
|
|
3321
|
-
}, null, 2),
|
|
3322
|
-
},
|
|
3323
|
-
],
|
|
3324
|
-
};
|
|
3325
|
-
});
|
|
3326
|
-
server.tool("llm_job_result", {
|
|
3327
|
-
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3328
|
-
maxChars: z
|
|
3329
|
-
.number()
|
|
3330
|
-
.int()
|
|
3331
|
-
.min(1000)
|
|
3332
|
-
.max(2000000)
|
|
3333
|
-
.default(200000)
|
|
3334
|
-
.describe("Max chars returned per stream"),
|
|
3335
|
-
}, async ({ jobId, maxChars }) => {
|
|
3336
|
-
const result = asyncJobManager.getJobResult(jobId, maxChars);
|
|
3337
|
-
if (!result) {
|
|
3338
|
-
return {
|
|
3339
|
-
content: [
|
|
3340
|
-
{
|
|
3341
|
-
type: "text",
|
|
3342
|
-
text: JSON.stringify({
|
|
3343
|
-
success: false,
|
|
3344
|
-
error: "Job not found",
|
|
3345
|
-
jobId,
|
|
3346
|
-
}, null, 2),
|
|
3347
|
-
},
|
|
3348
|
-
],
|
|
3349
|
-
isError: true,
|
|
3350
|
-
};
|
|
3351
|
-
}
|
|
3352
|
-
// Parse stream-json output for Claude async jobs
|
|
3353
|
-
const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
|
|
3354
|
-
let parsed;
|
|
3355
|
-
if (outputFormat === "stream-json" && result.stdout) {
|
|
3356
|
-
parsed = parseStreamJson(result.stdout);
|
|
3357
|
-
}
|
|
3358
|
-
return {
|
|
3359
|
-
content: [
|
|
3360
|
-
{
|
|
3361
|
-
type: "text",
|
|
3362
|
-
text: JSON.stringify({
|
|
3363
|
-
success: true,
|
|
3364
|
-
result,
|
|
3365
|
-
...(parsed
|
|
3366
|
-
? {
|
|
3367
|
-
parsed: {
|
|
3368
|
-
text: parsed.text,
|
|
3369
|
-
costUsd: parsed.costUsd,
|
|
3370
|
-
usage: parsed.usage,
|
|
3371
|
-
model: parsed.model,
|
|
3372
|
-
numTurns: parsed.numTurns,
|
|
3373
|
-
},
|
|
3374
|
-
}
|
|
3375
|
-
: {}),
|
|
3376
|
-
}, null, 2),
|
|
3377
|
-
},
|
|
3378
|
-
],
|
|
3379
|
-
};
|
|
3380
|
-
});
|
|
3381
|
-
server.tool("llm_job_cancel", {
|
|
3382
|
-
jobId: z.string().describe("Async job ID from *_request_async"),
|
|
3383
|
-
}, async ({ jobId }) => {
|
|
3384
|
-
const cancel = asyncJobManager.cancelJob(jobId);
|
|
3385
|
-
if (!cancel.canceled) {
|
|
3386
|
-
return {
|
|
3387
|
-
content: [
|
|
3388
|
-
{
|
|
3389
|
-
type: "text",
|
|
3390
|
-
text: JSON.stringify({
|
|
3391
|
-
success: false,
|
|
3392
|
-
jobId,
|
|
3393
|
-
reason: cancel.reason || "Unable to cancel",
|
|
3394
|
-
}, null, 2),
|
|
3395
|
-
},
|
|
3396
|
-
],
|
|
3397
|
-
isError: true,
|
|
3398
|
-
};
|
|
3399
|
-
}
|
|
3400
|
-
return {
|
|
3401
|
-
content: [
|
|
3402
|
-
{
|
|
3403
|
-
type: "text",
|
|
3404
|
-
text: JSON.stringify({
|
|
3405
|
-
success: true,
|
|
3406
|
-
jobId,
|
|
3407
|
-
}, null, 2),
|
|
3408
|
-
},
|
|
3409
|
-
],
|
|
3410
|
-
};
|
|
3411
|
-
});
|
|
3458
|
+
} // end if (asyncJobsEnabled)
|
|
3412
3459
|
server.tool("llm_process_health", {}, async () => {
|
|
3413
3460
|
const health = asyncJobManager.getJobHealth();
|
|
3461
|
+
const persistenceBlock = {
|
|
3462
|
+
backend: persistence.backend,
|
|
3463
|
+
dbPath: persistence.path,
|
|
3464
|
+
dsn: persistence.dsn ? "[redacted]" : null,
|
|
3465
|
+
retentionDays: persistence.retentionDays,
|
|
3466
|
+
dedupWindowMs: persistence.dedupWindowMs,
|
|
3467
|
+
asyncJobsEnabled: persistence.asyncJobsEnabled,
|
|
3468
|
+
acknowledgeEphemeral: persistence.acknowledgeEphemeral,
|
|
3469
|
+
sources: persistence.sources,
|
|
3470
|
+
warning: persistence.asyncJobsEnabled
|
|
3471
|
+
? null
|
|
3472
|
+
: "Async job persistence is disabled (backend = 'none'). *_request_async tools are NOT registered on this gateway. Set [persistence].backend = 'sqlite' (or 'memory' + acknowledgeEphemeral = true) to enable them.",
|
|
3473
|
+
};
|
|
3414
3474
|
return {
|
|
3415
3475
|
content: [
|
|
3416
3476
|
{
|
|
3417
3477
|
type: "text",
|
|
3418
|
-
text: JSON.stringify({ success: true, ...health }, null, 2),
|
|
3478
|
+
text: JSON.stringify({ success: true, ...health, persistence: persistenceBlock }, null, 2),
|
|
3419
3479
|
},
|
|
3420
3480
|
],
|
|
3421
3481
|
};
|
|
@@ -3470,6 +3530,18 @@ export function createGatewayServer(deps = {}) {
|
|
|
3470
3530
|
const versions = await getCliVersions(cli);
|
|
3471
3531
|
return { content: [{ type: "text", text: JSON.stringify({ versions }, null, 2) }] };
|
|
3472
3532
|
});
|
|
3533
|
+
server.tool("upstream_contracts", {
|
|
3534
|
+
cli: z
|
|
3535
|
+
.preprocess(value => (value === "" || value === null ? undefined : value), SESSION_PROVIDER_ENUM.optional())
|
|
3536
|
+
.describe("CLI filter (claude|codex|gemini|grok|mistral)"),
|
|
3537
|
+
probeInstalled: z
|
|
3538
|
+
.boolean()
|
|
3539
|
+
.default(false)
|
|
3540
|
+
.describe("When true, run local --help probes and compare advertised flags"),
|
|
3541
|
+
}, async ({ cli, probeInstalled }) => {
|
|
3542
|
+
const report = buildUpstreamContractReport({ cli, probeInstalled });
|
|
3543
|
+
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
3544
|
+
});
|
|
3473
3545
|
server.tool("cli_upgrade", {
|
|
3474
3546
|
cli: z.enum(["claude", "codex", "gemini", "grok", "mistral"]).describe("CLI to upgrade"),
|
|
3475
3547
|
target: z
|
|
@@ -3849,6 +3921,23 @@ async function main() {
|
|
|
3849
3921
|
process.stderr.write("Only doctor --json is supported in this layer.\n");
|
|
3850
3922
|
process.exit(2);
|
|
3851
3923
|
}
|
|
3924
|
+
if (args[0] === "contracts") {
|
|
3925
|
+
if (args.includes("--json")) {
|
|
3926
|
+
const cliArg = args.find(arg => arg.startsWith("--cli="))?.split("=")[1];
|
|
3927
|
+
const cli = SESSION_PROVIDER_VALUES.includes(cliArg)
|
|
3928
|
+
? cliArg
|
|
3929
|
+
: undefined;
|
|
3930
|
+
if (cliArg && !cli) {
|
|
3931
|
+
process.stderr.write(`Unsupported --cli value: ${cliArg}\n`);
|
|
3932
|
+
process.exit(2);
|
|
3933
|
+
}
|
|
3934
|
+
const probeInstalled = args.includes("--probe-installed");
|
|
3935
|
+
process.stdout.write(JSON.stringify(buildUpstreamContractReport({ cli, probeInstalled }), null, 2) + "\n");
|
|
3936
|
+
return;
|
|
3937
|
+
}
|
|
3938
|
+
process.stderr.write("Usage: llm-cli-gateway contracts --json [--cli=claude|codex|gemini|grok|mistral] [--probe-installed]\n");
|
|
3939
|
+
process.exit(2);
|
|
3940
|
+
}
|
|
3852
3941
|
const transportArg = args.find(arg => arg.startsWith("--transport="));
|
|
3853
3942
|
const transportMode = transportArg?.split("=")[1] ||
|
|
3854
3943
|
process.env.LLM_GATEWAY_TRANSPORT ||
|
|
@@ -3890,7 +3979,7 @@ async function main() {
|
|
|
3890
3979
|
}
|
|
3891
3980
|
// Guard: only auto-start when run directly (not imported for testing)
|
|
3892
3981
|
// Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
|
|
3893
|
-
const __entryUrl =
|
|
3982
|
+
const __entryUrl = entrypointFileURL(process.argv[1]);
|
|
3894
3983
|
if (__entryUrl === import.meta.url) {
|
|
3895
3984
|
main().catch(error => {
|
|
3896
3985
|
logger.error("Fatal server error:", error);
|