llm-cli-gateway 2.9.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +92 -0
  2. package/README.md +7 -5
  3. package/dist/acp/event-normalizer.d.ts +42 -0
  4. package/dist/acp/event-normalizer.js +71 -0
  5. package/dist/acp/flight-redaction.d.ts +25 -0
  6. package/dist/acp/flight-redaction.js +40 -0
  7. package/dist/acp/host-services.d.ts +16 -0
  8. package/dist/acp/host-services.js +29 -0
  9. package/dist/acp/permission-bridge.d.ts +15 -0
  10. package/dist/acp/permission-bridge.js +90 -0
  11. package/dist/acp/process-manager.js +7 -1
  12. package/dist/acp/provider-registry.d.ts +1 -1
  13. package/dist/acp/provider-registry.js +13 -0
  14. package/dist/acp/runtime.d.ts +35 -0
  15. package/dist/acp/runtime.js +125 -0
  16. package/dist/acp/session-map.d.ts +42 -0
  17. package/dist/acp/session-map.js +67 -0
  18. package/dist/acp/smoke-harness.d.ts +28 -0
  19. package/dist/acp/smoke-harness.js +90 -0
  20. package/dist/api-http.d.ts +18 -0
  21. package/dist/api-http.js +122 -0
  22. package/dist/api-provider.d.ts +83 -0
  23. package/dist/api-provider.js +258 -0
  24. package/dist/api-request.d.ts +30 -0
  25. package/dist/api-request.js +51 -0
  26. package/dist/approval-manager.d.ts +1 -1
  27. package/dist/approval-manager.js +6 -7
  28. package/dist/async-job-manager.d.ts +19 -4
  29. package/dist/async-job-manager.js +211 -35
  30. package/dist/claude-mcp-config.d.ts +2 -2
  31. package/dist/claude-mcp-config.js +42 -52
  32. package/dist/cli-updater.js +16 -1
  33. package/dist/config.d.ts +20 -0
  34. package/dist/config.js +93 -35
  35. package/dist/doctor.d.ts +1 -1
  36. package/dist/flight-recorder.d.ts +1 -0
  37. package/dist/flight-recorder.js +11 -0
  38. package/dist/index.d.ts +56 -5
  39. package/dist/index.js +670 -48
  40. package/dist/job-store.d.ts +15 -0
  41. package/dist/job-store.js +39 -5
  42. package/dist/mcp-registry.d.ts +17 -0
  43. package/dist/mcp-registry.js +5 -0
  44. package/dist/metrics.js +7 -2
  45. package/dist/model-registry.js +11 -0
  46. package/dist/prompt-parts.d.ts +6 -6
  47. package/dist/provider-login-guidance.js +21 -0
  48. package/dist/provider-status.js +4 -1
  49. package/dist/provider-tool-capabilities.d.ts +4 -3
  50. package/dist/provider-tool-capabilities.js +93 -6
  51. package/dist/request-helpers.d.ts +6 -6
  52. package/dist/request-helpers.js +1 -4
  53. package/dist/resources.d.ts +2 -0
  54. package/dist/resources.js +24 -15
  55. package/dist/session-manager-pg.js +2 -9
  56. package/dist/session-manager.d.ts +9 -4
  57. package/dist/session-manager.js +13 -4
  58. package/dist/upstream-contracts.js +112 -2
  59. package/dist/validation-normalizer.d.ts +2 -2
  60. package/dist/validation-orchestrator.d.ts +2 -0
  61. package/dist/validation-orchestrator.js +28 -7
  62. package/dist/validation-tools.d.ts +61 -0
  63. package/dist/validation-tools.js +36 -21
  64. package/migrations/005_provider_type_open_api_names.sql +28 -0
  65. package/npm-shrinkwrap.json +6 -5
  66. package/package.json +12 -9
package/dist/index.js CHANGED
@@ -18,7 +18,11 @@ import { createWorktree, createWorktreeSessionCleanupHook, } from "./worktree-ma
18
18
  import { ResourceProvider } from "./resources.js";
19
19
  import { PerformanceMetrics } from "./metrics.js";
20
20
  import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
21
- import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, loadProvidersConfig, defaultGatewayConfigPath, isXaiProviderEnabled, minStableTokensForModel, } from "./config.js";
21
+ import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, loadProvidersConfig, loadAcpConfig, defaultGatewayConfigPath, isXaiProviderEnabled, enabledApiProviders, minStableTokensForModel, } from "./config.js";
22
+ import { runAcpRequest } from "./acp/runtime.js";
23
+ import { isAcpError } from "./acp/errors.js";
24
+ import { createApiProvider, runApiRequest, apiProviderBreakerState, } from "./api-provider.js";
25
+ import { prepareApiRequest, apiProviderCatalogEntry, ApiModelNotAllowedError, } from "./api-request.js";
22
26
  import { createXaiResponse, XaiApiError, } from "./xai-api-provider.js";
23
27
  import { checkHealth } from "./health.js";
24
28
  import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
@@ -28,7 +32,7 @@ import { createJobStore } from "./job-store.js";
28
32
  import { ApprovalManager, bypassAllowedByOperator, } from "./approval-manager.js";
29
33
  import { checkReviewIntegrity } from "./review-integrity.js";
30
34
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
31
- import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
35
+ import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
32
36
  import { createFlightRecorder } from "./flight-recorder.js";
33
37
  import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
34
38
  import { computeSessionCacheStats, computeTtlRemaining, readPersistedRequest, PERSISTED_REQUEST_DEFAULT_MAX_CHARS, } from "./cache-stats.js";
@@ -183,6 +187,7 @@ let flightRecorder = null;
183
187
  let persistenceConfig = null;
184
188
  let cacheAwarenessConfig = null;
185
189
  let providersConfig = null;
190
+ let acpConfig = null;
186
191
  let jobStore = null;
187
192
  let jobStoreInitialized = false;
188
193
  let asyncJobManager = null;
@@ -195,6 +200,10 @@ function getPersistenceConfig(runtimeLogger = logger) {
195
200
  persistenceConfig ??= loadPersistenceConfig(runtimeLogger);
196
201
  return persistenceConfig;
197
202
  }
203
+ function getAcpConfig(runtimeLogger = logger) {
204
+ acpConfig ??= loadAcpConfig(runtimeLogger);
205
+ return acpConfig;
206
+ }
198
207
  function getCacheAwarenessConfig(runtimeLogger = logger) {
199
208
  cacheAwarenessConfig ??= loadCacheAwarenessConfig(runtimeLogger);
200
209
  return cacheAwarenessConfig;
@@ -229,7 +238,11 @@ function getApprovalManager(runtimeLogger = logger) {
229
238
  approvalManager ??= new ApprovalManager(undefined, runtimeLogger);
230
239
  return approvalManager;
231
240
  }
232
- const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
241
+ function mcpServerEnum() {
242
+ return CLAUDE_MCP_SERVER_NAMES.length > 0
243
+ ? z.enum(CLAUDE_MCP_SERVER_NAMES)
244
+ : z.string();
245
+ }
233
246
  const CLI_TYPE_ENUM = z.enum(CLI_TYPES);
234
247
  export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
235
248
  const GROK_GENERATED_SHAPE = deriveZodShapeFromGeneration(UPSTREAM_CLI_CONTRACTS.grok, GROK_FLAG_GENERATION);
@@ -299,6 +312,7 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
299
312
  persistence: deps.persistence ?? getPersistenceConfig(runtimeLogger),
300
313
  cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
301
314
  providers: deps.providers ?? getProvidersConfig(runtimeLogger),
315
+ acpConfig: deps.acpConfig ?? getAcpConfig(runtimeLogger),
302
316
  workspaces: deps.workspaces ?? loadWorkspaceRegistry(runtimeLogger),
303
317
  };
304
318
  }
@@ -317,6 +331,44 @@ function resolveIdleTimeout(cli, override) {
317
331
  return override;
318
332
  return CLI_IDLE_TIMEOUTS[cli];
319
333
  }
334
+ export async function runAcpTransport(deps, params) {
335
+ const runtime = resolveHandlerRuntime(deps);
336
+ const operation = `${params.provider}_request`;
337
+ const corrId = params.correlationId ?? randomUUID();
338
+ const prompt = (params.prompt ?? "").trim();
339
+ if (!prompt) {
340
+ return createErrorResponse(operation, 1, "prompt is required and cannot be empty", corrId);
341
+ }
342
+ try {
343
+ const result = await runAcpRequest({
344
+ config: runtime.acpConfig,
345
+ sessionManager: runtime.sessionManager,
346
+ approvalManager: runtime.approvalManager,
347
+ flightRecorder: runtime.flightRecorder,
348
+ logger: runtime.logger,
349
+ }, {
350
+ provider: params.provider,
351
+ prompt,
352
+ model: params.model,
353
+ sessionId: params.sessionId,
354
+ correlationId: corrId,
355
+ });
356
+ return {
357
+ content: [
358
+ {
359
+ type: "text",
360
+ text: `[gateway] transport=acp session=${result.gatewaySessionId}\n${result.text}`,
361
+ },
362
+ ],
363
+ };
364
+ }
365
+ catch (err) {
366
+ if (isAcpError(err)) {
367
+ return createErrorResponse(operation, 1, err.userMessage, corrId);
368
+ }
369
+ return createErrorResponse(operation, 1, "", corrId, err);
370
+ }
371
+ }
320
372
  const SYNC_POLL_INTERVAL_MS = 1_000;
321
373
  async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
322
374
  let onCompleteOwnedByCaller = onComplete !== undefined;
@@ -406,6 +458,79 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
406
458
  message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, collect with llm_job_result.`,
407
459
  };
408
460
  }
461
+ async function awaitApiJobOrDefer(provider, apiRequest, corrId, runtime = resolveGatewayServerRuntime(), onComplete, flightRecorderEntry, extractUsage) {
462
+ let onCompleteOwnedByCaller = onComplete !== undefined;
463
+ const consumeOnComplete = () => {
464
+ if (!onCompleteOwnedByCaller || !onComplete)
465
+ return;
466
+ onCompleteOwnedByCaller = false;
467
+ try {
468
+ onComplete();
469
+ }
470
+ catch (err) {
471
+ runtime.logger.error(`awaitApiJobOrDefer onComplete (${provider.name}) threw`, err);
472
+ }
473
+ };
474
+ const deferralAvailable = runtime.persistence.backend !== "none" &&
475
+ runtime.persistence.asyncJobsEnabled &&
476
+ runtime.asyncJobManager.hasStore();
477
+ if (SYNC_DEADLINE_MS === 0 || !deferralAvailable) {
478
+ try {
479
+ const result = await runApiRequest(provider, apiRequest, runtime.logger);
480
+ return { stdout: result.text, stderr: "", code: 0 };
481
+ }
482
+ catch (err) {
483
+ return { stdout: "", stderr: err.message, code: 1 };
484
+ }
485
+ finally {
486
+ consumeOnComplete();
487
+ }
488
+ }
489
+ let outcome;
490
+ try {
491
+ outcome = runtime.asyncJobManager.startHttpJob({
492
+ provider,
493
+ apiRequest,
494
+ correlationId: corrId,
495
+ onComplete,
496
+ flightRecorderEntry,
497
+ extractUsage,
498
+ });
499
+ onCompleteOwnedByCaller = false;
500
+ }
501
+ catch (err) {
502
+ consumeOnComplete();
503
+ throw err;
504
+ }
505
+ const job = outcome.snapshot;
506
+ if (outcome.deduped) {
507
+ runtime.logger.info(`[${corrId}] api request deduped onto job ${job.id} (original corrId=${outcome.originalCorrelationId})`);
508
+ }
509
+ const deadline = Date.now() + SYNC_DEADLINE_MS;
510
+ while (Date.now() < deadline) {
511
+ const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
512
+ if (snapshot && snapshot.status !== "running") {
513
+ const result = runtime.asyncJobManager.getJobResult(job.id);
514
+ if (!result)
515
+ return { stdout: "", stderr: "Job result unavailable", code: 1 };
516
+ return {
517
+ stdout: result.stdout,
518
+ stderr: result.stderr || result.error || "",
519
+ code: result.exitCode ?? 1,
520
+ };
521
+ }
522
+ await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
523
+ }
524
+ runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
525
+ runtime.logger.info(`[${corrId}] ${provider.name} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
526
+ return {
527
+ deferred: true,
528
+ jobId: job.id,
529
+ cli: provider.name,
530
+ correlationId: corrId,
531
+ message: `Execution exceeded sync deadline (${SYNC_DEADLINE_MS}ms). Poll with llm_job_status, collect with llm_job_result.`,
532
+ };
533
+ }
409
534
  function isDeferredResponse(result) {
410
535
  return "deferred" in result && result.deferred === true;
411
536
  }
@@ -434,7 +559,7 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime,
434
559
  return {};
435
560
  const sessionManager = runtime.sessionManager;
436
561
  if (sessionId) {
437
- const session = await Promise.resolve(sessionManager.getSession(sessionId));
562
+ const session = await getCallerOwnedSession(sessionManager, sessionId);
438
563
  const existingPath = session?.metadata?.worktreePath;
439
564
  if (typeof existingPath === "string" && existingPath.length > 0) {
440
565
  return {
@@ -477,9 +602,7 @@ function isGatewayAppDirCwd() {
477
602
  return process.cwd() === join(homedir(), ".llm-cli-gateway");
478
603
  }
479
604
  async function resolveWorkspaceAndWorktreeForRequest(args) {
480
- const session = args.sessionId
481
- ? await Promise.resolve(args.runtime.sessionManager.getSession(args.sessionId))
482
- : null;
605
+ const session = await getCallerOwnedSession(args.runtime.sessionManager, args.sessionId);
483
606
  let workspace;
484
607
  if (args.workspace ||
485
608
  args.runtime.workspaces.defaultAlias ||
@@ -797,7 +920,7 @@ function createApprovalDeniedResponse(operation, decision) {
797
920
  }
798
921
  function normalizeMcpServers(mcpServers) {
799
922
  if (!mcpServers || mcpServers.length === 0) {
800
- return ["sqry"];
923
+ return [];
801
924
  }
802
925
  return [...new Set(mcpServers)];
803
926
  }
@@ -1554,9 +1677,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1554
1677
  if (params.allowedTools && params.allowedTools.length > 0) {
1555
1678
  return unsupported("allowedTools", "agy has no non-interactive allowed-tools flag");
1556
1679
  }
1557
- if (requestedMcpServers.length > 0) {
1558
- return unsupported("mcpServers", "agy has no non-interactive allowed MCP server allowlist flag");
1559
- }
1560
1680
  if (params.outputFormat && params.outputFormat !== "text") {
1561
1681
  return unsupported("outputFormat", "agy print mode currently emits text only");
1562
1682
  }
@@ -1972,11 +2092,33 @@ function usageFromXaiResult(result) {
1972
2092
  costUsd: result.usage.costUsd,
1973
2093
  };
1974
2094
  }
2095
+ function callerCanAccessSession(session) {
2096
+ return principalCanAccess(session.ownerPrincipal, resolveOwnerPrincipal(getRequestContext()));
2097
+ }
2098
+ async function getCallerOwnedSession(sessionManager, sessionId) {
2099
+ if (!sessionId)
2100
+ return null;
2101
+ const existing = await Promise.resolve(sessionManager.getSession(sessionId));
2102
+ if (!existing || !callerCanAccessSession(existing))
2103
+ return null;
2104
+ return existing;
2105
+ }
2106
+ async function getCallerOwnedActiveSession(sessionManager, provider) {
2107
+ const active = await Promise.resolve(sessionManager.getActiveSession(provider));
2108
+ if (!active || !callerCanAccessSession(active))
2109
+ return null;
2110
+ return active;
2111
+ }
1975
2112
  async function getExistingSessionForProvider(sessionManager, sessionId, provider) {
1976
2113
  if (!sessionId)
1977
2114
  return null;
1978
2115
  const existing = await sessionManager.getSession(sessionId);
1979
- if (existing && existing.cli !== provider) {
2116
+ if (!existing)
2117
+ return null;
2118
+ if (!callerCanAccessSession(existing)) {
2119
+ throw new Error(`Session ${sessionId} is not accessible`);
2120
+ }
2121
+ if (existing.cli !== provider) {
1980
2122
  throw new Error(`Session ${sessionId} belongs to provider '${existing.cli}', not '${provider}'`);
1981
2123
  }
1982
2124
  return existing;
@@ -2029,7 +2171,7 @@ async function resolveGrokApiSession(params, runtime) {
2029
2171
  return { sessionId: session.id, previousResponseId: previous };
2030
2172
  }
2031
2173
  if (!params.createNewSession) {
2032
- const active = await runtime.sessionManager.getActiveSession("grok-api");
2174
+ const active = await getCallerOwnedActiveSession(runtime.sessionManager, "grok-api");
2033
2175
  if (active) {
2034
2176
  const previous = typeof active.metadata?.xaiPreviousResponseId === "string"
2035
2177
  ? active.metadata.xaiPreviousResponseId
@@ -2162,6 +2304,120 @@ export async function handleGrokApiRequest(deps, params) {
2162
2304
  runtime.performanceMetrics.recordRequest("grok-api", durationMs || Math.max(0, Date.now() - startTime), wasSuccessful);
2163
2305
  }
2164
2306
  }
2307
+ function buildApiProviderCall(providerRuntime, params) {
2308
+ const apiRequest = prepareApiRequest(providerRuntime, {
2309
+ prompt: params.prompt ?? "",
2310
+ system: params.system,
2311
+ model: params.model,
2312
+ maxOutputTokens: params.maxOutputTokens,
2313
+ temperature: params.temperature,
2314
+ topP: params.topP,
2315
+ reasoningEffort: params.reasoningEffort,
2316
+ timeoutMs: params.timeoutMs,
2317
+ });
2318
+ const provider = createApiProvider(providerRuntime.name, providerRuntime.kind);
2319
+ return { provider, apiRequest };
2320
+ }
2321
+ function buildApiSuccessResponse(text, corrId, providerName) {
2322
+ return {
2323
+ content: [{ type: "text", text }],
2324
+ structuredContent: {
2325
+ response: text,
2326
+ correlationId: corrId,
2327
+ cli: providerName,
2328
+ exitCode: 0,
2329
+ },
2330
+ };
2331
+ }
2332
+ export async function handleApiProviderRequest(runtimeArg, providerRuntime, params) {
2333
+ const toolName = `api_${providerRuntime.name}_request`;
2334
+ const corrId = params.correlationId ?? randomUUID();
2335
+ const startTime = Date.now();
2336
+ let wasSuccessful = false;
2337
+ try {
2338
+ if (!params.prompt || params.prompt.trim().length === 0) {
2339
+ return createErrorResponse(toolName, 1, "prompt is required and cannot be empty", corrId);
2340
+ }
2341
+ const { provider, apiRequest } = buildApiProviderCall(providerRuntime, params);
2342
+ const result = await awaitApiJobOrDefer(provider, apiRequest, corrId, runtimeArg);
2343
+ if (isDeferredResponse(result))
2344
+ return buildDeferredToolResponse(result);
2345
+ if (result.code !== 0) {
2346
+ return createErrorResponse(toolName, result.code, result.stderr, corrId);
2347
+ }
2348
+ wasSuccessful = true;
2349
+ return buildApiSuccessResponse(result.stdout, corrId, providerRuntime.name);
2350
+ }
2351
+ catch (err) {
2352
+ if (err instanceof ApiModelNotAllowedError) {
2353
+ return createErrorResponse(toolName, 1, err.message, corrId, err);
2354
+ }
2355
+ return createErrorResponse(toolName, 1, "", corrId, err);
2356
+ }
2357
+ finally {
2358
+ runtimeArg.performanceMetrics.recordRequest(providerRuntime.name, Math.max(0, Date.now() - startTime), wasSuccessful);
2359
+ }
2360
+ }
2361
+ export function handleApiProviderRequestAsync(runtimeArg, providerRuntime, params) {
2362
+ const toolName = `api_${providerRuntime.name}_request_async`;
2363
+ const corrId = params.correlationId ?? randomUUID();
2364
+ try {
2365
+ if (!params.prompt || params.prompt.trim().length === 0) {
2366
+ return createErrorResponse(toolName, 1, "prompt is required and cannot be empty", corrId);
2367
+ }
2368
+ const { provider, apiRequest } = buildApiProviderCall(providerRuntime, params);
2369
+ const outcome = runtimeArg.asyncJobManager.startHttpJob({
2370
+ provider,
2371
+ apiRequest,
2372
+ correlationId: corrId,
2373
+ writeFlightStart: true,
2374
+ });
2375
+ return buildDeferredToolResponse({
2376
+ deferred: true,
2377
+ jobId: outcome.snapshot.id,
2378
+ cli: providerRuntime.name,
2379
+ correlationId: corrId,
2380
+ message: outcome.deduped
2381
+ ? `Deduped onto existing job ${outcome.snapshot.id}. Poll with llm_job_status.`
2382
+ : `Started async job ${outcome.snapshot.id}. Poll with llm_job_status, collect with llm_job_result.`,
2383
+ });
2384
+ }
2385
+ catch (err) {
2386
+ if (err instanceof ApiModelNotAllowedError) {
2387
+ return createErrorResponse(toolName, 1, err.message, corrId, err);
2388
+ }
2389
+ return createErrorResponse(toolName, 1, "", corrId, err);
2390
+ }
2391
+ }
2392
+ const ApiReasoningEffortSchema = z.enum(["none", "low", "medium", "high"]);
2393
+ export function registerApiProviderTools(server, runtime, providers, asyncJobsEnabled) {
2394
+ const registered = [];
2395
+ const inputSchema = {
2396
+ prompt: z.string().min(1).max(100000).optional().describe("Prompt text for the API provider"),
2397
+ system: z.string().max(100000).optional().describe("Optional system instruction"),
2398
+ model: z
2399
+ .string()
2400
+ .min(1)
2401
+ .optional()
2402
+ .describe("Model id; defaults to the provider default_model"),
2403
+ correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
2404
+ maxOutputTokens: z.number().int().positive().max(100000000).optional(),
2405
+ temperature: z.number().finite().min(0).max(2).optional(),
2406
+ topP: z.number().finite().min(0).max(1).optional(),
2407
+ reasoningEffort: ApiReasoningEffortSchema.optional(),
2408
+ timeoutMs: z.number().int().min(30_000).max(3_600_000).optional(),
2409
+ };
2410
+ for (const providerRuntime of enabledApiProviders(providers)) {
2411
+ const name = providerRuntime.name;
2412
+ server.tool(`api_${name}_request`, `Run a request against the "${name}" API provider (kind: ${providerRuntime.kind}) synchronously. Registered only when [providers.${name}] is configured and enabled.`, inputSchema, { title: `${name} API request`, readOnlyHint: false, openWorldHint: true }, async (params) => handleApiProviderRequest(runtime, providerRuntime, params));
2413
+ registered.push(`api_${name}_request`);
2414
+ if (asyncJobsEnabled) {
2415
+ server.tool(`api_${name}_request_async`, `Start an async request against the "${name}" API provider; returns a jobId to poll with llm_job_status.`, inputSchema, { title: `${name} API request (async)`, readOnlyHint: false, openWorldHint: true }, async (params) => handleApiProviderRequestAsync(runtime, providerRuntime, params));
2416
+ registered.push(`api_${name}_request_async`);
2417
+ }
2418
+ }
2419
+ return registered;
2420
+ }
2165
2421
  function maybeBuildCacheTtlWarning(args) {
2166
2422
  if (args.cli !== "claude")
2167
2423
  return null;
@@ -2447,6 +2703,15 @@ export async function handleGeminiRequestAsync(deps, params) {
2447
2703
  }
2448
2704
  }
2449
2705
  export async function handleGrokRequest(deps, params) {
2706
+ if (params.transport === "acp") {
2707
+ return runAcpTransport(deps, {
2708
+ provider: "grok",
2709
+ prompt: params.prompt,
2710
+ model: params.model,
2711
+ sessionId: params.sessionId,
2712
+ correlationId: params.correlationId,
2713
+ });
2714
+ }
2450
2715
  const runtime = resolveHandlerRuntime(deps);
2451
2716
  const startTime = Date.now();
2452
2717
  const prep = prepareGrokRequest({
@@ -2741,7 +3006,222 @@ export async function handleGrokRequestAsync(deps, params) {
2741
3006
  return createErrorResponse("grok_request_async", 1, "", corrId, error);
2742
3007
  }
2743
3008
  }
3009
+ export function prepareDevinRequest(params, _runtime) {
3010
+ const corrId = params.correlationId ?? randomUUID();
3011
+ let prompt = (params.prompt ?? "").trim();
3012
+ if (!prompt) {
3013
+ return createErrorResponse(params.operation, 1, "prompt is required and cannot be empty", corrId);
3014
+ }
3015
+ if (params.optimizePrompt)
3016
+ prompt = optimizePromptText(prompt);
3017
+ const resolvedModel = resolveModelAlias("devin", params.model, getCliInfo());
3018
+ const args = ["-p", prompt];
3019
+ if (resolvedModel)
3020
+ args.push("--model", resolvedModel);
3021
+ if (params.permissionMode)
3022
+ args.push("--permission-mode", params.permissionMode);
3023
+ if (params.promptFile)
3024
+ args.push("--prompt-file", params.promptFile);
3025
+ return {
3026
+ corrId,
3027
+ effectivePrompt: prompt,
3028
+ resolvedModel,
3029
+ requestedMcpServers: [],
3030
+ approvalDecision: null,
3031
+ args,
3032
+ stablePrefixHash: null,
3033
+ stablePrefixTokens: null,
3034
+ };
3035
+ }
3036
+ export async function handleDevinRequest(deps, params) {
3037
+ if (params.transport === "acp") {
3038
+ return runAcpTransport(deps, {
3039
+ provider: "devin",
3040
+ prompt: params.prompt,
3041
+ model: params.model,
3042
+ sessionId: params.sessionId,
3043
+ correlationId: params.correlationId,
3044
+ });
3045
+ }
3046
+ const runtime = resolveHandlerRuntime(deps);
3047
+ const startTime = Date.now();
3048
+ const prep = prepareDevinRequest({
3049
+ prompt: params.prompt,
3050
+ model: params.model,
3051
+ permissionMode: params.permissionMode,
3052
+ promptFile: params.promptFile,
3053
+ correlationId: params.correlationId,
3054
+ optimizePrompt: params.optimizePrompt,
3055
+ operation: "devin_request",
3056
+ }, runtime);
3057
+ if (!("args" in prep))
3058
+ return prep;
3059
+ const { corrId, args } = prep;
3060
+ let durationMs = 0;
3061
+ let wasSuccessful = false;
3062
+ safeFlightStart({
3063
+ correlationId: corrId,
3064
+ cli: "devin",
3065
+ model: prep.resolvedModel || "default",
3066
+ prompt: prep.effectivePrompt,
3067
+ sessionId: params.sessionId,
3068
+ }, runtime);
3069
+ try {
3070
+ const sessionResult = resolveGrokSessionArgs({
3071
+ sessionId: params.sessionId,
3072
+ resumeLatest: params.resumeLatest,
3073
+ createNewSession: params.createNewSession,
3074
+ });
3075
+ if (sessionResult.userProvidedSession) {
3076
+ await getExistingSessionForProvider(deps.sessionManager, sessionResult.effectiveSessionId, "devin");
3077
+ }
3078
+ args.push(...sessionResult.resumeArgs);
3079
+ const devinFrHandoff = buildAsyncFlightRecorderHandoff("devin", prep, params.sessionId, undefined);
3080
+ const result = await awaitJobOrDefer("devin", args, corrId, resolveIdleTimeout("devin", params.idleTimeoutMs), undefined, params.forceRefresh, runtime, undefined, undefined, devinFrHandoff.flightRecorderEntry, devinFrHandoff.extractUsage);
3081
+ if (isDeferredResponse(result)) {
3082
+ return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
3083
+ }
3084
+ const { stdout, stderr, code } = result;
3085
+ durationMs = Math.max(0, Date.now() - startTime);
3086
+ if (code !== 0) {
3087
+ safeFlightComplete(corrId, {
3088
+ response: stderr || "",
3089
+ durationMs,
3090
+ retryCount: 0,
3091
+ circuitBreakerState: "closed",
3092
+ optimizationApplied: false,
3093
+ exitCode: code,
3094
+ errorMessage: stderr || `Exit code ${code}`,
3095
+ status: "failed",
3096
+ }, runtime);
3097
+ return createErrorResponse("devin", code, stderr, corrId);
3098
+ }
3099
+ wasSuccessful = true;
3100
+ let effectiveSessionId = sessionResult.effectiveSessionId;
3101
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
3102
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
3103
+ if (!existing) {
3104
+ try {
3105
+ await deps.sessionManager.createSession("devin", "Devin Session", effectiveSessionId);
3106
+ }
3107
+ catch {
3108
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
3109
+ if (!rechecked)
3110
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
3111
+ }
3112
+ }
3113
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
3114
+ }
3115
+ else if (!params.createNewSession && !effectiveSessionId) {
3116
+ const newSession = await deps.sessionManager.createSession("devin", "Devin Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3117
+ effectiveSessionId = newSession.id;
3118
+ }
3119
+ const response = buildCliResponse("devin", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession);
3120
+ safeFlightComplete(corrId, {
3121
+ response: stdout,
3122
+ durationMs,
3123
+ retryCount: 0,
3124
+ circuitBreakerState: "closed",
3125
+ optimizationApplied: params.optimizePrompt || (params.optimizeResponse ?? false),
3126
+ exitCode: 0,
3127
+ status: "completed",
3128
+ }, runtime);
3129
+ return response;
3130
+ }
3131
+ catch (error) {
3132
+ const elapsedMs = Math.max(0, Date.now() - startTime);
3133
+ safeFlightComplete(corrId, {
3134
+ response: "",
3135
+ durationMs: elapsedMs,
3136
+ retryCount: 0,
3137
+ circuitBreakerState: "closed",
3138
+ optimizationApplied: false,
3139
+ exitCode: 1,
3140
+ errorMessage: error.message,
3141
+ status: "failed",
3142
+ }, runtime);
3143
+ return createErrorResponse("devin", 1, "", corrId, error);
3144
+ }
3145
+ finally {
3146
+ runtime.performanceMetrics.recordRequest("devin", Math.max(0, durationMs || Date.now() - startTime), wasSuccessful);
3147
+ }
3148
+ }
3149
+ export async function handleDevinRequestAsync(deps, params) {
3150
+ const runtime = resolveHandlerRuntime(deps);
3151
+ const prep = prepareDevinRequest({
3152
+ prompt: params.prompt,
3153
+ model: params.model,
3154
+ permissionMode: params.permissionMode,
3155
+ promptFile: params.promptFile,
3156
+ correlationId: params.correlationId,
3157
+ optimizePrompt: params.optimizePrompt,
3158
+ operation: "devin_request_async",
3159
+ }, runtime);
3160
+ if (!("args" in prep))
3161
+ return prep;
3162
+ const { corrId, args } = prep;
3163
+ try {
3164
+ const sessionResult = resolveGrokSessionArgs({
3165
+ sessionId: params.sessionId,
3166
+ resumeLatest: params.resumeLatest,
3167
+ createNewSession: params.createNewSession,
3168
+ });
3169
+ if (sessionResult.userProvidedSession) {
3170
+ await getExistingSessionForProvider(deps.sessionManager, sessionResult.effectiveSessionId, "devin");
3171
+ }
3172
+ args.push(...sessionResult.resumeArgs);
3173
+ let effectiveSessionId = sessionResult.effectiveSessionId;
3174
+ if (sessionResult.userProvidedSession && effectiveSessionId) {
3175
+ const existing = await deps.sessionManager.getSession(effectiveSessionId);
3176
+ if (!existing) {
3177
+ try {
3178
+ await deps.sessionManager.createSession("devin", "Devin Session", effectiveSessionId);
3179
+ }
3180
+ catch {
3181
+ const rechecked = await deps.sessionManager.getSession(effectiveSessionId);
3182
+ if (!rechecked)
3183
+ throw new Error(`Failed to create or find session ${effectiveSessionId}`);
3184
+ }
3185
+ }
3186
+ await deps.sessionManager.updateSessionUsage(effectiveSessionId);
3187
+ }
3188
+ else if (!params.createNewSession && !effectiveSessionId) {
3189
+ const newSession = await deps.sessionManager.createSession("devin", "Devin Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
3190
+ effectiveSessionId = newSession.id;
3191
+ }
3192
+ assertUpstreamCliArgs("devin", args);
3193
+ assertUpstreamCliEnv("devin", undefined);
3194
+ const devinAsyncFrHandoff = buildAsyncFlightRecorderHandoff("devin", prep, effectiveSessionId, undefined);
3195
+ const job = deps.asyncJobManager.startJob("devin", args, corrId, undefined, resolveIdleTimeout("devin", params.idleTimeoutMs), undefined, params.forceRefresh, undefined, undefined, devinAsyncFrHandoff.flightRecorderEntry, devinAsyncFrHandoff.extractUsage, true);
3196
+ deps.logger.info(`[${corrId}] devin_request_async started job ${job.id}`);
3197
+ return {
3198
+ content: [
3199
+ {
3200
+ type: "text",
3201
+ text: JSON.stringify({
3202
+ success: true,
3203
+ job,
3204
+ sessionId: effectiveSessionId || null,
3205
+ resumable: sessionResult.userProvidedSession,
3206
+ }, null, 2),
3207
+ },
3208
+ ],
3209
+ };
3210
+ }
3211
+ catch (error) {
3212
+ return createErrorResponse("devin_request_async", 1, "", corrId, error);
3213
+ }
3214
+ }
2744
3215
  export async function handleMistralRequest(deps, params) {
3216
+ if (params.transport === "acp") {
3217
+ return runAcpTransport(deps, {
3218
+ provider: "mistral",
3219
+ prompt: params.prompt,
3220
+ model: params.model,
3221
+ sessionId: params.sessionId,
3222
+ correlationId: params.correlationId,
3223
+ });
3224
+ }
2745
3225
  const runtime = resolveHandlerRuntime(deps);
2746
3226
  const startTime = Date.now();
2747
3227
  const prep = prepareMistralRequest({
@@ -3056,7 +3536,7 @@ export async function handleCodexRequestAsync(deps, params) {
3056
3536
  try {
3057
3537
  let effectiveSessionId = params.sessionId;
3058
3538
  if (!params.createNewSession && !params.sessionId) {
3059
- const activeSession = await deps.sessionManager.getActiveSession("codex");
3539
+ const activeSession = await getCallerOwnedActiveSession(deps.sessionManager, "codex");
3060
3540
  if (activeSession) {
3061
3541
  effectiveSessionId = activeSession.id;
3062
3542
  }
@@ -3066,6 +3546,7 @@ export async function handleCodexRequestAsync(deps, params) {
3066
3546
  }
3067
3547
  }
3068
3548
  else if (params.sessionId) {
3549
+ await getExistingSessionForProvider(deps.sessionManager, params.sessionId, "codex");
3069
3550
  await deps.sessionManager.updateSessionUsage(params.sessionId);
3070
3551
  }
3071
3552
  else if (params.createNewSession) {
@@ -3136,8 +3617,15 @@ export function createGatewayServer(deps = {}) {
3136
3617
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
3137
3618
  const server = newGatewayMcpServer(asyncJobsEnabled, grokApiToolsEnabled);
3138
3619
  registerBaseResources(server, runtime);
3139
- registerValidationTools(server, { asyncJobManager });
3620
+ registerValidationTools(server, {
3621
+ asyncJobManager,
3622
+ apiProviders: enabledApiProviders(providers),
3623
+ });
3140
3624
  registerWorkspaceTools(server, runtime);
3625
+ const apiProviderTools = registerApiProviderTools(server, runtime, providers, asyncJobsEnabled);
3626
+ if (apiProviderTools.length > 0) {
3627
+ runtime.logger.info(`Registered API provider tools: ${apiProviderTools.join(", ")}`);
3628
+ }
3141
3629
  if (grokApiToolsEnabled) {
3142
3630
  server.tool("grok_api_request", "Run an xAI Grok API request synchronously through the Responses API. Requires exactly one of prompt or promptParts. Registered only when [providers.xai] is configured and its API-key env var is present.", {
3143
3631
  prompt: z
@@ -3331,10 +3819,7 @@ export function createGatewayServer(deps = {}) {
3331
3819
  .enum(["strict", "balanced", "permissive"])
3332
3820
  .optional()
3333
3821
  .describe("Approval policy override"),
3334
- mcpServers: z
3335
- .array(MCP_SERVER_ENUM)
3336
- .default(["sqry"])
3337
- .describe("MCP servers exposed to Claude"),
3822
+ mcpServers: z.array(mcpServerEnum()).default([]).describe("MCP servers exposed to Claude"),
3338
3823
  strictMcpConfig: z
3339
3824
  .boolean()
3340
3825
  .default(false)
@@ -3406,7 +3891,7 @@ export function createGatewayServer(deps = {}) {
3406
3891
  let useContinue = continueSession;
3407
3892
  let activeSession = null;
3408
3893
  try {
3409
- activeSession = await sessionManager.getActiveSession("claude");
3894
+ activeSession = await getCallerOwnedActiveSession(sessionManager, "claude");
3410
3895
  }
3411
3896
  catch (err) {
3412
3897
  logger.warn(`[${corrId}] sessionManager.getActiveSession failed (non-fatal): ${err.message}`);
@@ -3605,8 +4090,8 @@ export function createGatewayServer(deps = {}) {
3605
4090
  .optional()
3606
4091
  .describe("Approval policy override"),
3607
4092
  mcpServers: z
3608
- .array(MCP_SERVER_ENUM)
3609
- .default(["sqry"])
4093
+ .array(mcpServerEnum())
4094
+ .default([])
3610
4095
  .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
3611
4096
  sessionId: z
3612
4097
  .string()
@@ -3775,7 +4260,7 @@ export function createGatewayServer(deps = {}) {
3775
4260
  wasSuccessful = true;
3776
4261
  let effectiveSessionId = sessionId;
3777
4262
  if (!createNewSession && !sessionId) {
3778
- const activeSession = await sessionManager.getActiveSession("codex");
4263
+ const activeSession = await getCallerOwnedActiveSession(sessionManager, "codex");
3779
4264
  if (activeSession) {
3780
4265
  effectiveSessionId = activeSession.id;
3781
4266
  }
@@ -3973,9 +4458,9 @@ export function createGatewayServer(deps = {}) {
3973
4458
  .optional()
3974
4459
  .describe("Approval policy override"),
3975
4460
  mcpServers: z
3976
- .array(MCP_SERVER_ENUM)
4461
+ .array(mcpServerEnum())
3977
4462
  .default([])
3978
- .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
4463
+ .describe("MCP server names accepted for approval tracking only; Antigravity manages its own MCP configuration."),
3979
4464
  allowedTools: z
3980
4465
  .array(z.string())
3981
4466
  .optional()
@@ -4061,6 +4546,10 @@ export function createGatewayServer(deps = {}) {
4061
4546
  .describe("Prompt text for Grok (mutually exclusive with promptParts)"),
4062
4547
  promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
4063
4548
  model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
4549
+ transport: z
4550
+ .enum(["cli", "acp"])
4551
+ .default("cli")
4552
+ .describe("Transport: 'cli' (default) runs the Grok CLI; 'acp' routes through `grok agent stdio` when [acp].enabled and the provider's runtime_enabled are set (fails closed otherwise)."),
4064
4553
  ...GROK_GENERATED_SHAPE,
4065
4554
  sessionId: z
4066
4555
  .string()
@@ -4088,8 +4577,8 @@ export function createGatewayServer(deps = {}) {
4088
4577
  .optional()
4089
4578
  .describe("Approval policy override"),
4090
4579
  mcpServers: z
4091
- .array(MCP_SERVER_ENUM)
4092
- .default(["sqry"])
4580
+ .array(mcpServerEnum())
4581
+ .default([])
4093
4582
  .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
4094
4583
  correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
4095
4584
  optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
@@ -4125,11 +4614,12 @@ export function createGatewayServer(deps = {}) {
4125
4614
  destructiveHint: true,
4126
4615
  idempotentHint: false,
4127
4616
  openWorldHint: true,
4128
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, workspace, worktree, }) => {
4617
+ }, async ({ prompt, promptParts, model, transport, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, agent, bestOfN, check, disableWebSearch, todoGate, verbatim, agents, promptFile, promptJson, single, experimentalMemory, noAltScreen, noMemory, noPlan, noSubagents, oauth, restoreCode, leaderSocket, nativeWorktree, workspace, worktree, }) => {
4129
4618
  return handleGrokRequest({ sessionManager, logger, runtime }, {
4130
4619
  prompt,
4131
4620
  promptParts,
4132
4621
  model,
4622
+ transport,
4133
4623
  outputFormat,
4134
4624
  sessionId,
4135
4625
  resumeLatest,
@@ -4180,6 +4670,72 @@ export function createGatewayServer(deps = {}) {
4180
4670
  worktree,
4181
4671
  });
4182
4672
  });
4673
// Registers the synchronous `devin_request` MCP tool. The input schema is
// declared with Zod; the handler forwards the validated fields unchanged to
// handleDevinRequest together with the gateway's session manager, logger and
// runtime.
server.tool(
    "devin_request",
    "Run a Cognition Devin CLI request synchronously (auto-defers to a pollable job past the sync deadline when async jobs are enabled; otherwise runs to completion). Headless print mode (`devin -p`).",
    {
        // `prompt` is optional at the schema level; presumably `promptFile`
        // can supply the prompt instead — confirm in handleDevinRequest.
        prompt: z.string().min(1, "Prompt cannot be empty").max(100000, "Prompt too long (max 100k chars)").optional().describe("Prompt text for Devin CLI"),
        model: z.string().optional().describe("Model name or alias (e.g. opus, latest)"),
        transport: z.enum(["cli", "acp"]).default("cli").describe("Transport: 'cli' (default) runs the Devin CLI; 'acp' routes through `devin acp` when [acp].enabled and the provider's runtime_enabled are set (fails closed otherwise)."),
        permissionMode: z.enum(["normal", "auto", "dangerous", "yolo", "bypass"]).optional().describe("Devin CLI permission mode (--permission-mode). normal (alias auto) auto-approves read-only tools; dangerous (aliases yolo, bypass) auto-approves all."),
        promptFile: z.string().optional().describe("Load the initial prompt from a file (--prompt-file)"),
        sessionId: z.string().optional().describe("Devin session ID to resume (emits --resume <id>; use resumeLatest for --continue)"),
        resumeLatest: z.boolean().default(false).describe("Resume the most recent Devin session in cwd (--continue)"),
        createNewSession: z.boolean().default(false).describe("Force a new session"),
        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
        optimizeResponse: z.boolean().default(false).describe("Optimize response output"),
        // Bounds are 30 seconds to 1 hour, expressed in milliseconds.
        idleTimeoutMs: z.number().int().min(30_000).max(3_600_000).optional().describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
        forceRefresh: z.boolean().default(false).describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    },
    {
        title: "Devin CLI request",
        readOnlyHint: false,
        destructiveHint: true,
        idempotentHint: false,
        openWorldHint: true,
    },
    async (toolArgs) => {
        // Pick the schema fields explicitly so the handler always receives
        // exactly this key set, including keys whose value is undefined.
        const {
            prompt,
            model,
            transport,
            permissionMode,
            promptFile,
            sessionId,
            resumeLatest,
            createNewSession,
            correlationId,
            optimizePrompt,
            optimizeResponse,
            idleTimeoutMs,
            forceRefresh,
        } = toolArgs;
        const handlerDeps = { sessionManager, logger, runtime };
        const requestParams = {
            prompt,
            model,
            transport,
            permissionMode,
            promptFile,
            sessionId,
            resumeLatest,
            createNewSession,
            correlationId,
            optimizePrompt,
            optimizeResponse,
            idleTimeoutMs,
            forceRefresh,
        };
        return handleDevinRequest(handlerDeps, requestParams);
    },
);
4183
4739
  server.tool("mistral_request", "Run a Mistral Vibe CLI request synchronously (when async jobs are enabled, auto-defers to a pollable job past the sync deadline; otherwise runs to completion). Requires exactly one of prompt or promptParts.", {
4184
4740
  prompt: z
4185
4741
  .string()
@@ -4192,6 +4748,10 @@ export function createGatewayServer(deps = {}) {
4192
4748
  .string()
4193
4749
  .optional()
4194
4750
  .describe("Model alias (e.g. mistral-medium-3.5, latest). Resolved alias is injected via VIBE_ACTIVE_MODEL env var; Vibe has no --model flag."),
4751
+ transport: z
4752
+ .enum(["cli", "acp"])
4753
+ .default("cli")
4754
+ .describe("Transport: 'cli' (default) runs the Vibe CLI; 'acp' routes through `vibe-acp` when [acp].enabled and the provider's runtime_enabled are set (fails closed otherwise)."),
4195
4755
  outputFormat: z
4196
4756
  .enum(["text", "plain", "json", "streaming", "stream-json"])
4197
4757
  .optional()
@@ -4206,9 +4766,9 @@ export function createGatewayServer(deps = {}) {
4206
4766
  .describe("Resume most recent Vibe session in cwd (--continue)"),
4207
4767
  createNewSession: z.boolean().default(false).describe("Force new session"),
4208
4768
  permissionMode: z
4209
- .enum(MISTRAL_AGENT_MODES)
4769
+ .string()
4210
4770
  .optional()
4211
- .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
4771
+ .describe("Vibe --agent name. Builtins: default|plan|accept-edits|auto-approve; Vibe also accepts install-gated builtins (e.g. lean) and custom agents from ~/.vibe/agents, so any name is passed through. Defaults to auto-approve for programmatic use."),
4212
4772
  approvalStrategy: z
4213
4773
  .enum(["legacy", "mcp_managed"])
4214
4774
  .default("legacy")
@@ -4218,8 +4778,8 @@ export function createGatewayServer(deps = {}) {
4218
4778
  .optional()
4219
4779
  .describe("Approval policy override"),
4220
4780
  mcpServers: z
4221
- .array(MCP_SERVER_ENUM)
4222
- .default(["sqry"])
4781
+ .array(mcpServerEnum())
4782
+ .default([])
4223
4783
  .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
4224
4784
  allowedTools: z
4225
4785
  .array(z.string())
@@ -4267,11 +4827,12 @@ export function createGatewayServer(deps = {}) {
4267
4827
  destructiveHint: true,
4268
4828
  idempotentHint: false,
4269
4829
  openWorldHint: true,
4270
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, workspace, worktree, }) => {
4830
+ }, async ({ prompt, promptParts, model, transport, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, workspace, worktree, }) => {
4271
4831
  return handleMistralRequest({ sessionManager, logger, runtime }, {
4272
4832
  prompt,
4273
4833
  promptParts,
4274
4834
  model,
4835
+ transport,
4275
4836
  outputFormat,
4276
4837
  sessionId,
4277
4838
  resumeLatest,
@@ -4416,10 +4977,7 @@ export function createGatewayServer(deps = {}) {
4416
4977
  .enum(["strict", "balanced", "permissive"])
4417
4978
  .optional()
4418
4979
  .describe("Approval policy override"),
4419
- mcpServers: z
4420
- .array(MCP_SERVER_ENUM)
4421
- .default(["sqry"])
4422
- .describe("MCP servers exposed to Claude"),
4980
+ mcpServers: z.array(mcpServerEnum()).default([]).describe("MCP servers exposed to Claude"),
4423
4981
  strictMcpConfig: z
4424
4982
  .boolean()
4425
4983
  .default(false)
@@ -4486,7 +5044,7 @@ export function createGatewayServer(deps = {}) {
4486
5044
  try {
4487
5045
  let effectiveSessionId = sessionId;
4488
5046
  let useContinue = continueSession;
4489
- const activeSession = await sessionManager.getActiveSession("claude");
5047
+ const activeSession = await getCallerOwnedActiveSession(sessionManager, "claude");
4490
5048
  if (!createNewSession && !continueSession && !sessionId && activeSession) {
4491
5049
  effectiveSessionId = activeSession.id;
4492
5050
  useContinue = true;
@@ -4609,8 +5167,8 @@ export function createGatewayServer(deps = {}) {
4609
5167
  .optional()
4610
5168
  .describe("Approval policy override"),
4611
5169
  mcpServers: z
4612
- .array(MCP_SERVER_ENUM)
4613
- .default(["sqry"])
5170
+ .array(mcpServerEnum())
5171
+ .default([])
4614
5172
  .describe("MCP server names for approval tracking (Codex manages its own MCP config)"),
4615
5173
  sessionId: z
4616
5174
  .string()
@@ -4735,9 +5293,9 @@ export function createGatewayServer(deps = {}) {
4735
5293
  .optional()
4736
5294
  .describe("Approval policy override"),
4737
5295
  mcpServers: z
4738
- .array(MCP_SERVER_ENUM)
5296
+ .array(mcpServerEnum())
4739
5297
  .default([])
4740
- .describe("Unsupported for Antigravity CLI; non-empty values are rejected"),
5298
+ .describe("MCP server names accepted for approval tracking only; Antigravity manages its own MCP configuration."),
4741
5299
  allowedTools: z
4742
5300
  .array(z.string())
4743
5301
  .optional()
@@ -4856,8 +5414,8 @@ export function createGatewayServer(deps = {}) {
4856
5414
  .optional()
4857
5415
  .describe("Approval policy override"),
4858
5416
  mcpServers: z
4859
- .array(MCP_SERVER_ENUM)
4860
- .default(["sqry"])
5417
+ .array(mcpServerEnum())
5418
+ .default([])
4861
5419
  .describe("MCP server names for approval tracking (Grok manages its own MCP config via `grok mcp`)"),
4862
5420
  allowedTools: z
4863
5421
  .array(z.string())
@@ -5050,6 +5608,65 @@ export function createGatewayServer(deps = {}) {
5050
5608
  worktree,
5051
5609
  });
5052
5610
  });
5611
// Registers the `devin_request_async` MCP tool, which starts a Devin CLI run
// as a background job via handleDevinRequestAsync. The handler forwards the
// validated fields unchanged together with the gateway's session manager,
// async job manager, logger and runtime.
server.tool(
    "devin_request_async",
    "Start a Cognition Devin CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.",
    {
        prompt: z
            .string()
            .min(1, "Prompt cannot be empty")
            .max(100000, "Prompt too long (max 100k chars)")
            .optional()
            .describe("Prompt text for Devin CLI"),
        model: z.string().optional().describe("Model name or alias (e.g. opus, latest)"),
        // Accept the same permission-mode values as the synchronous
        // `devin_request` tool. The previous enum admitted the `bypass` alias
        // but rejected `auto` and `yolo`, so a request that validated on the
        // sync tool failed schema validation here.
        // NOTE(review): assumes handleDevinRequestAsync resolves these aliases
        // the same way the sync handler does — confirm against the shared
        // request-preparation code.
        permissionMode: z
            .enum(["normal", "auto", "dangerous", "yolo", "bypass"])
            .optional()
            .describe("Devin CLI permission mode (--permission-mode). normal (alias auto) auto-approves read-only tools; dangerous (aliases yolo, bypass) auto-approves all."),
        promptFile: z
            .string()
            .optional()
            .describe("Load the initial prompt from a file (--prompt-file)"),
        sessionId: z
            .string()
            .optional()
            .describe("Devin session ID to resume (--resume <id>; use resumeLatest for --continue)"),
        resumeLatest: z
            .boolean()
            .default(false)
            .describe("Resume the most recent Devin session in cwd (--continue)"),
        createNewSession: z.boolean().default(false).describe("Force a new session"),
        correlationId: z.string().optional().describe("Request trace ID (auto if omitted)"),
        optimizePrompt: z.boolean().default(false).describe("Optimize prompt before execution"),
        // Bounds are 30 seconds to 1 hour, expressed in milliseconds.
        idleTimeoutMs: z
            .number()
            .int()
            .min(30_000)
            .max(3_600_000)
            .optional()
            .describe("Idle timeout in ms (min 30s, max 1h, omit=CLI default)"),
        forceRefresh: z
            .boolean()
            .default(false)
            .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
    },
    {
        title: "Devin CLI request (async)",
        readOnlyHint: false,
        destructiveHint: true,
        idempotentHint: false,
        openWorldHint: true,
    },
    async ({ prompt, model, permissionMode, promptFile, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
        return handleDevinRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
            prompt,
            model,
            permissionMode,
            promptFile,
            sessionId,
            resumeLatest,
            createNewSession,
            correlationId,
            optimizePrompt,
            idleTimeoutMs,
            forceRefresh,
        });
    },
);
5053
5670
  server.tool("mistral_request_async", "Start a Mistral Vibe CLI request as a durable background job. Poll with llm_job_status, collect with llm_job_result.", {
5054
5671
  prompt: z
5055
5672
  .string()
@@ -5076,9 +5693,9 @@ export function createGatewayServer(deps = {}) {
5076
5693
  .describe("Resume most recent Vibe session in cwd (--continue)"),
5077
5694
  createNewSession: z.boolean().default(false).describe("Force new session"),
5078
5695
  permissionMode: z
5079
- .enum(MISTRAL_AGENT_MODES)
5696
+ .string()
5080
5697
  .optional()
5081
- .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
5698
+ .describe("Vibe --agent name. Builtins: default|plan|accept-edits|auto-approve; Vibe also accepts install-gated builtins (e.g. lean) and custom agents from ~/.vibe/agents, so any name is passed through. Defaults to auto-approve for programmatic use."),
5082
5699
  approvalStrategy: z
5083
5700
  .enum(["legacy", "mcp_managed"])
5084
5701
  .default("legacy")
@@ -5088,8 +5705,8 @@ export function createGatewayServer(deps = {}) {
5088
5705
  .optional()
5089
5706
  .describe("Approval policy override"),
5090
5707
  mcpServers: z
5091
- .array(MCP_SERVER_ENUM)
5092
- .default(["sqry"])
5708
+ .array(mcpServerEnum())
5709
+ .default([])
5093
5710
  .describe("MCP server names for approval tracking (Vibe manages its own MCP config via `vibe mcp`)"),
5094
5711
  allowedTools: z
5095
5712
  .array(z.string())
@@ -5418,6 +6035,11 @@ export function createGatewayServer(deps = {}) {
5418
6035
  defaultModel: null,
5419
6036
  mode: "disabled",
5420
6037
  },
6038
+ apiProviders: enabledApiProviders(providers).map(p => ({
6039
+ ...apiProviderCatalogEntry(p),
6040
+ baseUrl: p.baseUrl,
6041
+ breakerState: apiProviderBreakerState(p.name),
6042
+ })),
5421
6043
  sources: providers.sources,
5422
6044
  };
5423
6045
  return {