llm-cli-gateway 1.17.3 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +1 -1
- package/dist/approval-manager.js +0 -8
- package/dist/async-job-manager.d.ts +0 -113
- package/dist/async-job-manager.js +6 -124
- package/dist/cache-stats.d.ts +0 -89
- package/dist/cache-stats.js +0 -62
- package/dist/claude-mcp-config.js +0 -1
- package/dist/cli-updater.d.ts +0 -8
- package/dist/cli-updater.js +0 -12
- package/dist/codex-json-parser.d.ts +0 -20
- package/dist/codex-json-parser.js +0 -21
- package/dist/config.d.ts +0 -31
- package/dist/config.js +2 -72
- package/dist/db.d.ts +0 -18
- package/dist/db.js +0 -22
- package/dist/doctor.d.ts +0 -49
- package/dist/doctor.js +0 -47
- package/dist/endpoint-exposure.js +0 -1
- package/dist/executor.d.ts +0 -19
- package/dist/executor.js +3 -38
- package/dist/flight-recorder.d.ts +0 -26
- package/dist/flight-recorder.js +1 -70
- package/dist/gemini-json-parser.d.ts +0 -25
- package/dist/gemini-json-parser.js +0 -28
- package/dist/health.d.ts +0 -3
- package/dist/health.js +0 -3
- package/dist/index.d.ts +12 -208
- package/dist/index.js +116 -588
- package/dist/job-store.d.ts +0 -74
- package/dist/job-store.js +1 -73
- package/dist/logger.d.ts +0 -7
- package/dist/logger.js +0 -6
- package/dist/migrate-sessions.d.ts +0 -3
- package/dist/migrate-sessions.js +0 -16
- package/dist/migrate.js +1 -18
- package/dist/mistral-meta-json-parser.js +0 -67
- package/dist/model-registry.js +0 -13
- package/dist/pricing.d.ts +0 -46
- package/dist/pricing.js +0 -47
- package/dist/process-monitor.d.ts +0 -15
- package/dist/process-monitor.js +2 -31
- package/dist/prompt-parts.d.ts +6 -31
- package/dist/prompt-parts.js +0 -11
- package/dist/provider-status.d.ts +0 -8
- package/dist/provider-status.js +0 -11
- package/dist/request-helpers.d.ts +4 -316
- package/dist/request-helpers.js +13 -231
- package/dist/resources.d.ts +0 -20
- package/dist/resources.js +1 -34
- package/dist/retry.d.ts +0 -45
- package/dist/retry.js +3 -40
- package/dist/session-manager-pg.d.ts +0 -32
- package/dist/session-manager-pg.js +0 -32
- package/dist/session-manager.d.ts +0 -21
- package/dist/session-manager.js +1 -15
- package/dist/stream-json-parser.d.ts +0 -18
- package/dist/stream-json-parser.js +0 -22
- package/dist/upstream-contracts.d.ts +0 -55
- package/dist/upstream-contracts.js +86 -64
- package/dist/validation-orchestrator.js +0 -3
- package/dist/worktree-manager.d.ts +0 -9
- package/dist/worktree-manager.js +0 -21
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -35,7 +35,6 @@ import { printDoctorJson } from "./doctor.js";
|
|
|
35
35
|
import { registerValidationTools } from "./validation-tools.js";
|
|
36
36
|
import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
|
|
37
37
|
import { entrypointFileURL } from "./entrypoint-url.js";
|
|
38
|
-
// Simple logger that writes to stderr (stdout is used for MCP protocol)
|
|
39
38
|
const logger = {
|
|
40
39
|
info: (message, ...args) => {
|
|
41
40
|
console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
|
|
@@ -94,10 +93,6 @@ function logOptimizationTokens(kind, correlationId, original, optimized) {
|
|
|
94
93
|
const reduction = originalTokens === 0 ? 0 : ((originalTokens - optimizedTokens) / originalTokens) * 100;
|
|
95
94
|
logger.info(`[${correlationId}] ${kind} tokens ${originalTokens} → ${optimizedTokens} (${reduction.toFixed(1)}% reduction)`);
|
|
96
95
|
}
|
|
97
|
-
// Sync-to-async deadline: if a sync tool's CLI call hasn't finished within this
|
|
98
|
-
// window, the tool returns a deferred async job reference instead of blocking
|
|
99
|
-
// until the MCP client's tool-call timeout fires (~60s in many runtimes).
|
|
100
|
-
// Configurable via SYNC_DEADLINE_MS env var. Set to 0 to disable (pure sync).
|
|
101
96
|
const SYNC_DEADLINE_MS = (() => {
|
|
102
97
|
const env = process.env.SYNC_DEADLINE_MS;
|
|
103
98
|
if (env !== undefined) {
|
|
@@ -105,11 +100,8 @@ const SYNC_DEADLINE_MS = (() => {
|
|
|
105
100
|
if (Number.isFinite(parsed) && parsed >= 0)
|
|
106
101
|
return parsed;
|
|
107
102
|
}
|
|
108
|
-
return 45_000;
|
|
103
|
+
return 45_000;
|
|
109
104
|
})();
|
|
110
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
111
|
-
// Skills loader — reads .agents/skills/*/SKILL.md at startup
|
|
112
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
113
105
|
const __filename = fileURLToPath(import.meta.url);
|
|
114
106
|
const __dirname = dirname(__filename);
|
|
115
107
|
const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
|
|
@@ -124,7 +116,6 @@ function packageVersion() {
|
|
|
124
116
|
return parsed.version || "unknown";
|
|
125
117
|
}
|
|
126
118
|
catch {
|
|
127
|
-
// Try next candidate.
|
|
128
119
|
}
|
|
129
120
|
}
|
|
130
121
|
return "unknown";
|
|
@@ -137,24 +128,19 @@ function loadSkills() {
|
|
|
137
128
|
const skillPath = join(SKILLS_DIR, dir.name, "SKILL.md");
|
|
138
129
|
try {
|
|
139
130
|
const content = readFileSync(skillPath, "utf-8");
|
|
140
|
-
// Extract description from YAML frontmatter
|
|
141
131
|
const descMatch = content.match(/^---[\s\S]*?description:\s*(.+?)$/m);
|
|
142
132
|
const description = descMatch?.[1]?.trim() || dir.name;
|
|
143
133
|
skills.push({ name: dir.name, content, description });
|
|
144
134
|
}
|
|
145
135
|
catch {
|
|
146
|
-
// Skill file missing or unreadable — skip silently
|
|
147
136
|
}
|
|
148
137
|
}
|
|
149
138
|
}
|
|
150
139
|
catch {
|
|
151
|
-
// Skills directory missing — not fatal
|
|
152
140
|
}
|
|
153
141
|
return skills;
|
|
154
142
|
}
|
|
155
143
|
const loadedSkills = loadSkills();
|
|
156
|
-
// L1: Compact server instructions (~200 tokens) — injected into every client's
|
|
157
|
-
// system prompt at connection time. Covers key patterns + pointers to L2 resources.
|
|
158
144
|
const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
|
|
159
145
|
|
|
160
146
|
Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
|
|
@@ -175,17 +161,11 @@ ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}
|
|
|
175
161
|
function newGatewayMcpServer() {
|
|
176
162
|
return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
|
|
177
163
|
}
|
|
178
|
-
// Global state (initialized asynchronously)
|
|
179
164
|
let sessionManager;
|
|
180
165
|
let db = null;
|
|
181
166
|
const performanceMetrics = new PerformanceMetrics();
|
|
182
167
|
let resourceProvider;
|
|
183
168
|
let flightRecorder = null;
|
|
184
|
-
// Resolved persistence config — single source of truth for the async-job backend.
|
|
185
|
-
// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
|
|
186
|
-
// When backend = "none", the JobStore is null AND *_request_async tools are not
|
|
187
|
-
// registered (see createGatewayServer), making silent in-memory loss
|
|
188
|
-
// structurally impossible.
|
|
189
169
|
let persistenceConfig = null;
|
|
190
170
|
let cacheAwarenessConfig = null;
|
|
191
171
|
let jobStore = null;
|
|
@@ -231,47 +211,9 @@ function getApprovalManager(runtimeLogger = logger) {
|
|
|
231
211
|
return approvalManager;
|
|
232
212
|
}
|
|
233
213
|
const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
|
|
234
|
-
/**
|
|
235
|
-
* Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
|
|
236
|
-
*
|
|
237
|
-
* Both flags reach the upstream CLIs as decimal-formatted argv strings via
|
|
238
|
-
* `String(N)`. `z.number().int().positive()` alone lets values past
|
|
239
|
-
* `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
|
|
240
|
-
* scientific notation that Grok and Vibe both reject. The bounds below
|
|
241
|
-
* (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
|
|
242
|
-
* for price) guarantee a lossless decimal stringification AND a sane
|
|
243
|
-
* upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
|
|
244
|
-
*/
|
|
245
214
|
export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
|
|
246
|
-
// Token budgets can legitimately exceed the agent-turn cap by orders of
|
|
247
|
-
// magnitude. Keep a finite operational guardrail while avoiding the 10k turn
|
|
248
|
-
// ceiling that would make large-context Vibe sessions unusable.
|
|
249
215
|
export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
|
|
250
|
-
// `.min(1e-6)` keeps the value in JS's decimal-stringify range:
|
|
251
|
-
// String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
|
|
252
|
-
// upstream CLIs would reject. 1µUSD per request is fine-grained enough
|
|
253
|
-
// for any plausible budget-cap use.
|
|
254
216
|
export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
|
|
255
|
-
/**
|
|
256
|
-
* Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
|
|
257
|
-
* tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
|
|
258
|
-
* branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
|
|
259
|
-
* name and/or git ref (default ref: HEAD).
|
|
260
|
-
*
|
|
261
|
-
* Lifecycle is gateway-owned: the gateway pre-creates the worktree via
|
|
262
|
-
* `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
|
|
263
|
-
* No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
|
|
264
|
-
* the request carries a sessionId and the session already has a worktree,
|
|
265
|
-
* that worktree is reused. On session_delete or TTL eviction the gateway
|
|
266
|
-
* runs `git worktree remove --force`.
|
|
267
|
-
*
|
|
268
|
-
* Tool response: when a worktree was used, the successful response stdout
|
|
269
|
-
* is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
|
|
270
|
-
* parse/use the path without a schema change (slice λ §1.d).
|
|
271
|
-
*
|
|
272
|
-
* NOTE: callers should `.gitignore` the `.worktrees/` directory in their
|
|
273
|
-
* repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
|
|
274
|
-
*/
|
|
275
217
|
export const WORKTREE_SCHEMA = z
|
|
276
218
|
.union([
|
|
277
219
|
z.boolean(),
|
|
@@ -296,9 +238,6 @@ export const WORKTREE_SCHEMA = z
|
|
|
296
238
|
"path. NOTE: callers should `.gitignore` the `.worktrees/` " +
|
|
297
239
|
"directory in their repo (the gateway does NOT auto-gitignore — " +
|
|
298
240
|
"see slice λ spec Q4).");
|
|
299
|
-
// U22: Session-provider enum extended to five providers. The storage layer's
|
|
300
|
-
// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
|
|
301
|
-
// session_create / session_list / session_clear_all accept the fifth provider.
|
|
302
241
|
export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
|
|
303
242
|
export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
|
|
304
243
|
let activeServer = null;
|
|
@@ -308,13 +247,10 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
308
247
|
const runtimeSessionManager = deps.sessionManager ?? sessionManager;
|
|
309
248
|
const runtimePerformanceMetrics = deps.performanceMetrics ??
|
|
310
249
|
(options.isolateState ? new PerformanceMetrics() : performanceMetrics);
|
|
311
|
-
// Resolve flight recorder BEFORE async manager so isolateState managers
|
|
312
|
-
// can be wired with the same recorder instance the runtime exposes.
|
|
313
250
|
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
314
251
|
const runtimeAsyncJobManager = deps.asyncJobManager ??
|
|
315
252
|
(options.isolateState
|
|
316
|
-
?
|
|
317
|
-
// durable jobs orphaned. Stdio startup injects the process-global manager.
|
|
253
|
+
?
|
|
318
254
|
newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
|
|
319
255
|
: getAsyncJobManager(runtimeLogger));
|
|
320
256
|
const runtimeApprovalManager = deps.approvalManager ??
|
|
@@ -337,15 +273,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
337
273
|
cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
|
|
338
274
|
};
|
|
339
275
|
}
|
|
340
|
-
// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
|
|
341
|
-
// Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
|
|
342
|
-
// In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
|
|
343
276
|
const CLI_IDLE_TIMEOUTS = {
|
|
344
|
-
claude: 600_000,
|
|
345
|
-
codex: 600_000,
|
|
346
|
-
gemini: 600_000,
|
|
347
|
-
grok: 600_000,
|
|
348
|
-
mistral: 600_000,
|
|
277
|
+
claude: 600_000,
|
|
278
|
+
codex: 600_000,
|
|
279
|
+
gemini: 600_000,
|
|
280
|
+
grok: 600_000,
|
|
281
|
+
mistral: 600_000,
|
|
349
282
|
};
|
|
350
283
|
function resolveIdleTimeout(cli, override) {
|
|
351
284
|
if (override !== undefined)
|
|
@@ -353,41 +286,7 @@ function resolveIdleTimeout(cli, override) {
|
|
|
353
286
|
return CLI_IDLE_TIMEOUTS[cli];
|
|
354
287
|
}
|
|
355
288
|
const SYNC_POLL_INTERVAL_MS = 1_000;
|
|
356
|
-
/**
|
|
357
|
-
* Start an async job and poll until completion or deadline.
|
|
358
|
-
* Returns the job result if it finishes in time, or a deferral marker.
|
|
359
|
-
*/
|
|
360
|
-
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
|
|
361
|
-
/**
|
|
362
|
-
* Slice 1.5: when the sync handler has already written a logStart row
|
|
363
|
-
* keyed on `corrId`, pass these so the manager can write logComplete
|
|
364
|
-
* (with usage extraction) when the underlying async job terminates —
|
|
365
|
-
* even if the sync handler returned a deferred response.
|
|
366
|
-
* `writeFlightStart` is NEVER true on this path: the sync handler is
|
|
367
|
-
* always the upstream logStart writer.
|
|
368
|
-
*/
|
|
369
|
-
flightRecorderEntry, extractUsage,
|
|
370
|
-
/**
|
|
371
|
-
* Slice κ: optional stdin payload piped to the child CLI. Currently
|
|
372
|
-
* only Claude's `--input-format stream-json` path sets this. Threaded
|
|
373
|
-
* through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
|
|
374
|
-
* the AsyncJobManager spawn path, and participates in the dedup key.
|
|
375
|
-
*/
|
|
376
|
-
stdin,
|
|
377
|
-
/**
|
|
378
|
-
* Slice λ: optional working directory for the spawned child process,
|
|
379
|
-
* derived from a gateway-owned git worktree. Threaded to both the
|
|
380
|
-
* direct-execute fallback (`executeCli({ cwd })`) and the
|
|
381
|
-
* AsyncJobManager dedup-aware spawn path
|
|
382
|
-
* (`startJobWithDedup({ cwd })`). `cwd` also participates in the
|
|
383
|
-
* dedup key (see async-job-manager.buildRequestKey) so two requests
|
|
384
|
-
* with identical argv in different worktrees do not collide.
|
|
385
|
-
*/
|
|
386
|
-
cwd) {
|
|
387
|
-
// U26 fix: ownership of onComplete is a contract. Once this function returns
|
|
388
|
-
// OR throws, the caller MUST consider onComplete consumed — i.e. it has
|
|
389
|
-
// either been run, or the AsyncJobManager has taken ownership of it. The
|
|
390
|
-
// caller never needs to reclaim.
|
|
289
|
+
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
|
|
391
290
|
let onCompleteOwnedByCaller = onComplete !== undefined;
|
|
392
291
|
const consumeOnComplete = () => {
|
|
393
292
|
if (!onCompleteOwnedByCaller || !onComplete)
|
|
@@ -409,8 +308,6 @@ cwd) {
|
|
|
409
308
|
throw err;
|
|
410
309
|
}
|
|
411
310
|
if (SYNC_DEADLINE_MS === 0) {
|
|
412
|
-
// Disabled — fall through to direct execution.
|
|
413
|
-
// Note: direct execution bypasses dedup. forceRefresh is implied.
|
|
414
311
|
const command = cli === "mistral" ? "vibe" : cli;
|
|
415
312
|
try {
|
|
416
313
|
return await executeCli(command, args, {
|
|
@@ -422,8 +319,6 @@ cwd) {
|
|
|
422
319
|
});
|
|
423
320
|
}
|
|
424
321
|
finally {
|
|
425
|
-
// Direct-execution path completes inline; release per-request resources
|
|
426
|
-
// (e.g. outputSchema temp files) here.
|
|
427
322
|
consumeOnComplete();
|
|
428
323
|
}
|
|
429
324
|
}
|
|
@@ -437,22 +332,12 @@ cwd) {
|
|
|
437
332
|
env,
|
|
438
333
|
stdin,
|
|
439
334
|
onComplete,
|
|
440
|
-
// Sync-deferred path: the upstream sync handler already wrote
|
|
441
|
-
// logStart for this corrId, so writeFlightStart stays false. The
|
|
442
|
-
// manager still writes logComplete on terminal state (which UPDATEs
|
|
443
|
-
// the sync handler's row), closing the previously-orphaned
|
|
444
|
-
// sync-deferred case.
|
|
445
335
|
flightRecorderEntry,
|
|
446
336
|
extractUsage,
|
|
447
337
|
});
|
|
448
|
-
// Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
|
|
449
|
-
// fireOnComplete on terminal status, or run inline immediately for dedup).
|
|
450
338
|
onCompleteOwnedByCaller = false;
|
|
451
339
|
}
|
|
452
340
|
catch (err) {
|
|
453
|
-
// Spawn or pre-spawn failure inside AsyncJobManager. The record was never
|
|
454
|
-
// registered, so onComplete will never be called by the manager. Reclaim
|
|
455
|
-
// here so the temp file is not leaked.
|
|
456
341
|
consumeOnComplete();
|
|
457
342
|
throw err;
|
|
458
343
|
}
|
|
@@ -464,7 +349,6 @@ cwd) {
|
|
|
464
349
|
while (Date.now() < deadline) {
|
|
465
350
|
const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
|
|
466
351
|
if (snapshot && snapshot.status !== "running") {
|
|
467
|
-
// Job finished within deadline — extract result
|
|
468
352
|
const result = runtime.asyncJobManager.getJobResult(job.id);
|
|
469
353
|
if (!result) {
|
|
470
354
|
return { stdout: "", stderr: "Job result unavailable", code: 1 };
|
|
@@ -477,13 +361,6 @@ cwd) {
|
|
|
477
361
|
}
|
|
478
362
|
await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
|
|
479
363
|
}
|
|
480
|
-
// Deadline exceeded — return deferral.
|
|
481
|
-
// R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
|
|
482
|
-
// this call, the manager skips writeFlightComplete on terminal so the
|
|
483
|
-
// sync handler's safeFlightComplete (with rich approvalDecision /
|
|
484
|
-
// optimizationApplied metadata) wins for sync-inline completions. From
|
|
485
|
-
// here on the sync handler returns deferred and will NOT write
|
|
486
|
-
// safeFlightComplete, so the manager must.
|
|
487
364
|
runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
|
|
488
365
|
runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
|
|
489
366
|
return {
|
|
@@ -517,27 +394,6 @@ function buildDeferredToolResponse(deferred, sessionId) {
|
|
|
517
394
|
],
|
|
518
395
|
};
|
|
519
396
|
}
|
|
520
|
-
/**
|
|
521
|
-
* Slice λ: resolve a request's worktree directive into a spawn cwd.
|
|
522
|
-
*
|
|
523
|
-
* - `worktreeOpt` is the Zod-validated input value (boolean |
|
|
524
|
-
* `{ name?, ref? }` | undefined).
|
|
525
|
-
* - When the request has a session AND the session already has a
|
|
526
|
-
* `metadata.worktreePath`, that path is reused (resume semantics).
|
|
527
|
-
* The reused path is returned without touching git; if the directory
|
|
528
|
-
* was externally removed between requests, the next CLI invocation
|
|
529
|
-
* will surface the error naturally.
|
|
530
|
-
* - When no reusable worktree exists, `createWorktree` runs; on success
|
|
531
|
-
* the new path is written to `session.metadata` (only when a session
|
|
532
|
-
* exists — request-scoped worktrees do NOT persist).
|
|
533
|
-
* - Returns `{}` when `worktreeOpt` is undefined/false (preserves
|
|
534
|
-
* pre-λ behaviour at non-worktree call sites).
|
|
535
|
-
* - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
|
|
536
|
-
* in a `createErrorResponse` envelope. Do NOT swallow.
|
|
537
|
-
*
|
|
538
|
-
* Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
|
|
539
|
-
* verify" §5.
|
|
540
|
-
*/
|
|
541
397
|
export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
|
|
542
398
|
if (!worktreeOpt)
|
|
543
399
|
return {};
|
|
@@ -566,30 +422,13 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime)
|
|
|
566
422
|
}
|
|
567
423
|
return { cwd: handle.path, worktreePath: handle.path };
|
|
568
424
|
}
|
|
569
|
-
/**
|
|
570
|
-
* Slice λ §1.d: response-envelope shape decision for `worktreePath`.
|
|
571
|
-
*
|
|
572
|
-
* We surface the worktree path inline as a stdout prefix
|
|
573
|
-
* (`[gateway] worktree=<absolute-path>\n`) rather than as a
|
|
574
|
-
* structuredContent field or JSON wrapper. Rationale:
|
|
575
|
-
* - zero schema change across all 10 tools and their downstream parsers
|
|
576
|
-
* - matches how other slice features (session warnings, cache_state
|
|
577
|
-
* aggregates) surface side-channel metadata today
|
|
578
|
-
* - callers that want the path can split on the first newline; callers
|
|
579
|
-
* that don't care see a single ignorable header line
|
|
580
|
-
*
|
|
581
|
-
* Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
|
|
582
|
-
* the moment a successful response is constructed.
|
|
583
|
-
*/
|
|
584
425
|
export function formatWorktreePrefix(worktreePath) {
|
|
585
426
|
return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
|
|
586
427
|
}
|
|
587
|
-
// Helper function for standardized error responses
|
|
588
428
|
function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
589
429
|
let errorMessage = `Error executing ${cli} CLI`;
|
|
590
430
|
const isLaunchExit = code === 127 || code === -4058;
|
|
591
431
|
if (error) {
|
|
592
|
-
// Command not found or spawn error
|
|
593
432
|
errorMessage += `:\n${error.message}`;
|
|
594
433
|
if (error.message.includes("ENOENT")) {
|
|
595
434
|
errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
|
|
@@ -597,12 +436,10 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
597
436
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI execution failed:`, error.message);
|
|
598
437
|
}
|
|
599
438
|
else if (code === 124) {
|
|
600
|
-
// Wall-clock timeout
|
|
601
439
|
errorMessage += `: Command timed out\n${stderr}`;
|
|
602
440
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI timed out`);
|
|
603
441
|
}
|
|
604
442
|
else if (code === 125) {
|
|
605
|
-
// Idle timeout (stuck process)
|
|
606
443
|
errorMessage += `: Process killed due to inactivity\n${stderr}`;
|
|
607
444
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI killed due to inactivity`);
|
|
608
445
|
}
|
|
@@ -611,7 +448,6 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
611
448
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed to launch`);
|
|
612
449
|
}
|
|
613
450
|
else if (code !== 0) {
|
|
614
|
-
// Other non-zero exit code
|
|
615
451
|
errorMessage += ` (exit code ${code}):\n${stderr}`;
|
|
616
452
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed with exit code ${code}`);
|
|
617
453
|
}
|
|
@@ -634,14 +470,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
634
470
|
},
|
|
635
471
|
};
|
|
636
472
|
}
|
|
637
|
-
export function extractUsageAndCost(cli, output, outputFormat,
|
|
638
|
-
/**
|
|
639
|
-
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
640
|
-
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
641
|
-
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
642
|
-
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
643
|
-
*/
|
|
644
|
-
ctx) {
|
|
473
|
+
export function extractUsageAndCost(cli, output, outputFormat, ctx) {
|
|
645
474
|
if (cli === "claude" && outputFormat === "stream-json") {
|
|
646
475
|
const parsed = parseStreamJson(output);
|
|
647
476
|
if (!parsed.usage) {
|
|
@@ -679,29 +508,12 @@ ctx) {
|
|
|
679
508
|
cacheReadTokens: parsed.usage.cache_read_tokens,
|
|
680
509
|
};
|
|
681
510
|
}
|
|
682
|
-
// Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
|
|
683
|
-
// (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
|
|
684
|
-
// session whose Vibe-assigned UUID we never observed) or the file is
|
|
685
|
-
// missing/malformed, the parser returns `{}` and the FR row simply lacks
|
|
686
|
-
// usage data — matching pre-slice behaviour. No stdout fallback exists.
|
|
687
511
|
if (cli === "mistral") {
|
|
688
512
|
return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
|
|
689
513
|
}
|
|
690
514
|
return {};
|
|
691
515
|
}
|
|
692
|
-
/**
|
|
693
|
-
* Slice 1.5: build the async-job-manager's FR payload from a prep object
|
|
694
|
-
* (which every prepare*Request returns), plus the bound CLI and output
|
|
695
|
-
* format primitives needed by extractUsageAndCost. Returning the closure
|
|
696
|
-
* separately means it captures `cliName` and `fmt` ONLY — never `params`
|
|
697
|
-
* or `prep` — so retention on AsyncJobRecord is O(constant).
|
|
698
|
-
*/
|
|
699
516
|
function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
|
|
700
|
-
// Extract primitives BEFORE building the closure — capturing `prep` or
|
|
701
|
-
// `params` directly would pin large attachments / promptParts on the
|
|
702
|
-
// AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
|
|
703
|
-
// primitives too, threaded through so the Mistral branch of
|
|
704
|
-
// extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
|
|
705
517
|
const cli = cliName;
|
|
706
518
|
const fmt = outputFormat;
|
|
707
519
|
const sid = sessionId;
|
|
@@ -795,11 +607,7 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
|
|
|
795
607
|
}
|
|
796
608
|
return { config: mcpConfig };
|
|
797
609
|
}
|
|
798
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
799
|
-
// MCP Resources
|
|
800
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
801
610
|
function registerBaseResources(server, runtime) {
|
|
802
|
-
// Register skill resources (L2: full docs, read on demand)
|
|
803
611
|
for (const skill of loadedSkills) {
|
|
804
612
|
server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
|
|
805
613
|
title: skill.name,
|
|
@@ -816,7 +624,6 @@ function registerBaseResources(server, runtime) {
|
|
|
816
624
|
}));
|
|
817
625
|
}
|
|
818
626
|
runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
|
|
819
|
-
// Register all sessions resource
|
|
820
627
|
server.registerResource("all-sessions", "sessions://all", {
|
|
821
628
|
title: "📋 All Sessions",
|
|
822
629
|
description: "All conversation sessions across CLIs",
|
|
@@ -826,7 +633,6 @@ function registerBaseResources(server, runtime) {
|
|
|
826
633
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
827
634
|
return { contents: contents ? [contents] : [] };
|
|
828
635
|
});
|
|
829
|
-
// Register Claude sessions resource
|
|
830
636
|
server.registerResource("claude-sessions", "sessions://claude", {
|
|
831
637
|
title: "🤖 Claude Sessions",
|
|
832
638
|
description: "Claude conversation sessions",
|
|
@@ -836,7 +642,6 @@ function registerBaseResources(server, runtime) {
|
|
|
836
642
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
837
643
|
return { contents: contents ? [contents] : [] };
|
|
838
644
|
});
|
|
839
|
-
// Register Codex sessions resource
|
|
840
645
|
server.registerResource("codex-sessions", "sessions://codex", {
|
|
841
646
|
title: "💻 Codex Sessions",
|
|
842
647
|
description: "Codex conversation sessions",
|
|
@@ -846,7 +651,6 @@ function registerBaseResources(server, runtime) {
|
|
|
846
651
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
847
652
|
return { contents: contents ? [contents] : [] };
|
|
848
653
|
});
|
|
849
|
-
// Register Gemini sessions resource
|
|
850
654
|
server.registerResource("gemini-sessions", "sessions://gemini", {
|
|
851
655
|
title: "✨ Gemini Sessions",
|
|
852
656
|
description: "Gemini conversation sessions",
|
|
@@ -856,7 +660,6 @@ function registerBaseResources(server, runtime) {
|
|
|
856
660
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
857
661
|
return { contents: contents ? [contents] : [] };
|
|
858
662
|
});
|
|
859
|
-
// Register Grok sessions resource
|
|
860
663
|
server.registerResource("grok-sessions", "sessions://grok", {
|
|
861
664
|
title: "⚡ Grok Sessions",
|
|
862
665
|
description: "Grok conversation sessions",
|
|
@@ -866,7 +669,6 @@ function registerBaseResources(server, runtime) {
|
|
|
866
669
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
867
670
|
return { contents: contents ? [contents] : [] };
|
|
868
671
|
});
|
|
869
|
-
// Register Mistral sessions resource
|
|
870
672
|
server.registerResource("mistral-sessions", "sessions://mistral", {
|
|
871
673
|
title: "🌬 Mistral Sessions",
|
|
872
674
|
description: "Mistral Vibe conversation sessions",
|
|
@@ -876,7 +678,6 @@ function registerBaseResources(server, runtime) {
|
|
|
876
678
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
877
679
|
return { contents: contents ? [contents] : [] };
|
|
878
680
|
});
|
|
879
|
-
// Register Claude models resource
|
|
880
681
|
server.registerResource("claude-models", "models://claude", {
|
|
881
682
|
title: "🧠 Claude Models",
|
|
882
683
|
description: "Claude models and capabilities",
|
|
@@ -886,7 +687,6 @@ function registerBaseResources(server, runtime) {
|
|
|
886
687
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
887
688
|
return { contents: contents ? [contents] : [] };
|
|
888
689
|
});
|
|
889
|
-
// Register Codex models resource
|
|
890
690
|
server.registerResource("codex-models", "models://codex", {
|
|
891
691
|
title: "🔧 Codex Models",
|
|
892
692
|
description: "Codex models and capabilities",
|
|
@@ -896,7 +696,6 @@ function registerBaseResources(server, runtime) {
|
|
|
896
696
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
897
697
|
return { contents: contents ? [contents] : [] };
|
|
898
698
|
});
|
|
899
|
-
// Register Gemini models resource
|
|
900
699
|
server.registerResource("gemini-models", "models://gemini", {
|
|
901
700
|
title: "🌟 Gemini Models",
|
|
902
701
|
description: "Gemini models and capabilities",
|
|
@@ -906,7 +705,6 @@ function registerBaseResources(server, runtime) {
|
|
|
906
705
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
907
706
|
return { contents: contents ? [contents] : [] };
|
|
908
707
|
});
|
|
909
|
-
// Register Grok models resource
|
|
910
708
|
server.registerResource("grok-models", "models://grok", {
|
|
911
709
|
title: "⚡ Grok Models",
|
|
912
710
|
description: "Grok models and capabilities",
|
|
@@ -916,7 +714,6 @@ function registerBaseResources(server, runtime) {
|
|
|
916
714
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
917
715
|
return { contents: contents ? [contents] : [] };
|
|
918
716
|
});
|
|
919
|
-
// Register Mistral models resource
|
|
920
717
|
server.registerResource("mistral-models", "models://mistral", {
|
|
921
718
|
title: "🌬 Mistral Models",
|
|
922
719
|
description: "Mistral Vibe models and capabilities",
|
|
@@ -926,7 +723,6 @@ function registerBaseResources(server, runtime) {
|
|
|
926
723
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
927
724
|
return { contents: contents ? [contents] : [] };
|
|
928
725
|
});
|
|
929
|
-
// Register performance metrics resource
|
|
930
726
|
server.registerResource("performance-metrics", "metrics://performance", {
|
|
931
727
|
title: "📈 Performance Metrics",
|
|
932
728
|
description: "Request counts, latency, success/failure rates",
|
|
@@ -936,11 +732,6 @@ function registerBaseResources(server, runtime) {
|
|
|
936
732
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
937
733
|
return { contents: contents ? [contents] : [] };
|
|
938
734
|
});
|
|
939
|
-
// Cache-state resources (slice 2). Static URI for global, templated for
|
|
940
|
-
// session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
|
|
941
|
-
// ONLY — never raw prompt or response text. The structural guarantee is in
|
|
942
|
-
// the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
|
|
943
|
-
// themselves: those shapes have no prompt/response/system/task fields.
|
|
944
735
|
server.registerResource("cache-state-global", "cache_state://global", {
|
|
945
736
|
title: "💾 Cache State (Global)",
|
|
946
737
|
description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
|
|
@@ -999,11 +790,6 @@ function registerBaseResources(server, runtime) {
|
|
|
999
790
|
};
|
|
1000
791
|
});
|
|
1001
792
|
}
|
|
1002
|
-
/**
|
|
1003
|
-
* Slice 1: validate the prompt / promptParts mutex at the prep boundary and
|
|
1004
|
-
* return either an error response or the resolved input. The exact error
|
|
1005
|
-
* messages are part of the public contract — tests assert them verbatim.
|
|
1006
|
-
*/
|
|
1007
793
|
function resolvePromptOrPartsForPrep(args) {
|
|
1008
794
|
const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
|
|
1009
795
|
const hasParts = args.promptParts !== undefined;
|
|
@@ -1045,7 +831,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1045
831
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1046
832
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1047
833
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1048
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1049
834
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1050
835
|
prompt: assembledPrompt,
|
|
1051
836
|
allowedTools: params.allowedTools,
|
|
@@ -1058,13 +843,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1058
843
|
score: reviewIntegrity.totalScore,
|
|
1059
844
|
});
|
|
1060
845
|
}
|
|
1061
|
-
// Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
|
|
1062
|
-
// before running optimization. Optimization rewrites the assembled
|
|
1063
|
-
// prompt text the flight-recorder logs, but the κ stdin payload is
|
|
1064
|
-
// built from raw `promptParts` content blocks — letting both run
|
|
1065
|
-
// produces a FR row whose `prompt` no longer matches what Claude
|
|
1066
|
-
// actually received, AND any optimisation-driven text change would
|
|
1067
|
-
// silently break Anthropic prefix-cache reuse on the next call.
|
|
1068
846
|
const ccEarly = params.promptParts?.cacheControl;
|
|
1069
847
|
const cacheControlRequestedEarly = !!(ccEarly &&
|
|
1070
848
|
(ccEarly.system || ccEarly.tools || ccEarly.context));
|
|
@@ -1088,7 +866,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1088
866
|
approvalDecision = runtime.approvalManager.decide({
|
|
1089
867
|
cli: "claude",
|
|
1090
868
|
operation: params.operation,
|
|
1091
|
-
prompt: assembledPrompt,
|
|
869
|
+
prompt: assembledPrompt,
|
|
1092
870
|
bypassRequested: params.dangerouslySkipPermissions,
|
|
1093
871
|
fullAuto: false,
|
|
1094
872
|
requestedMcpServers,
|
|
@@ -1102,18 +880,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1102
880
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1103
881
|
}
|
|
1104
882
|
}
|
|
1105
|
-
// Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
|
|
1106
|
-
// `promptParts` whose stable prefix exceeds the per-model minimum,
|
|
1107
|
-
// the caller has NOT explicitly set `cacheControl`, the gateway
|
|
1108
|
-
// config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
|
|
1109
|
-
// and outputFormat is stream-json. Auto-emit marks the LAST non-empty
|
|
1110
|
-
// stable block (context → tools → system priority — the rightmost
|
|
1111
|
-
// stable block covers the widest prefix). Skipped when optimizePrompt
|
|
1112
|
-
// is on (same rec #5 desync risk).
|
|
1113
|
-
//
|
|
1114
|
-
// The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
|
|
1115
|
-
// breakpoints from caller content are rejected by Anthropic once
|
|
1116
|
-
// Claude Code's own 1h-marked session-wrap blocks land ahead of them.
|
|
1117
883
|
let autoEmittedCacheControlBlock = null;
|
|
1118
884
|
if (!cacheControlRequestedEarly &&
|
|
1119
885
|
runtime.cacheAwareness.emitAnthropicCacheControl &&
|
|
@@ -1124,9 +890,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1124
890
|
const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
|
|
1125
891
|
if (stablePrefixTokens >= threshold) {
|
|
1126
892
|
const pp = params.promptParts;
|
|
1127
|
-
// Rightmost non-empty stable block — its cache_control breakpoint
|
|
1128
|
-
// covers everything above it in the message (the API matches
|
|
1129
|
-
// breakpoints in order).
|
|
1130
893
|
if (pp.context && pp.context.length > 0)
|
|
1131
894
|
autoEmittedCacheControlBlock = "context";
|
|
1132
895
|
else if (pp.tools && pp.tools.length > 0)
|
|
@@ -1141,12 +904,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1141
904
|
}
|
|
1142
905
|
}
|
|
1143
906
|
}
|
|
1144
|
-
// Rec #4: warn when promptParts has a cacheable stable prefix but no
|
|
1145
|
-
// cache_control breakpoint is being emitted (neither explicit nor
|
|
1146
|
-
// auto). Either the caller forgot to set `cacheControl` or
|
|
1147
|
-
// `[cache_awareness].emit_anthropic_cache_control` is off — both
|
|
1148
|
-
// leave the stable prefix bytes unreused across calls, defeating the
|
|
1149
|
-
// point of using `promptParts`.
|
|
1150
907
|
const warnings = [];
|
|
1151
908
|
if (!cacheControlRequestedEarly &&
|
|
1152
909
|
autoEmittedCacheControlBlock === null &&
|
|
@@ -1168,13 +925,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1168
925
|
});
|
|
1169
926
|
}
|
|
1170
927
|
}
|
|
1171
|
-
// Slice κ: switch from the legacy positional `-p <prompt>` emission
|
|
1172
|
-
// to `claude -p --input-format stream-json` and feed a JSON
|
|
1173
|
-
// content-blocks payload via stdin. Non-κ callers (no cacheControl,
|
|
1174
|
-
// or cacheControl with all flags false) take the existing positional
|
|
1175
|
-
// path bit-for-bit. The κ path activates on EITHER an explicit caller
|
|
1176
|
-
// opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
|
|
1177
|
-
// (`autoEmittedCacheControlBlock`).
|
|
1178
928
|
const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
|
|
1179
929
|
let stdinPayload;
|
|
1180
930
|
let cacheControlBlocks;
|
|
@@ -1182,9 +932,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1182
932
|
if (params.outputFormat !== "stream-json") {
|
|
1183
933
|
return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
|
|
1184
934
|
}
|
|
1185
|
-
// promptParts is non-null whenever cacheControlRequested is true
|
|
1186
|
-
// (explicit opt-in lives in PromptParts; auto-emit guard requires
|
|
1187
|
-
// promptParts to be defined).
|
|
1188
935
|
const effectiveParts = autoEmittedCacheControlBlock !== null
|
|
1189
936
|
? {
|
|
1190
937
|
...params.promptParts,
|
|
@@ -1216,11 +963,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1216
963
|
args.push("--output-format", "json");
|
|
1217
964
|
}
|
|
1218
965
|
else if (params.outputFormat === "stream-json") {
|
|
1219
|
-
// Claude CLI 2.x rejects `--print --output-format stream-json` without
|
|
1220
|
-
// `--verbose`: "When using --print, --output-format=stream-json requires
|
|
1221
|
-
// --verbose". --verbose only affects what claude logs to stderr; the
|
|
1222
|
-
// stream-json stdout payload is unchanged, so the gateway's NDJSON
|
|
1223
|
-
// parser is unaffected.
|
|
1224
966
|
args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
|
|
1225
967
|
}
|
|
1226
968
|
}
|
|
@@ -1251,7 +993,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1251
993
|
args.push("--strict-mcp-config");
|
|
1252
994
|
}
|
|
1253
995
|
}
|
|
1254
|
-
// U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
|
|
1255
996
|
let validatedAgents;
|
|
1256
997
|
if (params.agents && Object.keys(params.agents).length > 0) {
|
|
1257
998
|
const result = validateClaudeAgentsMap(params.agents);
|
|
@@ -1273,6 +1014,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1273
1014
|
fallbackModel: params.fallbackModel,
|
|
1274
1015
|
jsonSchema: params.jsonSchema,
|
|
1275
1016
|
addDir: params.addDir,
|
|
1017
|
+
noSessionPersistence: params.noSessionPersistence,
|
|
1018
|
+
settingSources: params.settingSources,
|
|
1019
|
+
settings: params.settings,
|
|
1020
|
+
tools: params.tools,
|
|
1276
1021
|
}));
|
|
1277
1022
|
return {
|
|
1278
1023
|
corrId,
|
|
@@ -1305,7 +1050,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1305
1050
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1306
1051
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1307
1052
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1308
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1309
1053
|
const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
|
|
1310
1054
|
if (reviewIntegrity.violations.length > 0) {
|
|
1311
1055
|
runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
|
|
@@ -1326,7 +1070,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1326
1070
|
approvalDecision = runtime.approvalManager.decide({
|
|
1327
1071
|
cli: "codex",
|
|
1328
1072
|
operation: params.operation,
|
|
1329
|
-
prompt: assembledPrompt,
|
|
1073
|
+
prompt: assembledPrompt,
|
|
1330
1074
|
bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
|
|
1331
1075
|
fullAuto: params.fullAuto,
|
|
1332
1076
|
requestedMcpServers,
|
|
@@ -1338,9 +1082,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1338
1082
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1339
1083
|
}
|
|
1340
1084
|
}
|
|
1341
|
-
// Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
|
|
1342
|
-
// Note: `codex exec resume` does NOT accept sandbox policy flags; the original
|
|
1343
|
-
// session's approval policy is inherited. We silently drop fullAuto on resume.
|
|
1344
1085
|
let sessionPlan;
|
|
1345
1086
|
try {
|
|
1346
1087
|
sessionPlan = resolveCodexSessionArgs({
|
|
@@ -1361,9 +1102,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1361
1102
|
}
|
|
1362
1103
|
if (resolvedModel)
|
|
1363
1104
|
args.push("--model", resolvedModel);
|
|
1364
|
-
// Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
|
|
1365
|
-
// `codex exec resume` rejects all of these (the original session's policy is
|
|
1366
|
-
// inherited), so we only emit them when starting a NEW session.
|
|
1367
1105
|
const sandboxFlags = resolveCodexSandboxFlags({
|
|
1368
1106
|
sandboxMode: params.sandboxMode,
|
|
1369
1107
|
askForApproval: params.askForApproval,
|
|
@@ -1379,26 +1117,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1379
1117
|
if (params.dangerouslyBypassApprovalsAndSandbox) {
|
|
1380
1118
|
args.push("--dangerously-bypass-approvals-and-sandbox");
|
|
1381
1119
|
}
|
|
1382
|
-
// U23 fix: emit `--json` when the caller asked for JSON output so the
|
|
1383
|
-
// codex-json-parser actually receives JSONL events. This is what makes
|
|
1384
|
-
// extractUsageAndCost() reachable from the tool surface; without it, the
|
|
1385
|
-
// U23 parser is dead code.
|
|
1386
1120
|
if (params.outputFormat === "json") {
|
|
1387
1121
|
args.push("--json");
|
|
1388
1122
|
}
|
|
1389
1123
|
args.push("--skip-git-repo-check");
|
|
1390
|
-
// U26: High-impact feature flags. `--search` is retained as a compatibility
|
|
1391
|
-
// input but current `codex exec` no longer accepts it, so the helper warns
|
|
1392
|
-
// and emits no argv. `--profile` is accepted for new sessions only. The other
|
|
1393
|
-
// flags here are accepted on resume per `codex exec resume --help` and are
|
|
1394
|
-
// emitted in both branches.
|
|
1395
1124
|
let highImpactCleanup;
|
|
1396
1125
|
if (sessionPlan.mode === "new") {
|
|
1397
|
-
// Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
|
|
1398
|
-
// Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
|
|
1399
|
-
// the original session's cwd and writable-dir policy, so emitting them
|
|
1400
|
-
// on resume would be silently stripped (wasteful + misleading on argv
|
|
1401
|
-
// logs). Gating here mirrors `--search` / `--sandbox`.
|
|
1402
1126
|
if (params.workingDir) {
|
|
1403
1127
|
args.push("-C", params.workingDir);
|
|
1404
1128
|
}
|
|
@@ -1481,7 +1205,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1481
1205
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1482
1206
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1483
1207
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1484
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1485
1208
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1486
1209
|
prompt: assembledPrompt,
|
|
1487
1210
|
allowedTools: params.allowedTools,
|
|
@@ -1505,8 +1228,8 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1505
1228
|
approvalDecision = runtime.approvalManager.decide({
|
|
1506
1229
|
cli: "gemini",
|
|
1507
1230
|
operation: params.operation,
|
|
1508
|
-
prompt: assembledPrompt,
|
|
1509
|
-
bypassRequested: params.approvalMode === "yolo",
|
|
1231
|
+
prompt: assembledPrompt,
|
|
1232
|
+
bypassRequested: params.approvalMode === "yolo" || params.yolo === true,
|
|
1510
1233
|
fullAuto: false,
|
|
1511
1234
|
requestedMcpServers,
|
|
1512
1235
|
allowedTools: params.allowedTools,
|
|
@@ -1519,8 +1242,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1519
1242
|
}
|
|
1520
1243
|
}
|
|
1521
1244
|
const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
|
|
1522
|
-
// U27: Validate high-impact policy paths and prepend attachment tokens
|
|
1523
|
-
// BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
|
|
1524
1245
|
const highImpact = prepareGeminiHighImpactFlags({
|
|
1525
1246
|
sandbox: params.sandbox,
|
|
1526
1247
|
policyFiles: params.policyFiles,
|
|
@@ -1537,15 +1258,14 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1537
1258
|
return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
|
|
1538
1259
|
}
|
|
1539
1260
|
}
|
|
1540
|
-
// U21: Emit the prompt via -p/--prompt rather than as a positional argument.
|
|
1541
|
-
// Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
|
|
1542
|
-
// the documented non-interactive flag and is robust against future CLI mode
|
|
1543
|
-
// changes.
|
|
1544
1261
|
const args = ["-p", effectivePrompt];
|
|
1545
1262
|
if (resolvedModel)
|
|
1546
1263
|
args.push("--model", resolvedModel);
|
|
1547
1264
|
if (effectiveApprovalMode)
|
|
1548
1265
|
args.push("--approval-mode", effectiveApprovalMode);
|
|
1266
|
+
if (params.yolo && effectiveApprovalMode !== "yolo") {
|
|
1267
|
+
args.push("--yolo");
|
|
1268
|
+
}
|
|
1549
1269
|
if (params.allowedTools && params.allowedTools.length > 0) {
|
|
1550
1270
|
sanitizeCliArgValues(params.allowedTools, "allowedTools");
|
|
1551
1271
|
params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
|
|
@@ -1558,26 +1278,13 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1558
1278
|
sanitizeCliArgValues(params.includeDirs, "includeDirs");
|
|
1559
1279
|
params.includeDirs.forEach(dir => args.push("--include-directories", dir));
|
|
1560
1280
|
}
|
|
1561
|
-
// U27 high-impact flags (-s / --policy / --admin-policy) appended after the
|
|
1562
|
-
// existing flag set so positional ordering relative to `-p` is preserved.
|
|
1563
1281
|
args.push(...highImpact.args);
|
|
1564
|
-
// U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
|
|
1565
|
-
// JSON parser is otherwise unreachable from the tool surface and the
|
|
1566
|
-
// structured usageMetadata is silently dropped.
|
|
1567
|
-
//
|
|
1568
|
-
// Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
|
|
1569
|
-
// Gemini already streams stdout in real-time so the existing 10-minute
|
|
1570
|
-
// idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
|
|
1571
|
-
// adjustment — unlike Claude, no `--include-partial-messages` companion
|
|
1572
|
-
// flag is required because Gemini emits assistant `delta` events as part
|
|
1573
|
-
// of the default stream-json shape.
|
|
1574
1282
|
if (params.outputFormat === "json") {
|
|
1575
1283
|
args.push("-o", "json");
|
|
1576
1284
|
}
|
|
1577
1285
|
else if (params.outputFormat === "stream-json") {
|
|
1578
1286
|
args.push("-o", "stream-json");
|
|
1579
1287
|
}
|
|
1580
|
-
// Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
|
|
1581
1288
|
if (params.skipTrust) {
|
|
1582
1289
|
args.push("--skip-trust");
|
|
1583
1290
|
}
|
|
@@ -1608,7 +1315,6 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
|
|
|
1608
1315
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1609
1316
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1610
1317
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1611
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1612
1318
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1613
1319
|
prompt: assembledPrompt,
|
|
1614
1320
|
allowedTools: params.allowedTools,
|
|
@@ -1633,7 +1339,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
|
|
|
1633
1339
|
approvalDecision = runtime.approvalManager.decide({
|
|
1634
1340
|
cli: "grok",
|
|
1635
1341
|
operation: params.operation,
|
|
1636
|
-
prompt: assembledPrompt,
|
|
1342
|
+
prompt: assembledPrompt,
|
|
1637
1343
|
bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
|
|
1638
1344
|
fullAuto: false,
|
|
1639
1345
|
requestedMcpServers,
|
|
@@ -1694,6 +1400,12 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
|
|
|
1694
1400
|
args.push("--deny", rule);
|
|
1695
1401
|
}
|
|
1696
1402
|
}
|
|
1403
|
+
if (params.compactionMode) {
|
|
1404
|
+
args.push("--compaction-mode", params.compactionMode);
|
|
1405
|
+
}
|
|
1406
|
+
if (params.compactionDetail) {
|
|
1407
|
+
args.push("--compaction-detail", params.compactionDetail);
|
|
1408
|
+
}
|
|
1697
1409
|
return {
|
|
1698
1410
|
corrId,
|
|
1699
1411
|
effectivePrompt,
|
|
@@ -1762,9 +1474,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1762
1474
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1763
1475
|
}
|
|
1764
1476
|
}
|
|
1765
|
-
// Under mcp_managed, force --agent auto-approve so the approval gate's
|
|
1766
|
-
// verdict carries through to the CLI invocation (mirrors Grok's --always-approve
|
|
1767
|
-
// forcing under mcp_managed).
|
|
1768
1477
|
const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
|
|
1769
1478
|
? "auto-approve"
|
|
1770
1479
|
: (params.permissionMode ?? "auto-approve");
|
|
@@ -1773,8 +1482,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1773
1482
|
resolvedModel,
|
|
1774
1483
|
outputFormat: params.outputFormat,
|
|
1775
1484
|
permissionMode: effectivePermissionMode,
|
|
1776
|
-
effort: params.effort,
|
|
1777
|
-
reasoningEffort: params.reasoningEffort,
|
|
1778
1485
|
allowedTools: params.allowedTools,
|
|
1779
1486
|
disallowedTools: params.disallowedTools,
|
|
1780
1487
|
trust: params.trust,
|
|
@@ -1813,15 +1520,6 @@ function selectMistralRecoveryModel(failedModel) {
|
|
|
1813
1520
|
].filter((model) => Boolean(model && model !== failedModel));
|
|
1814
1521
|
return candidates.find(model => model !== "local");
|
|
1815
1522
|
}
|
|
1816
|
-
/**
|
|
1817
|
-
* Phase 4 slice δ post-review: pure helper extracted from
|
|
1818
|
-
* `handleMistralRequest` so the retry-path arg-preservation invariants
|
|
1819
|
-
* (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
|
|
1820
|
-
* without mocking awaitJobOrDefer. Any param the wrapper threads into
|
|
1821
|
-
* the FIRST `buildMistralCliInvocation` call MUST also be threaded
|
|
1822
|
-
* through here, or a fresh-workspace / budgeted run can degrade on
|
|
1823
|
-
* the second attempt.
|
|
1824
|
-
*/
|
|
1825
1523
|
export function buildMistralRetryPrep(params, recoveryModel) {
|
|
1826
1524
|
return buildMistralCliInvocation({
|
|
1827
1525
|
prompt: params.effectivePrompt,
|
|
@@ -1830,8 +1528,6 @@ export function buildMistralRetryPrep(params, recoveryModel) {
|
|
|
1830
1528
|
permissionMode: params.approvalStrategy === "mcp_managed"
|
|
1831
1529
|
? "auto-approve"
|
|
1832
1530
|
: (params.permissionMode ?? "auto-approve"),
|
|
1833
|
-
effort: params.effort,
|
|
1834
|
-
reasoningEffort: params.reasoningEffort,
|
|
1835
1531
|
allowedTools: params.allowedTools,
|
|
1836
1532
|
disallowedTools: params.disallowedTools,
|
|
1837
1533
|
trust: params.trust,
|
|
@@ -1844,13 +1540,11 @@ export function buildMistralRetryPrep(params, recoveryModel) {
|
|
|
1844
1540
|
}
|
|
1845
1541
|
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
|
|
1846
1542
|
let finalStdout = stdout;
|
|
1847
|
-
// Skip response optimization for JSON output to prevent corrupting structured data
|
|
1848
1543
|
if (optimizeResponse && outputFormat !== "json") {
|
|
1849
1544
|
const optimized = optimizeResponseText(finalStdout);
|
|
1850
1545
|
logOptimizationTokens("response", corrId, finalStdout, optimized);
|
|
1851
1546
|
finalStdout = optimized;
|
|
1852
1547
|
}
|
|
1853
|
-
// Append review integrity warnings to response text (skip for JSON output to avoid corruption)
|
|
1854
1548
|
if (prep.reviewIntegrity &&
|
|
1855
1549
|
prep.reviewIntegrity.violations.length > 0 &&
|
|
1856
1550
|
outputFormat !== "json") {
|
|
@@ -1867,9 +1561,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1867
1561
|
correlationId: corrId,
|
|
1868
1562
|
sessionId: sessionId || null,
|
|
1869
1563
|
durationMs,
|
|
1870
|
-
// Phase 4 slice β: thread sessionId + home so the Mistral branch of
|
|
1871
|
-
// extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
|
|
1872
|
-
// Other CLIs ignore the ctx (their usage source is stdout).
|
|
1873
1564
|
...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
|
|
1874
1565
|
exitCode: 0,
|
|
1875
1566
|
retryCount: 0,
|
|
@@ -1899,12 +1590,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1899
1590
|
}
|
|
1900
1591
|
return response;
|
|
1901
1592
|
}
|
|
1902
|
-
/**
|
|
1903
|
-
* Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
|
|
1904
|
-
* claude session, if the feature is enabled, the session has prior cache
|
|
1905
|
-
* writes, and ttlRemainingMs is below the threshold (30s by default).
|
|
1906
|
-
* Returns null when no warning applies.
|
|
1907
|
-
*/
|
|
1908
1593
|
function maybeBuildCacheTtlWarning(args) {
|
|
1909
1594
|
if (args.cli !== "claude")
|
|
1910
1595
|
return null;
|
|
@@ -1933,7 +1618,6 @@ function resolveHandlerRuntime(deps) {
|
|
|
1933
1618
|
if (deps.runtime)
|
|
1934
1619
|
return deps.runtime;
|
|
1935
1620
|
const asyncDeps = deps;
|
|
1936
|
-
// Older HandlerDeps callers may not provide `warn`; default-route to `info`.
|
|
1937
1621
|
const depLogger = deps.logger;
|
|
1938
1622
|
const normalizedLogger = {
|
|
1939
1623
|
info: depLogger.info,
|
|
@@ -1969,6 +1653,7 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1969
1653
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
1970
1654
|
attachments: params.attachments,
|
|
1971
1655
|
skipTrust: params.skipTrust,
|
|
1656
|
+
yolo: params.yolo,
|
|
1972
1657
|
}, runtime);
|
|
1973
1658
|
if (!("args" in prep))
|
|
1974
1659
|
return prep;
|
|
@@ -1986,8 +1671,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1986
1671
|
}, runtime);
|
|
1987
1672
|
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
1988
1673
|
try {
|
|
1989
|
-
// Gemini CLI 0.43 supports `--resume`, but not a supported fresh
|
|
1990
|
-
// `--session-id` flag. Fresh sessions emit no session flag.
|
|
1991
1674
|
const sessionPlan = resolveGeminiSessionPlan({
|
|
1992
1675
|
sessionId: params.sessionId,
|
|
1993
1676
|
resumeLatest: params.resumeLatest,
|
|
@@ -2005,7 +1688,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
2005
1688
|
}
|
|
2006
1689
|
const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
|
|
2007
1690
|
const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
|
|
2008
|
-
// Deferred — job still running, return async reference
|
|
2009
1691
|
if (isDeferredResponse(result)) {
|
|
2010
1692
|
return buildDeferredToolResponse(result, effectiveSessionIdHint);
|
|
2011
1693
|
}
|
|
@@ -2026,9 +1708,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
2026
1708
|
return createErrorResponse("gemini", code, stderr, corrId);
|
|
2027
1709
|
}
|
|
2028
1710
|
wasSuccessful = true;
|
|
2029
|
-
// Post-success session I/O for explicit resume flows. Fresh Gemini sessions
|
|
2030
|
-
// are owned by the CLI because the current CLI has no supported fresh
|
|
2031
|
-
// session-id flag the gateway can inject.
|
|
2032
1711
|
let effectiveSessionId = effectiveSessionIdHint;
|
|
2033
1712
|
if (effectiveSessionId) {
|
|
2034
1713
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2111,19 +1790,18 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
2111
1790
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
2112
1791
|
attachments: params.attachments,
|
|
2113
1792
|
skipTrust: params.skipTrust,
|
|
1793
|
+
yolo: params.yolo,
|
|
2114
1794
|
}, runtime);
|
|
2115
1795
|
if (!("args" in prep))
|
|
2116
1796
|
return prep;
|
|
2117
1797
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2118
1798
|
try {
|
|
2119
|
-
// Gemini CLI 0.43 supports `--resume`, but fresh sessions emit no session flag.
|
|
2120
1799
|
const sessionPlan = resolveGeminiSessionPlan({
|
|
2121
1800
|
sessionId: params.sessionId,
|
|
2122
1801
|
resumeLatest: params.resumeLatest,
|
|
2123
1802
|
createNewSession: params.createNewSession,
|
|
2124
1803
|
});
|
|
2125
1804
|
args.push(...sessionPlan.args);
|
|
2126
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2127
1805
|
let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
|
|
2128
1806
|
if (effectiveSessionId) {
|
|
2129
1807
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2146,13 +1824,8 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
2146
1824
|
catch (err) {
|
|
2147
1825
|
return createErrorResponse("gemini_request_async", 1, "", corrId, err);
|
|
2148
1826
|
}
|
|
2149
|
-
// Start job only after all session I/O succeeds. U23: forward outputFormat
|
|
2150
|
-
// so AsyncJobManager records it in the durable store (the manager also
|
|
2151
|
-
// surfaces it in the snapshot).
|
|
2152
1827
|
assertUpstreamCliArgs("gemini", args);
|
|
2153
1828
|
assertUpstreamCliEnv("gemini", undefined);
|
|
2154
|
-
// Slice 1.5: pure async path — no upstream safeFlightStart, so the
|
|
2155
|
-
// manager owns both logStart and logComplete for this corrId.
|
|
2156
1829
|
const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
|
|
2157
1830
|
const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
|
|
2158
1831
|
deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
|
|
@@ -2210,6 +1883,8 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2210
1883
|
systemPromptOverride: params.systemPromptOverride,
|
|
2211
1884
|
allow: params.allow,
|
|
2212
1885
|
deny: params.deny,
|
|
1886
|
+
compactionMode: params.compactionMode,
|
|
1887
|
+
compactionDetail: params.compactionDetail,
|
|
2213
1888
|
}, runtime);
|
|
2214
1889
|
if (!("args" in prep))
|
|
2215
1890
|
return prep;
|
|
@@ -2227,7 +1902,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2227
1902
|
}, runtime);
|
|
2228
1903
|
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
2229
1904
|
try {
|
|
2230
|
-
// Session arg planning (pure, no I/O)
|
|
2231
1905
|
const sessionResult = resolveGrokSessionArgs({
|
|
2232
1906
|
sessionId: params.sessionId,
|
|
2233
1907
|
resumeLatest: params.resumeLatest,
|
|
@@ -2243,7 +1917,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2243
1917
|
}
|
|
2244
1918
|
const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
|
|
2245
1919
|
const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2246
|
-
// Deferred — job still running, return async reference
|
|
2247
1920
|
if (isDeferredResponse(result)) {
|
|
2248
1921
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
2249
1922
|
}
|
|
@@ -2264,7 +1937,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2264
1937
|
return createErrorResponse("grok", code, stderr, corrId);
|
|
2265
1938
|
}
|
|
2266
1939
|
wasSuccessful = true;
|
|
2267
|
-
// Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
|
|
2268
1940
|
let effectiveSessionId = sessionResult.effectiveSessionId;
|
|
2269
1941
|
if (sessionResult.userProvidedSession && effectiveSessionId) {
|
|
2270
1942
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2350,19 +2022,19 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2350
2022
|
systemPromptOverride: params.systemPromptOverride,
|
|
2351
2023
|
allow: params.allow,
|
|
2352
2024
|
deny: params.deny,
|
|
2025
|
+
compactionMode: params.compactionMode,
|
|
2026
|
+
compactionDetail: params.compactionDetail,
|
|
2353
2027
|
}, runtime);
|
|
2354
2028
|
if (!("args" in prep))
|
|
2355
2029
|
return prep;
|
|
2356
2030
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2357
2031
|
try {
|
|
2358
|
-
// Session arg planning (pure, no I/O)
|
|
2359
2032
|
const sessionResult = resolveGrokSessionArgs({
|
|
2360
2033
|
sessionId: params.sessionId,
|
|
2361
2034
|
resumeLatest: params.resumeLatest,
|
|
2362
2035
|
createNewSession: params.createNewSession,
|
|
2363
2036
|
});
|
|
2364
2037
|
args.push(...sessionResult.resumeArgs);
|
|
2365
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2366
2038
|
let effectiveSessionId = sessionResult.effectiveSessionId;
|
|
2367
2039
|
if (sessionResult.userProvidedSession && effectiveSessionId) {
|
|
2368
2040
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2389,7 +2061,6 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2389
2061
|
catch (err) {
|
|
2390
2062
|
return createErrorResponse("grok_request_async", 1, "", corrId, err);
|
|
2391
2063
|
}
|
|
2392
|
-
// Start job only after all session I/O succeeds
|
|
2393
2064
|
assertUpstreamCliArgs("grok", args);
|
|
2394
2065
|
assertUpstreamCliEnv("grok", undefined);
|
|
2395
2066
|
const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
|
|
@@ -2431,8 +2102,6 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2431
2102
|
model: params.model,
|
|
2432
2103
|
outputFormat: params.outputFormat,
|
|
2433
2104
|
permissionMode: params.permissionMode,
|
|
2434
|
-
effort: params.effort,
|
|
2435
|
-
reasoningEffort: params.reasoningEffort,
|
|
2436
2105
|
allowedTools: params.allowedTools,
|
|
2437
2106
|
disallowedTools: params.disallowedTools,
|
|
2438
2107
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -2488,8 +2157,6 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2488
2157
|
deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
|
|
2489
2158
|
const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
|
|
2490
2159
|
const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
|
|
2491
|
-
// Reuse the FR handoff built above — the retry preserves corrId,
|
|
2492
|
-
// so the manager's logComplete still updates the original row.
|
|
2493
2160
|
result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2494
2161
|
if (isDeferredResponse(result)) {
|
|
2495
2162
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
@@ -2582,8 +2249,6 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2582
2249
|
model: params.model,
|
|
2583
2250
|
outputFormat: params.outputFormat,
|
|
2584
2251
|
permissionMode: params.permissionMode,
|
|
2585
|
-
effort: params.effort,
|
|
2586
|
-
reasoningEffort: params.reasoningEffort,
|
|
2587
2252
|
allowedTools: params.allowedTools,
|
|
2588
2253
|
disallowedTools: params.disallowedTools,
|
|
2589
2254
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -2702,11 +2367,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2702
2367
|
if (!("args" in prep))
|
|
2703
2368
|
return prep;
|
|
2704
2369
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2705
|
-
// U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
|
|
2706
|
-
// exactly one place at a time: this scope until startJob succeeds, then
|
|
2707
|
-
// AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
|
|
2708
|
-
// the job is registered. Any code path that fails to hand it off MUST run
|
|
2709
|
-
// it locally.
|
|
2710
2370
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
2711
2371
|
let prepCleanupOwnedHere = prepCleanup !== undefined;
|
|
2712
2372
|
const runPrepCleanupLocally = () => {
|
|
@@ -2721,7 +2381,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2721
2381
|
}
|
|
2722
2382
|
};
|
|
2723
2383
|
try {
|
|
2724
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2725
2384
|
let effectiveSessionId = params.sessionId;
|
|
2726
2385
|
if (!params.createNewSession && !params.sessionId) {
|
|
2727
2386
|
const activeSession = await deps.sessionManager.getActiveSession("codex");
|
|
@@ -2740,9 +2399,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2740
2399
|
const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
|
|
2741
2400
|
effectiveSessionId = newSession.id;
|
|
2742
2401
|
}
|
|
2743
|
-
// Slice λ: resolve worktree directive after session I/O so resume reuse
|
|
2744
|
-
// can read metadata.worktreePath. A pre-startJob failure here means
|
|
2745
|
-
// prepCleanup is still owned locally; run it before returning.
|
|
2746
2402
|
let worktreeResolution = {};
|
|
2747
2403
|
try {
|
|
2748
2404
|
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
@@ -2751,22 +2407,15 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2751
2407
|
runPrepCleanupLocally();
|
|
2752
2408
|
return createErrorResponse("codex_request_async", 1, "", corrId, err);
|
|
2753
2409
|
}
|
|
2754
|
-
// Start job only after all session I/O succeeds. If startJob throws before
|
|
2755
|
-
// registering the record, ownership stays here and we run it in the catch.
|
|
2756
2410
|
assertUpstreamCliArgs("codex", args);
|
|
2757
2411
|
assertUpstreamCliEnv("codex", undefined);
|
|
2758
2412
|
const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
|
|
2759
2413
|
let job;
|
|
2760
2414
|
try {
|
|
2761
2415
|
job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
|
|
2762
|
-
// Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
|
|
2763
|
-
// status. Release our local ownership claim so the catch path doesn't
|
|
2764
|
-
// double-fire.
|
|
2765
2416
|
prepCleanupOwnedHere = false;
|
|
2766
2417
|
}
|
|
2767
2418
|
catch (startErr) {
|
|
2768
|
-
// startJob never stored the record → manager won't call onComplete. We
|
|
2769
|
-
// still own the cleanup; let the outer catch run it.
|
|
2770
2419
|
throw startErr;
|
|
2771
2420
|
}
|
|
2772
2421
|
deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
|
|
@@ -2793,42 +2442,15 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2793
2442
|
};
|
|
2794
2443
|
}
|
|
2795
2444
|
catch (error) {
|
|
2796
|
-
// Pre-start failure: either session I/O threw, or startJob threw before
|
|
2797
|
-
// registering the record. In either case the manager will NOT fire
|
|
2798
|
-
// prepCleanup, so we must run it here.
|
|
2799
2445
|
runPrepCleanupLocally();
|
|
2800
2446
|
return createErrorResponse("codex_request_async", 1, "", corrId, error);
|
|
2801
2447
|
}
|
|
2802
2448
|
}
|
|
2803
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
2804
|
-
// Claude Code Tool
|
|
2805
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
2806
2449
|
export function createGatewayServer(deps = {}) {
|
|
2807
2450
|
const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
|
|
2808
2451
|
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
|
|
2809
|
-
// `flightRecorder` is destructured into closure scope so the session_get
|
|
2810
|
-
// handler (see ~line 5590) has the FlightRecorderQuery read capability
|
|
2811
|
-
// available without re-resolving runtime. Slice 2 will populate the
|
|
2812
|
-
// `cacheState` field of session_get's response from this read surface.
|
|
2813
|
-
// `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
|
|
2814
2452
|
void flightRecorder;
|
|
2815
2453
|
void cacheAwareness;
|
|
2816
|
-
// Structural invariant: tools register iff ALL THREE conditions hold:
|
|
2817
|
-
// (1) persistence.backend !== "none" — the operator/config has not
|
|
2818
|
-
// explicitly disabled durable persistence;
|
|
2819
|
-
// (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
|
|
2820
|
-
// agrees (loadPersistenceConfig sets this iff backend is one of
|
|
2821
|
-
// sqlite/postgres/memory);
|
|
2822
|
-
// (3) asyncJobManager.hasStore() === true — the runtime manager
|
|
2823
|
-
// actually has a store attached (isolate-mode runtimes use null).
|
|
2824
|
-
//
|
|
2825
|
-
// Each guard closes a distinct re-entry path for the silent-loss footgun:
|
|
2826
|
-
// - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
|
|
2827
|
-
// and re-advertise the async tools while reporting backend='none' in
|
|
2828
|
-
// llm_process_health — exactly contradicting SPEC CLAIM 4f.
|
|
2829
|
-
// - Without (2), config that opts out is ignored.
|
|
2830
|
-
// - Without (3), a null-store manager (isolate-mode / HTTP per-session)
|
|
2831
|
-
// accepts registrations that have nowhere to persist results.
|
|
2832
2454
|
const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
|
|
2833
2455
|
const server = newGatewayMcpServer();
|
|
2834
2456
|
registerBaseResources(server, runtime);
|
|
@@ -2865,7 +2487,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
2865
2487
|
.enum(CLAUDE_PERMISSION_MODES)
|
|
2866
2488
|
.optional()
|
|
2867
2489
|
.describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
|
|
2868
|
-
// U25 — Claude high-impact features
|
|
2869
2490
|
agent: z
|
|
2870
2491
|
.string()
|
|
2871
2492
|
.optional()
|
|
@@ -2905,7 +2526,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
2905
2526
|
.boolean()
|
|
2906
2527
|
.optional()
|
|
2907
2528
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
2908
|
-
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
2909
2529
|
fallbackModel: z
|
|
2910
2530
|
.string()
|
|
2911
2531
|
.min(1)
|
|
@@ -2915,11 +2535,28 @@ export function createGatewayServer(deps = {}) {
|
|
|
2915
2535
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
2916
2536
|
.optional()
|
|
2917
2537
|
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
2918
|
-
// Phase 4 slice ζ — Claude additional-workspace-dirs parity
|
|
2919
2538
|
addDir: z
|
|
2920
2539
|
.array(z.string())
|
|
2921
2540
|
.optional()
|
|
2922
2541
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
2542
|
+
noSessionPersistence: z
|
|
2543
|
+
.boolean()
|
|
2544
|
+
.optional()
|
|
2545
|
+
.describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
|
|
2546
|
+
settingSources: z
|
|
2547
|
+
.string()
|
|
2548
|
+
.min(1)
|
|
2549
|
+
.optional()
|
|
2550
|
+
.describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
|
|
2551
|
+
settings: z
|
|
2552
|
+
.string()
|
|
2553
|
+
.min(1)
|
|
2554
|
+
.optional()
|
|
2555
|
+
.describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
|
|
2556
|
+
tools: z
|
|
2557
|
+
.array(z.string())
|
|
2558
|
+
.optional()
|
|
2559
|
+
.describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
|
|
2923
2560
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
2924
2561
|
approvalStrategy: z
|
|
2925
2562
|
.enum(["legacy", "mcp_managed"])
|
|
@@ -2951,7 +2588,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2951
2588
|
.boolean()
|
|
2952
2589
|
.default(false)
|
|
2953
2590
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2954
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2591
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2955
2592
|
const startTime = Date.now();
|
|
2956
2593
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2957
2594
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
@@ -2984,24 +2621,18 @@ export function createGatewayServer(deps = {}) {
|
|
|
2984
2621
|
fallbackModel,
|
|
2985
2622
|
jsonSchema,
|
|
2986
2623
|
addDir,
|
|
2624
|
+
noSessionPersistence,
|
|
2625
|
+
settingSources,
|
|
2626
|
+
settings,
|
|
2627
|
+
tools,
|
|
2987
2628
|
}, runtime);
|
|
2988
2629
|
if (!("args" in prep))
|
|
2989
2630
|
return prep;
|
|
2990
2631
|
const { corrId, args } = prep;
|
|
2991
2632
|
let durationMs = 0;
|
|
2992
2633
|
let wasSuccessful = false;
|
|
2993
|
-
// Session resolution happens BEFORE safeFlightStart so that:
|
|
2994
|
-
// (1) the TTL warning reads the PRIOR session's lastWriteAt
|
|
2995
|
-
// rather than the row about to be inserted (codex-r1/F1).
|
|
2996
|
-
// (2) the flight-recorder row is tagged with effectiveSessionId
|
|
2997
|
-
// (the session the CLI will actually resume), not the raw
|
|
2998
|
-
// user-provided sessionId.
|
|
2999
2634
|
let effectiveSessionId = sessionId;
|
|
3000
2635
|
let useContinue = continueSession;
|
|
3001
|
-
// Guard the active-session lookup: in some test harnesses the
|
|
3002
|
-
// sessionManager is undefined; the original try-catch wrapped this
|
|
3003
|
-
// block, so we replicate that tolerance here. Failure leaves
|
|
3004
|
-
// effectiveSessionId as the user-provided sessionId.
|
|
3005
2636
|
let activeSession = null;
|
|
3006
2637
|
try {
|
|
3007
2638
|
activeSession = await sessionManager.getActiveSession("claude");
|
|
@@ -3016,16 +2647,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3016
2647
|
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
3017
2648
|
useContinue = true;
|
|
3018
2649
|
}
|
|
3019
|
-
// Slice 3: if the resolved session has a near-expiry Anthropic
|
|
3020
|
-
// cache breakpoint, attach a structured warning (NOT a hard error)
|
|
3021
|
-
// to the response. Computed BEFORE safeFlightStart so the current
|
|
3022
|
-
// row does not skew lastRequestAt.
|
|
3023
2650
|
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
3024
2651
|
runtime,
|
|
3025
2652
|
sessionId: effectiveSessionId,
|
|
3026
2653
|
cli: "claude",
|
|
3027
2654
|
});
|
|
3028
|
-
// Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
|
|
3029
2655
|
const warnings = [
|
|
3030
2656
|
...(ttlWarning ? [ttlWarning] : []),
|
|
3031
2657
|
...(prep.warnings ?? []),
|
|
@@ -3049,8 +2675,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3049
2675
|
args.push("--session-id", effectiveSessionId);
|
|
3050
2676
|
await sessionManager.updateSessionUsage(effectiveSessionId);
|
|
3051
2677
|
}
|
|
3052
|
-
// Slice λ: resolve worktree directive into spawn cwd. Done after
|
|
3053
|
-
// session resolution so resume reuse can read metadata.worktreePath.
|
|
3054
2678
|
let worktreeResolution = {};
|
|
3055
2679
|
try {
|
|
3056
2680
|
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
@@ -3058,11 +2682,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3058
2682
|
catch (err) {
|
|
3059
2683
|
return createErrorResponse("claude_request", 1, "", corrId, err);
|
|
3060
2684
|
}
|
|
3061
|
-
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
3062
2685
|
const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
|
|
3063
2686
|
const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
3064
2687
|
const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
|
|
3065
|
-
// Deferred — job still running, return async reference
|
|
3066
2688
|
if (isDeferredResponse(result)) {
|
|
3067
2689
|
return buildDeferredToolResponse(result, effectiveSessionId);
|
|
3068
2690
|
}
|
|
@@ -3080,9 +2702,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3080
2702
|
errorMessage: stderr || `Exit code ${code}`,
|
|
3081
2703
|
status: "failed",
|
|
3082
2704
|
}, runtime);
|
|
3083
|
-
// Slice 3: attach any computed warnings to the error response so
|
|
3084
|
-
// the caller still sees cache_ttl_expiring_soon when the CLI
|
|
3085
|
-
// happens to fail for an unrelated reason.
|
|
3086
2705
|
const errResp = createErrorResponse("claude", code, stderr, corrId);
|
|
3087
2706
|
if (warnings.length > 0) {
|
|
3088
2707
|
errResp.warnings = warnings;
|
|
@@ -3090,7 +2709,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3090
2709
|
return errResp;
|
|
3091
2710
|
}
|
|
3092
2711
|
wasSuccessful = true;
|
|
3093
|
-
// If we used a session ID and it's not tracked yet, create a session record
|
|
3094
2712
|
if (effectiveSessionId) {
|
|
3095
2713
|
const existingSession = await sessionManager.getSession(effectiveSessionId);
|
|
3096
2714
|
if (!existingSession) {
|
|
@@ -3098,7 +2716,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3098
2716
|
}
|
|
3099
2717
|
}
|
|
3100
2718
|
logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
|
|
3101
|
-
// Parse stream-json NDJSON output to extract result text
|
|
3102
2719
|
if (outputFormat === "stream-json") {
|
|
3103
2720
|
const parsed = parseStreamJson(stdout);
|
|
3104
2721
|
if (parsed.costUsd !== null) {
|
|
@@ -3165,9 +2782,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3165
2782
|
performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
|
|
3166
2783
|
}
|
|
3167
2784
|
});
|
|
3168
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3169
|
-
// Codex Tool
|
|
3170
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3171
2785
|
server.tool("codex_request", {
|
|
3172
2786
|
prompt: z
|
|
3173
2787
|
.string()
|
|
@@ -3232,14 +2846,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3232
2846
|
.boolean()
|
|
3233
2847
|
.default(false)
|
|
3234
2848
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3235
|
-
// U23: emit `--json` so the codex-json-parser surfaces input/output/cache
|
|
3236
|
-
// tokens (and any cost) through extractUsageAndCost. Without "json", the
|
|
3237
|
-
// parser is unreachable and Codex usage is never reported.
|
|
3238
2849
|
outputFormat: z
|
|
3239
2850
|
.enum(["text", "json"])
|
|
3240
2851
|
.default("text")
|
|
3241
2852
|
.describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
|
|
3242
|
-
// U26: high-impact feature flags. All optional.
|
|
3243
2853
|
outputSchema: z
|
|
3244
2854
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
3245
2855
|
.optional()
|
|
@@ -3269,7 +2879,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3269
2879
|
.boolean()
|
|
3270
2880
|
.optional()
|
|
3271
2881
|
.describe("Codex --ignore-rules: skip project rule files for this run."),
|
|
3272
|
-
// Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
|
|
3273
2882
|
workingDir: z
|
|
3274
2883
|
.string()
|
|
3275
2884
|
.min(1)
|
|
@@ -3327,15 +2936,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3327
2936
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
3328
2937
|
}, runtime);
|
|
3329
2938
|
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
|
|
3330
|
-
// U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
|
|
3331
|
-
// guarantees the cleanup runs exactly once — inline for direct
|
|
3332
|
-
// execution, on terminal status for the job-backed path (sync
|
|
3333
|
-
// completion or deferred). The outer finally MUST NOT clean again.
|
|
3334
2939
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
3335
|
-
// Slice λ: resolve worktree directive into spawn cwd. Codex has no
|
|
3336
|
-
// in-handler session resolution prior to spawn (session lookup is
|
|
3337
|
-
// lazy via `codex exec resume`), so the user-supplied sessionId is
|
|
3338
|
-
// the only reuse key.
|
|
3339
2940
|
let worktreeResolution = {};
|
|
3340
2941
|
try {
|
|
3341
2942
|
worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
|
|
@@ -3346,8 +2947,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3346
2947
|
try {
|
|
3347
2948
|
const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
|
|
3348
2949
|
const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
3349
|
-
// Deferred — job still running, return async reference. Cleanup
|
|
3350
|
-
// ownership belongs to AsyncJobManager via onComplete.
|
|
3351
2950
|
if (isDeferredResponse(result)) {
|
|
3352
2951
|
return buildDeferredToolResponse(result, sessionId);
|
|
3353
2952
|
}
|
|
@@ -3368,7 +2967,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3368
2967
|
return createErrorResponse("codex", code, stderr, corrId);
|
|
3369
2968
|
}
|
|
3370
2969
|
wasSuccessful = true;
|
|
3371
|
-
// Track session usage
|
|
3372
2970
|
let effectiveSessionId = sessionId;
|
|
3373
2971
|
if (!createNewSession && !sessionId) {
|
|
3374
2972
|
const activeSession = await sessionManager.getActiveSession("codex");
|
|
@@ -3430,12 +3028,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3430
3028
|
finally {
|
|
3431
3029
|
const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
|
|
3432
3030
|
performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
|
|
3433
|
-
// Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
|
|
3434
3031
|
}
|
|
3435
3032
|
});
|
|
3436
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3437
|
-
// U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
|
|
3438
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3439
3033
|
server.tool("codex_fork_session", {
|
|
3440
3034
|
prompt: z
|
|
3441
3035
|
.string()
|
|
@@ -3472,8 +3066,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3472
3066
|
const startTime = Date.now();
|
|
3473
3067
|
let durationMs = 0;
|
|
3474
3068
|
let wasSuccessful = false;
|
|
3475
|
-
// Enforce mutual exclusion at tool boundary (Zod records the params but
|
|
3476
|
-
// the SDK's `.tool(...)` does not accept top-level refines).
|
|
3477
3069
|
if (sessionId && forkLast) {
|
|
3478
3070
|
return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
|
|
3479
3071
|
}
|
|
@@ -3489,11 +3081,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3489
3081
|
}
|
|
3490
3082
|
const cliInfo = getCliInfo();
|
|
3491
3083
|
const resolvedModel = resolveModelAlias("codex", model, cliInfo);
|
|
3492
|
-
// Compose argv: forkArgs already starts with `fork`. Inject model and
|
|
3493
|
-
// sandbox/approval flags BEFORE the positional <sessionId|--last> +
|
|
3494
|
-
// prompt to keep them as flags rather than positionals. forkArgs layout
|
|
3495
|
-
// is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
|
|
3496
|
-
// we splice flags right after "fork".
|
|
3497
3084
|
const flagSegment = [];
|
|
3498
3085
|
if (resolvedModel)
|
|
3499
3086
|
flagSegment.push("--model", resolvedModel);
|
|
@@ -3530,9 +3117,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3530
3117
|
performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
|
|
3531
3118
|
}
|
|
3532
3119
|
});
|
|
3533
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3534
|
-
// Gemini Tool
|
|
3535
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3536
3120
|
server.tool("gemini_request", {
|
|
3537
3121
|
prompt: z
|
|
3538
3122
|
.string()
|
|
@@ -3583,11 +3167,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3583
3167
|
.boolean()
|
|
3584
3168
|
.default(false)
|
|
3585
3169
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3586
|
-
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3587
|
-
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
3588
|
-
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
3589
|
-
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
3590
|
-
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
3591
3170
|
outputFormat: z
|
|
3592
3171
|
.enum(["text", "json", "stream-json"])
|
|
3593
3172
|
.default("text")
|
|
@@ -3600,8 +3179,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
3600
3179
|
.boolean()
|
|
3601
3180
|
.default(false)
|
|
3602
3181
|
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3182
|
+
yolo: z
|
|
3183
|
+
.boolean()
|
|
3184
|
+
.optional()
|
|
3185
|
+
.describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
|
|
3603
3186
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
3604
|
-
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
|
|
3187
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
|
|
3605
3188
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
3606
3189
|
prompt,
|
|
3607
3190
|
promptParts,
|
|
@@ -3626,12 +3209,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3626
3209
|
adminPolicyFiles,
|
|
3627
3210
|
attachments,
|
|
3628
3211
|
skipTrust,
|
|
3212
|
+
yolo,
|
|
3629
3213
|
worktree,
|
|
3630
3214
|
});
|
|
3631
3215
|
});
|
|
3632
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3633
|
-
// Grok Tool
|
|
3634
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3635
3216
|
server.tool("grok_request", {
|
|
3636
3217
|
prompt: z
|
|
3637
3218
|
.string()
|
|
@@ -3702,13 +3283,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3702
3283
|
.default(false)
|
|
3703
3284
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3704
3285
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3705
|
-
// Phase 4 slice ζ — Grok working-directory parity.
|
|
3706
3286
|
workingDir: z
|
|
3707
3287
|
.string()
|
|
3708
3288
|
.min(1)
|
|
3709
3289
|
.optional()
|
|
3710
3290
|
.describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
|
|
3711
|
-
// Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
|
|
3712
3291
|
sandbox: z
|
|
3713
3292
|
.string()
|
|
3714
3293
|
.min(1)
|
|
@@ -3732,8 +3311,16 @@ export function createGatewayServer(deps = {}) {
|
|
|
3732
3311
|
.array(z.string())
|
|
3733
3312
|
.optional()
|
|
3734
3313
|
.describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
|
|
3314
|
+
compactionMode: z
|
|
3315
|
+
.enum(["summary", "transcript", "segments"])
|
|
3316
|
+
.optional()
|
|
3317
|
+
.describe("Grok --compaction-mode: summary (default; no pointer) | transcript (points at the raw transcript) | segments (persists per-segment markdown to grep). Sets GROK_COMPACTION_MODE."),
|
|
3318
|
+
compactionDetail: z
|
|
3319
|
+
.enum(["none", "minimal", "balanced", "verbose"])
|
|
3320
|
+
.optional()
|
|
3321
|
+
.describe("Grok --compaction-detail: verbatim segment detail (none|minimal|balanced|verbose, default verbose). Only affects `--compaction-mode segments`. Sets GROK_COMPACTION_DETAIL."),
|
|
3735
3322
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
3736
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
|
|
3323
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
|
|
3737
3324
|
return handleGrokRequest({ sessionManager, logger, runtime }, {
|
|
3738
3325
|
prompt,
|
|
3739
3326
|
promptParts,
|
|
@@ -3763,12 +3350,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3763
3350
|
systemPromptOverride,
|
|
3764
3351
|
allow,
|
|
3765
3352
|
deny,
|
|
3353
|
+
compactionMode,
|
|
3354
|
+
compactionDetail,
|
|
3766
3355
|
worktree,
|
|
3767
3356
|
});
|
|
3768
3357
|
});
|
|
3769
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3770
|
-
// Mistral Vibe Tool
|
|
3771
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3772
3358
|
server.tool("mistral_request", {
|
|
3773
3359
|
prompt: z
|
|
3774
3360
|
.string()
|
|
@@ -3798,11 +3384,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3798
3384
|
.enum(MISTRAL_AGENT_MODES)
|
|
3799
3385
|
.optional()
|
|
3800
3386
|
.describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
|
|
3801
|
-
effort: z
|
|
3802
|
-
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
3803
|
-
.optional()
|
|
3804
|
-
.describe("Vibe effort level"),
|
|
3805
|
-
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
3806
3387
|
approvalStrategy: z
|
|
3807
3388
|
.enum(["legacy", "mcp_managed"])
|
|
3808
3389
|
.default("legacy")
|
|
@@ -3844,7 +3425,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3844
3425
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3845
3426
|
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
3846
3427
|
maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
|
|
3847
|
-
// Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
|
|
3848
3428
|
workingDir: z
|
|
3849
3429
|
.string()
|
|
3850
3430
|
.min(1)
|
|
@@ -3855,7 +3435,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3855
3435
|
.optional()
|
|
3856
3436
|
.describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
|
|
3857
3437
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
3858
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode,
|
|
3438
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
|
|
3859
3439
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
3860
3440
|
prompt,
|
|
3861
3441
|
promptParts,
|
|
@@ -3865,8 +3445,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3865
3445
|
resumeLatest,
|
|
3866
3446
|
createNewSession,
|
|
3867
3447
|
permissionMode,
|
|
3868
|
-
effort,
|
|
3869
|
-
reasoningEffort,
|
|
3870
3448
|
approvalStrategy,
|
|
3871
3449
|
approvalPolicy,
|
|
3872
3450
|
mcpServers,
|
|
@@ -3886,16 +3464,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3886
3464
|
worktree,
|
|
3887
3465
|
});
|
|
3888
3466
|
});
|
|
3889
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3890
|
-
// Async Long-Running Job Tools (No Time-Bound LLM Execution)
|
|
3891
|
-
//
|
|
3892
|
-
// STRUCTURAL INVARIANT: these tools are only registered when a real job
|
|
3893
|
-
// store is attached (`persistence.asyncJobsEnabled === true`). When the
|
|
3894
|
-
// operator has configured `[persistence].backend = "none"`, none of the
|
|
3895
|
-
// *_request_async / llm_job_* tools exist in the MCP tool list at all —
|
|
3896
|
-
// orchestrating agents get a clean "tool not found" signal at connect
|
|
3897
|
-
// time instead of silent in-memory loss after the 1-hour TTL.
|
|
3898
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3899
3467
|
if (asyncJobsEnabled) {
|
|
3900
3468
|
server.tool("claude_request_async", {
|
|
3901
3469
|
prompt: z
|
|
@@ -3929,7 +3497,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3929
3497
|
.enum(CLAUDE_PERMISSION_MODES)
|
|
3930
3498
|
.optional()
|
|
3931
3499
|
.describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
|
|
3932
|
-
// U25 — Claude high-impact features
|
|
3933
3500
|
agent: z
|
|
3934
3501
|
.string()
|
|
3935
3502
|
.optional()
|
|
@@ -3969,7 +3536,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3969
3536
|
.boolean()
|
|
3970
3537
|
.optional()
|
|
3971
3538
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
3972
|
-
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
3973
3539
|
fallbackModel: z
|
|
3974
3540
|
.string()
|
|
3975
3541
|
.min(1)
|
|
@@ -3979,11 +3545,28 @@ export function createGatewayServer(deps = {}) {
|
|
|
3979
3545
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
3980
3546
|
.optional()
|
|
3981
3547
|
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
3982
|
-
// Phase 4 slice ζ — Claude additional-workspace-dirs parity
|
|
3983
3548
|
addDir: z
|
|
3984
3549
|
.array(z.string())
|
|
3985
3550
|
.optional()
|
|
3986
3551
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
3552
|
+
noSessionPersistence: z
|
|
3553
|
+
.boolean()
|
|
3554
|
+
.optional()
|
|
3555
|
+
.describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
|
|
3556
|
+
settingSources: z
|
|
3557
|
+
.string()
|
|
3558
|
+
.min(1)
|
|
3559
|
+
.optional()
|
|
3560
|
+
.describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
|
|
3561
|
+
settings: z
|
|
3562
|
+
.string()
|
|
3563
|
+
.min(1)
|
|
3564
|
+
.optional()
|
|
3565
|
+
.describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
|
|
3566
|
+
tools: z
|
|
3567
|
+
.array(z.string())
|
|
3568
|
+
.optional()
|
|
3569
|
+
.describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
|
|
3987
3570
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
3988
3571
|
approvalStrategy: z
|
|
3989
3572
|
.enum(["legacy", "mcp_managed"])
|
|
@@ -4014,7 +3597,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4014
3597
|
.boolean()
|
|
4015
3598
|
.default(false)
|
|
4016
3599
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4017
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3600
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
4018
3601
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
4019
3602
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
4020
3603
|
}
|
|
@@ -4046,12 +3629,15 @@ export function createGatewayServer(deps = {}) {
|
|
|
4046
3629
|
fallbackModel,
|
|
4047
3630
|
jsonSchema,
|
|
4048
3631
|
addDir,
|
|
3632
|
+
noSessionPersistence,
|
|
3633
|
+
settingSources,
|
|
3634
|
+
settings,
|
|
3635
|
+
tools,
|
|
4049
3636
|
}, runtime);
|
|
4050
3637
|
if (!("args" in prep))
|
|
4051
3638
|
return prep;
|
|
4052
3639
|
const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
|
|
4053
3640
|
try {
|
|
4054
|
-
// Session management (before job start for async)
|
|
4055
3641
|
let effectiveSessionId = sessionId;
|
|
4056
3642
|
let useContinue = continueSession;
|
|
4057
3643
|
const activeSession = await sessionManager.getActiveSession("claude");
|
|
@@ -4075,14 +3661,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
4075
3661
|
await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
|
|
4076
3662
|
}
|
|
4077
3663
|
}
|
|
4078
|
-
// Slice 3: TTL warning on resume (async path too).
|
|
4079
3664
|
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
4080
3665
|
runtime,
|
|
4081
3666
|
sessionId: effectiveSessionId,
|
|
4082
3667
|
cli: "claude",
|
|
4083
3668
|
});
|
|
4084
|
-
// Slice λ: resolve worktree directive after session metadata is
|
|
4085
|
-
// settled so resume reuse can read metadata.worktreePath.
|
|
4086
3669
|
let worktreeResolution = {};
|
|
4087
3670
|
try {
|
|
4088
3671
|
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
@@ -4090,7 +3673,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4090
3673
|
catch (err) {
|
|
4091
3674
|
return createErrorResponse("claude_request_async", 1, "", corrId, err);
|
|
4092
3675
|
}
|
|
4093
|
-
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
4094
3676
|
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
4095
3677
|
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
4096
3678
|
: undefined;
|
|
@@ -4116,8 +3698,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4116
3698
|
if (worktreeResolution.worktreePath) {
|
|
4117
3699
|
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
4118
3700
|
}
|
|
4119
|
-
// Rec #4: include any prep-time warnings (e.g.
|
|
4120
|
-
// cacheable_prefix_uncached) alongside ttlWarning.
|
|
4121
3701
|
const mergedWarnings = [
|
|
4122
3702
|
...(ttlWarning ? [ttlWarning] : []),
|
|
4123
3703
|
...(prep.warnings ?? []),
|
|
@@ -4201,12 +3781,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
4201
3781
|
.boolean()
|
|
4202
3782
|
.default(false)
|
|
4203
3783
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4204
|
-
// U23: emit `--json` to enable JSONL event-stream parsing for token usage.
|
|
4205
3784
|
outputFormat: z
|
|
4206
3785
|
.enum(["text", "json"])
|
|
4207
3786
|
.default("text")
|
|
4208
3787
|
.describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
|
|
4209
|
-
// U26: high-impact feature flags. All optional.
|
|
4210
3788
|
outputSchema: z
|
|
4211
3789
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
4212
3790
|
.optional()
|
|
@@ -4221,7 +3799,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4221
3799
|
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
4222
3800
|
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
4223
3801
|
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
4224
|
-
// Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
|
|
4225
3802
|
workingDir: z
|
|
4226
3803
|
.string()
|
|
4227
3804
|
.min(1)
|
|
@@ -4318,11 +3895,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4318
3895
|
.boolean()
|
|
4319
3896
|
.default(false)
|
|
4320
3897
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4321
|
-
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
4322
|
-
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
4323
|
-
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
4324
|
-
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
4325
|
-
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
4326
3898
|
outputFormat: z
|
|
4327
3899
|
.enum(["text", "json", "stream-json"])
|
|
4328
3900
|
.default("text")
|
|
@@ -4335,8 +3907,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
4335
3907
|
.boolean()
|
|
4336
3908
|
.default(false)
|
|
4337
3909
|
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3910
|
+
yolo: z
|
|
3911
|
+
.boolean()
|
|
3912
|
+
.optional()
|
|
3913
|
+
.describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
|
|
4338
3914
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
4339
|
-
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
|
|
3915
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
|
|
4340
3916
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
4341
3917
|
prompt,
|
|
4342
3918
|
promptParts,
|
|
@@ -4360,6 +3936,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4360
3936
|
adminPolicyFiles,
|
|
4361
3937
|
attachments,
|
|
4362
3938
|
skipTrust,
|
|
3939
|
+
yolo,
|
|
4363
3940
|
worktree,
|
|
4364
3941
|
});
|
|
4365
3942
|
});
|
|
@@ -4432,13 +4009,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
4432
4009
|
.default(false)
|
|
4433
4010
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4434
4011
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
4435
|
-
// Phase 4 slice ζ — Grok working-directory parity.
|
|
4436
4012
|
workingDir: z
|
|
4437
4013
|
.string()
|
|
4438
4014
|
.min(1)
|
|
4439
4015
|
.optional()
|
|
4440
4016
|
.describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
|
|
4441
|
-
// Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
|
|
4442
4017
|
sandbox: z
|
|
4443
4018
|
.string()
|
|
4444
4019
|
.min(1)
|
|
@@ -4462,8 +4037,16 @@ export function createGatewayServer(deps = {}) {
|
|
|
4462
4037
|
.array(z.string())
|
|
4463
4038
|
.optional()
|
|
4464
4039
|
.describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
|
|
4040
|
+
compactionMode: z
|
|
4041
|
+
.enum(["summary", "transcript", "segments"])
|
|
4042
|
+
.optional()
|
|
4043
|
+
.describe("Grok --compaction-mode: summary (default) | transcript | segments. Sets GROK_COMPACTION_MODE."),
|
|
4044
|
+
compactionDetail: z
|
|
4045
|
+
.enum(["none", "minimal", "balanced", "verbose"])
|
|
4046
|
+
.optional()
|
|
4047
|
+
.describe("Grok --compaction-detail: segment verbatim detail (none|minimal|balanced|verbose, default verbose). Only affects segments mode. Sets GROK_COMPACTION_DETAIL."),
|
|
4465
4048
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
4466
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
|
|
4049
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
|
|
4467
4050
|
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
4468
4051
|
prompt,
|
|
4469
4052
|
promptParts,
|
|
@@ -4492,6 +4075,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
4492
4075
|
systemPromptOverride,
|
|
4493
4076
|
allow,
|
|
4494
4077
|
deny,
|
|
4078
|
+
compactionMode,
|
|
4079
|
+
compactionDetail,
|
|
4495
4080
|
worktree,
|
|
4496
4081
|
});
|
|
4497
4082
|
});
|
|
@@ -4524,11 +4109,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4524
4109
|
.enum(MISTRAL_AGENT_MODES)
|
|
4525
4110
|
.optional()
|
|
4526
4111
|
.describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
|
|
4527
|
-
effort: z
|
|
4528
|
-
.enum(["low", "medium", "high", "xhigh", "max"])
|
|
4529
|
-
.optional()
|
|
4530
|
-
.describe("Vibe effort level"),
|
|
4531
|
-
reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
|
|
4532
4112
|
approvalStrategy: z
|
|
4533
4113
|
.enum(["legacy", "mcp_managed"])
|
|
4534
4114
|
.default("legacy")
|
|
@@ -4569,7 +4149,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4569
4149
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
4570
4150
|
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
4571
4151
|
maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
|
|
4572
|
-
// Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
|
|
4573
4152
|
workingDir: z
|
|
4574
4153
|
.string()
|
|
4575
4154
|
.min(1)
|
|
@@ -4580,7 +4159,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4580
4159
|
.optional()
|
|
4581
4160
|
.describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
|
|
4582
4161
|
worktree: WORKTREE_SCHEMA.optional(),
|
|
4583
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode,
|
|
4162
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
|
|
4584
4163
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
4585
4164
|
prompt,
|
|
4586
4165
|
promptParts,
|
|
@@ -4590,8 +4169,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4590
4169
|
resumeLatest,
|
|
4591
4170
|
createNewSession,
|
|
4592
4171
|
permissionMode,
|
|
4593
|
-
effort,
|
|
4594
|
-
reasoningEffort,
|
|
4595
4172
|
approvalStrategy,
|
|
4596
4173
|
approvalPolicy,
|
|
4597
4174
|
mcpServers,
|
|
@@ -4667,7 +4244,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4667
4244
|
isError: true,
|
|
4668
4245
|
};
|
|
4669
4246
|
}
|
|
4670
|
-
// Parse stream-json output for Claude async jobs
|
|
4671
4247
|
const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
|
|
4672
4248
|
let parsed;
|
|
4673
4249
|
if (outputFormat === "stream-json" && result.stdout) {
|
|
@@ -4727,14 +4303,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4727
4303
|
],
|
|
4728
4304
|
};
|
|
4729
4305
|
});
|
|
4730
|
-
}
|
|
4731
|
-
// Read back any persisted request (sync OR async) by its correlation id.
|
|
4732
|
-
// Registered unconditionally — it reads the flight recorder, which is
|
|
4733
|
-
// independent of async-job persistence. Every sync/async response echoes
|
|
4734
|
-
// its id in `structuredContent.correlationId`; pass that id here to recover
|
|
4735
|
-
// the persisted prompt/response after the inline result is gone. With flight
|
|
4736
|
-
// recording disabled (LLM_GATEWAY_LOGS_DB=none → NoopFlightRecorder) the
|
|
4737
|
-
// query yields no rows and this returns the "not found" shape.
|
|
4306
|
+
}
|
|
4738
4307
|
server.tool("llm_request_result", {
|
|
4739
4308
|
correlationId: z
|
|
4740
4309
|
.string()
|
|
@@ -4805,9 +4374,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4805
4374
|
],
|
|
4806
4375
|
};
|
|
4807
4376
|
});
|
|
4808
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4809
|
-
// Approval Audit Tools
|
|
4810
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4811
4377
|
server.tool("approval_list", {
|
|
4812
4378
|
limit: z
|
|
4813
4379
|
.number()
|
|
@@ -4835,9 +4401,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4835
4401
|
],
|
|
4836
4402
|
};
|
|
4837
4403
|
});
|
|
4838
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4839
|
-
// List Models Tool
|
|
4840
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4841
4404
|
server.tool("list_models", {
|
|
4842
4405
|
cli: z
|
|
4843
4406
|
.preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
|
|
@@ -4916,9 +4479,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4916
4479
|
};
|
|
4917
4480
|
}
|
|
4918
4481
|
});
|
|
4919
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4920
|
-
// Session Management Tools
|
|
4921
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4922
4482
|
server.tool("session_create", {
|
|
4923
4483
|
cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
|
|
4924
4484
|
description: z.string().optional().describe("Session description"),
|
|
@@ -5094,15 +4654,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
5094
4654
|
};
|
|
5095
4655
|
}
|
|
5096
4656
|
const activeSession = await sessionManager.getActiveSession(session.cli);
|
|
5097
|
-
// Slice 2: project a compact cacheState view from the flight
|
|
5098
|
-
// recorder at read time. NOT persisted on the Session interface
|
|
5099
|
-
// (sessions.json stays content-free per the project invariant).
|
|
5100
|
-
// The field is OMITTED entirely (not null, not empty object) when
|
|
5101
|
-
// the session has zero rows in the flight recorder so the response
|
|
5102
|
-
// stays compact for fresh sessions.
|
|
5103
|
-
//
|
|
5104
|
-
// Slice 3: include ttlRemainingMs derived from the gateway's
|
|
5105
|
-
// configured TTL policy. Null for non-claude sessions.
|
|
5106
4657
|
let cacheState;
|
|
5107
4658
|
try {
|
|
5108
4659
|
const stats = computeSessionCacheStats(flightRecorder, session.id);
|
|
@@ -5171,16 +4722,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
5171
4722
|
});
|
|
5172
4723
|
return server;
|
|
5173
4724
|
}
|
|
5174
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5175
|
-
// Async Initialization
|
|
5176
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5177
4725
|
async function initializeSessionManager() {
|
|
5178
4726
|
const config = loadConfig();
|
|
5179
|
-
// Slice λ: file-backed sessions get a cleanup hook that tears down any
|
|
5180
|
-
// git worktrees recorded on session.metadata.worktreePath. PG-backed
|
|
5181
|
-
// sessions skip the hook (multi-tenant deployments don't necessarily
|
|
5182
|
-
// own a single filesystem); revisit if/when worktree support extends
|
|
5183
|
-
// there.
|
|
5184
4727
|
const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
|
|
5185
4728
|
if (config.database) {
|
|
5186
4729
|
logger.info("Initializing PostgreSQL session manager");
|
|
@@ -5198,9 +4741,6 @@ async function initializeSessionManager() {
|
|
|
5198
4741
|
}
|
|
5199
4742
|
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
|
|
5200
4743
|
}
|
|
5201
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5202
|
-
// Health Check Resource (only if using PostgreSQL)
|
|
5203
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5204
4744
|
function registerHealthResource(server) {
|
|
5205
4745
|
if (db) {
|
|
5206
4746
|
server.registerResource("health", "health://status", {
|
|
@@ -5221,7 +4761,6 @@ function registerHealthResource(server) {
|
|
|
5221
4761
|
});
|
|
5222
4762
|
logger.info("Health check resource registered");
|
|
5223
4763
|
}
|
|
5224
|
-
// Process health resource (always available, not dependent on DB)
|
|
5225
4764
|
server.registerResource("process-health", "metrics://process-health", {
|
|
5226
4765
|
title: "Process Health",
|
|
5227
4766
|
description: "Async job health (CPU, memory, zombie detection)",
|
|
@@ -5240,13 +4779,9 @@ function registerHealthResource(server) {
|
|
|
5240
4779
|
});
|
|
5241
4780
|
logger.info("Process health resource registered");
|
|
5242
4781
|
}
|
|
5243
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5244
|
-
// Graceful Shutdown
|
|
5245
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5246
4782
|
async function shutdown(signal) {
|
|
5247
4783
|
logger.info(`Received ${signal}, shutting down gracefully...`);
|
|
5248
4784
|
try {
|
|
5249
|
-
// Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
|
|
5250
4785
|
await killAllProcessGroups();
|
|
5251
4786
|
logger.info("All process groups terminated");
|
|
5252
4787
|
if (activeHttpGateway) {
|
|
@@ -5276,9 +4811,6 @@ async function shutdown(signal) {
|
|
|
5276
4811
|
}
|
|
5277
4812
|
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
|
5278
4813
|
process.on("SIGINT", () => shutdown("SIGINT"));
|
|
5279
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5280
|
-
// Server Startup
|
|
5281
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5282
4814
|
async function main() {
|
|
5283
4815
|
startWindowsBootstrapperSelfHeal();
|
|
5284
4816
|
const args = process.argv.slice(2);
|
|
@@ -5342,7 +4874,6 @@ async function main() {
|
|
|
5342
4874
|
process.env.MCP_TRANSPORT ||
|
|
5343
4875
|
"stdio";
|
|
5344
4876
|
logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
|
|
5345
|
-
// Initialize session manager first
|
|
5346
4877
|
await initializeSessionManager();
|
|
5347
4878
|
const serverDeps = {
|
|
5348
4879
|
sessionManager,
|
|
@@ -5369,14 +4900,11 @@ async function main() {
|
|
|
5369
4900
|
activeServer = createGatewayServer({
|
|
5370
4901
|
...serverDeps,
|
|
5371
4902
|
});
|
|
5372
|
-
// Register health check resource if using PostgreSQL
|
|
5373
4903
|
registerHealthResource(activeServer);
|
|
5374
4904
|
const transport = new StdioServerTransport();
|
|
5375
4905
|
await activeServer.connect(transport);
|
|
5376
4906
|
logger.info("llm-cli-gateway MCP server connected and ready");
|
|
5377
4907
|
}
|
|
5378
|
-
// Guard: only auto-start when run directly (not imported for testing)
|
|
5379
|
-
// Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
|
|
5380
4908
|
const __entryUrl = entrypointFileURL(process.argv[1]);
|
|
5381
4909
|
if (__entryUrl === import.meta.url) {
|
|
5382
4910
|
main().catch(error => {
|