llm-cli-gateway 1.17.4 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +1 -1
- package/dist/approval-manager.js +0 -8
- package/dist/async-job-manager.d.ts +0 -113
- package/dist/async-job-manager.js +6 -124
- package/dist/cache-stats.d.ts +0 -89
- package/dist/cache-stats.js +0 -62
- package/dist/claude-mcp-config.js +0 -1
- package/dist/cli-updater.d.ts +0 -8
- package/dist/cli-updater.js +0 -12
- package/dist/codex-json-parser.d.ts +0 -20
- package/dist/codex-json-parser.js +0 -21
- package/dist/config.d.ts +0 -31
- package/dist/config.js +2 -72
- package/dist/db.d.ts +0 -18
- package/dist/db.js +0 -22
- package/dist/doctor.d.ts +0 -49
- package/dist/doctor.js +0 -47
- package/dist/endpoint-exposure.js +0 -1
- package/dist/executor.d.ts +0 -19
- package/dist/executor.js +3 -38
- package/dist/flight-recorder.d.ts +0 -26
- package/dist/flight-recorder.js +1 -70
- package/dist/gemini-json-parser.d.ts +0 -25
- package/dist/gemini-json-parser.js +0 -28
- package/dist/health.d.ts +0 -3
- package/dist/health.js +0 -3
- package/dist/index.d.ts +1 -221
- package/dist/index.js +14 -563
- package/dist/job-store.d.ts +0 -74
- package/dist/job-store.js +1 -73
- package/dist/logger.d.ts +0 -7
- package/dist/logger.js +0 -6
- package/dist/migrate-sessions.d.ts +0 -3
- package/dist/migrate-sessions.js +0 -16
- package/dist/migrate.js +1 -18
- package/dist/mistral-meta-json-parser.js +0 -67
- package/dist/model-registry.js +0 -13
- package/dist/pricing.d.ts +0 -46
- package/dist/pricing.js +0 -47
- package/dist/process-monitor.d.ts +0 -15
- package/dist/process-monitor.js +2 -31
- package/dist/prompt-parts.d.ts +0 -25
- package/dist/prompt-parts.js +0 -11
- package/dist/provider-status.d.ts +0 -8
- package/dist/provider-status.js +0 -11
- package/dist/request-helpers.d.ts +0 -334
- package/dist/request-helpers.js +1 -229
- package/dist/resources.d.ts +0 -20
- package/dist/resources.js +1 -34
- package/dist/retry.d.ts +0 -45
- package/dist/retry.js +3 -40
- package/dist/session-manager-pg.d.ts +0 -32
- package/dist/session-manager-pg.js +0 -32
- package/dist/session-manager.d.ts +0 -21
- package/dist/session-manager.js +1 -15
- package/dist/stream-json-parser.d.ts +0 -18
- package/dist/stream-json-parser.js +0 -22
- package/dist/upstream-contracts.d.ts +0 -55
- package/dist/upstream-contracts.js +0 -77
- package/dist/validation-orchestrator.js +0 -3
- package/dist/worktree-manager.d.ts +0 -9
- package/dist/worktree-manager.js +0 -21
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -35,7 +35,6 @@ import { printDoctorJson } from "./doctor.js";
|
|
|
35
35
|
import { registerValidationTools } from "./validation-tools.js";
|
|
36
36
|
import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
|
|
37
37
|
import { entrypointFileURL } from "./entrypoint-url.js";
|
|
38
|
-
// Simple logger that writes to stderr (stdout is used for MCP protocol)
|
|
39
38
|
const logger = {
|
|
40
39
|
info: (message, ...args) => {
|
|
41
40
|
console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
|
|
@@ -94,10 +93,6 @@ function logOptimizationTokens(kind, correlationId, original, optimized) {
|
|
|
94
93
|
const reduction = originalTokens === 0 ? 0 : ((originalTokens - optimizedTokens) / originalTokens) * 100;
|
|
95
94
|
logger.info(`[${correlationId}] ${kind} tokens ${originalTokens} → ${optimizedTokens} (${reduction.toFixed(1)}% reduction)`);
|
|
96
95
|
}
|
|
97
|
-
// Sync-to-async deadline: if a sync tool's CLI call hasn't finished within this
|
|
98
|
-
// window, the tool returns a deferred async job reference instead of blocking
|
|
99
|
-
// until the MCP client's tool-call timeout fires (~60s in many runtimes).
|
|
100
|
-
// Configurable via SYNC_DEADLINE_MS env var. Set to 0 to disable (pure sync).
|
|
101
96
|
const SYNC_DEADLINE_MS = (() => {
|
|
102
97
|
const env = process.env.SYNC_DEADLINE_MS;
|
|
103
98
|
if (env !== undefined) {
|
|
@@ -105,11 +100,8 @@ const SYNC_DEADLINE_MS = (() => {
|
|
|
105
100
|
if (Number.isFinite(parsed) && parsed >= 0)
|
|
106
101
|
return parsed;
|
|
107
102
|
}
|
|
108
|
-
return 45_000;
|
|
103
|
+
return 45_000;
|
|
109
104
|
})();
|
|
110
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
111
|
-
// Skills loader — reads .agents/skills/*/SKILL.md at startup
|
|
112
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
113
105
|
const __filename = fileURLToPath(import.meta.url);
|
|
114
106
|
const __dirname = dirname(__filename);
|
|
115
107
|
const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
|
|
@@ -124,7 +116,6 @@ function packageVersion() {
|
|
|
124
116
|
return parsed.version || "unknown";
|
|
125
117
|
}
|
|
126
118
|
catch {
|
|
127
|
-
// Try next candidate.
|
|
128
119
|
}
|
|
129
120
|
}
|
|
130
121
|
return "unknown";
|
|
@@ -137,24 +128,19 @@ function loadSkills() {
|
|
|
137
128
|
const skillPath = join(SKILLS_DIR, dir.name, "SKILL.md");
|
|
138
129
|
try {
|
|
139
130
|
const content = readFileSync(skillPath, "utf-8");
|
|
140
|
-
// Extract description from YAML frontmatter
|
|
141
131
|
const descMatch = content.match(/^---[\s\S]*?description:\s*(.+?)$/m);
|
|
142
132
|
const description = descMatch?.[1]?.trim() || dir.name;
|
|
143
133
|
skills.push({ name: dir.name, content, description });
|
|
144
134
|
}
|
|
145
135
|
catch {
|
|
146
|
-
// Skill file missing or unreadable — skip silently
|
|
147
136
|
}
|
|
148
137
|
}
|
|
149
138
|
}
|
|
150
139
|
catch {
|
|
151
|
-
// Skills directory missing — not fatal
|
|
152
140
|
}
|
|
153
141
|
return skills;
|
|
154
142
|
}
|
|
155
143
|
const loadedSkills = loadSkills();
|
|
156
|
-
// L1: Compact server instructions (~200 tokens) — injected into every client's
|
|
157
|
-
// system prompt at connection time. Covers key patterns + pointers to L2 resources.
|
|
158
144
|
const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
|
|
159
145
|
|
|
160
146
|
Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
|
|
@@ -175,17 +161,11 @@ ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}
|
|
|
175
161
|
function newGatewayMcpServer() {
|
|
176
162
|
return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
|
|
177
163
|
}
|
|
178
|
-
// Global state (initialized asynchronously)
|
|
179
164
|
let sessionManager;
|
|
180
165
|
let db = null;
|
|
181
166
|
const performanceMetrics = new PerformanceMetrics();
|
|
182
167
|
let resourceProvider;
|
|
183
168
|
let flightRecorder = null;
|
|
184
|
-
// Resolved persistence config — single source of truth for the async-job backend.
|
|
185
|
-
// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
|
|
186
|
-
// When backend = "none", the JobStore is null AND *_request_async tools are not
|
|
187
|
-
// registered (see createGatewayServer), making silent in-memory loss
|
|
188
|
-
// structurally impossible.
|
|
189
169
|
let persistenceConfig = null;
|
|
190
170
|
let cacheAwarenessConfig = null;
|
|
191
171
|
let jobStore = null;
|
|
@@ -231,47 +211,9 @@ function getApprovalManager(runtimeLogger = logger) {
|
|
|
231
211
|
return approvalManager;
|
|
232
212
|
}
|
|
233
213
|
const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
|
|
234
|
-
/**
|
|
235
|
-
* Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
|
|
236
|
-
*
|
|
237
|
-
* Both flags reach the upstream CLIs as decimal-formatted argv strings via
|
|
238
|
-
* `String(N)`. `z.number().int().positive()` alone lets values past
|
|
239
|
-
* `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
|
|
240
|
-
* scientific notation that Grok and Vibe both reject. The bounds below
|
|
241
|
-
* (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
|
|
242
|
-
* for price) guarantee a lossless decimal stringification AND a sane
|
|
243
|
-
* upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
|
|
244
|
-
*/
|
|
245
214
|
export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
|
|
246
|
-
// Token budgets can legitimately exceed the agent-turn cap by orders of
|
|
247
|
-
// magnitude. Keep a finite operational guardrail while avoiding the 10k turn
|
|
248
|
-
// ceiling that would make large-context Vibe sessions unusable.
|
|
249
215
|
export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
|
|
250
|
-
// `.min(1e-6)` keeps the value in JS's decimal-stringify range:
|
|
251
|
-
// String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
|
|
252
|
-
// upstream CLIs would reject. 1µUSD per request is fine-grained enough
|
|
253
|
-
// for any plausible budget-cap use.
|
|
254
216
|
export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
|
|
255
|
-
/**
|
|
256
|
-
* Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
|
|
257
|
-
* tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
|
|
258
|
-
* branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
|
|
259
|
-
* name and/or git ref (default ref: HEAD).
|
|
260
|
-
*
|
|
261
|
-
* Lifecycle is gateway-owned: the gateway pre-creates the worktree via
|
|
262
|
-
* `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
|
|
263
|
-
* No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
|
|
264
|
-
* the request carries a sessionId and the session already has a worktree,
|
|
265
|
-
* that worktree is reused. On session_delete or TTL eviction the gateway
|
|
266
|
-
* runs `git worktree remove --force`.
|
|
267
|
-
*
|
|
268
|
-
* Tool response: when a worktree was used, the successful response stdout
|
|
269
|
-
* is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
|
|
270
|
-
* parse/use the path without a schema change (slice λ §1.d).
|
|
271
|
-
*
|
|
272
|
-
* NOTE: callers should `.gitignore` the `.worktrees/` directory in their
|
|
273
|
-
* repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
|
|
274
|
-
*/
|
|
275
217
|
export const WORKTREE_SCHEMA = z
|
|
276
218
|
.union([
|
|
277
219
|
z.boolean(),
|
|
@@ -296,9 +238,6 @@ export const WORKTREE_SCHEMA = z
|
|
|
296
238
|
"path. NOTE: callers should `.gitignore` the `.worktrees/` " +
|
|
297
239
|
"directory in their repo (the gateway does NOT auto-gitignore — " +
|
|
298
240
|
"see slice λ spec Q4).");
|
|
299
|
-
// U22: Session-provider enum extended to five providers. The storage layer's
|
|
300
|
-
// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
|
|
301
|
-
// session_create / session_list / session_clear_all accept the fifth provider.
|
|
302
241
|
export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
|
|
303
242
|
export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
|
|
304
243
|
let activeServer = null;
|
|
@@ -308,13 +247,10 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
308
247
|
const runtimeSessionManager = deps.sessionManager ?? sessionManager;
|
|
309
248
|
const runtimePerformanceMetrics = deps.performanceMetrics ??
|
|
310
249
|
(options.isolateState ? new PerformanceMetrics() : performanceMetrics);
|
|
311
|
-
// Resolve flight recorder BEFORE async manager so isolateState managers
|
|
312
|
-
// can be wired with the same recorder instance the runtime exposes.
|
|
313
250
|
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
314
251
|
const runtimeAsyncJobManager = deps.asyncJobManager ??
|
|
315
252
|
(options.isolateState
|
|
316
|
-
?
|
|
317
|
-
// durable jobs orphaned. Stdio startup injects the process-global manager.
|
|
253
|
+
?
|
|
318
254
|
newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
|
|
319
255
|
: getAsyncJobManager(runtimeLogger));
|
|
320
256
|
const runtimeApprovalManager = deps.approvalManager ??
|
|
@@ -337,15 +273,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
337
273
|
cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
|
|
338
274
|
};
|
|
339
275
|
}
|
|
340
|
-
// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
|
|
341
|
-
// Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
|
|
342
|
-
// In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
|
|
343
276
|
const CLI_IDLE_TIMEOUTS = {
|
|
344
|
-
claude: 600_000,
|
|
345
|
-
codex: 600_000,
|
|
346
|
-
gemini: 600_000,
|
|
347
|
-
grok: 600_000,
|
|
348
|
-
mistral: 600_000,
|
|
277
|
+
claude: 600_000,
|
|
278
|
+
codex: 600_000,
|
|
279
|
+
gemini: 600_000,
|
|
280
|
+
grok: 600_000,
|
|
281
|
+
mistral: 600_000,
|
|
349
282
|
};
|
|
350
283
|
function resolveIdleTimeout(cli, override) {
|
|
351
284
|
if (override !== undefined)
|
|
@@ -353,41 +286,7 @@ function resolveIdleTimeout(cli, override) {
|
|
|
353
286
|
return CLI_IDLE_TIMEOUTS[cli];
|
|
354
287
|
}
|
|
355
288
|
const SYNC_POLL_INTERVAL_MS = 1_000;
|
|
356
|
-
|
|
357
|
-
* Start an async job and poll until completion or deadline.
|
|
358
|
-
* Returns the job result if it finishes in time, or a deferral marker.
|
|
359
|
-
*/
|
|
360
|
-
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
|
|
361
|
-
/**
|
|
362
|
-
* Slice 1.5: when the sync handler has already written a logStart row
|
|
363
|
-
* keyed on `corrId`, pass these so the manager can write logComplete
|
|
364
|
-
* (with usage extraction) when the underlying async job terminates —
|
|
365
|
-
* even if the sync handler returned a deferred response.
|
|
366
|
-
* `writeFlightStart` is NEVER true on this path: the sync handler is
|
|
367
|
-
* always the upstream logStart writer.
|
|
368
|
-
*/
|
|
369
|
-
flightRecorderEntry, extractUsage,
|
|
370
|
-
/**
|
|
371
|
-
* Slice κ: optional stdin payload piped to the child CLI. Currently
|
|
372
|
-
* only Claude's `--input-format stream-json` path sets this. Threaded
|
|
373
|
-
* through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
|
|
374
|
-
* the AsyncJobManager spawn path, and participates in the dedup key.
|
|
375
|
-
*/
|
|
376
|
-
stdin,
|
|
377
|
-
/**
|
|
378
|
-
* Slice λ: optional working directory for the spawned child process,
|
|
379
|
-
* derived from a gateway-owned git worktree. Threaded to both the
|
|
380
|
-
* direct-execute fallback (`executeCli({ cwd })`) and the
|
|
381
|
-
* AsyncJobManager dedup-aware spawn path
|
|
382
|
-
* (`startJobWithDedup({ cwd })`). `cwd` also participates in the
|
|
383
|
-
* dedup key (see async-job-manager.buildRequestKey) so two requests
|
|
384
|
-
* with identical argv in different worktrees do not collide.
|
|
385
|
-
*/
|
|
386
|
-
cwd) {
|
|
387
|
-
// U26 fix: ownership of onComplete is a contract. Once this function returns
|
|
388
|
-
// OR throws, the caller MUST consider onComplete consumed — i.e. it has
|
|
389
|
-
// either been run, or the AsyncJobManager has taken ownership of it. The
|
|
390
|
-
// caller never needs to reclaim.
|
|
289
|
+
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
|
|
391
290
|
let onCompleteOwnedByCaller = onComplete !== undefined;
|
|
392
291
|
const consumeOnComplete = () => {
|
|
393
292
|
if (!onCompleteOwnedByCaller || !onComplete)
|
|
@@ -409,8 +308,6 @@ cwd) {
|
|
|
409
308
|
throw err;
|
|
410
309
|
}
|
|
411
310
|
if (SYNC_DEADLINE_MS === 0) {
|
|
412
|
-
// Disabled — fall through to direct execution.
|
|
413
|
-
// Note: direct execution bypasses dedup. forceRefresh is implied.
|
|
414
311
|
const command = cli === "mistral" ? "vibe" : cli;
|
|
415
312
|
try {
|
|
416
313
|
return await executeCli(command, args, {
|
|
@@ -422,8 +319,6 @@ cwd) {
|
|
|
422
319
|
});
|
|
423
320
|
}
|
|
424
321
|
finally {
|
|
425
|
-
// Direct-execution path completes inline; release per-request resources
|
|
426
|
-
// (e.g. outputSchema temp files) here.
|
|
427
322
|
consumeOnComplete();
|
|
428
323
|
}
|
|
429
324
|
}
|
|
@@ -437,22 +332,12 @@ cwd) {
|
|
|
437
332
|
env,
|
|
438
333
|
stdin,
|
|
439
334
|
onComplete,
|
|
440
|
-
// Sync-deferred path: the upstream sync handler already wrote
|
|
441
|
-
// logStart for this corrId, so writeFlightStart stays false. The
|
|
442
|
-
// manager still writes logComplete on terminal state (which UPDATEs
|
|
443
|
-
// the sync handler's row), closing the previously-orphaned
|
|
444
|
-
// sync-deferred case.
|
|
445
335
|
flightRecorderEntry,
|
|
446
336
|
extractUsage,
|
|
447
337
|
});
|
|
448
|
-
// Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
|
|
449
|
-
// fireOnComplete on terminal status, or run inline immediately for dedup).
|
|
450
338
|
onCompleteOwnedByCaller = false;
|
|
451
339
|
}
|
|
452
340
|
catch (err) {
|
|
453
|
-
// Spawn or pre-spawn failure inside AsyncJobManager. The record was never
|
|
454
|
-
// registered, so onComplete will never be called by the manager. Reclaim
|
|
455
|
-
// here so the temp file is not leaked.
|
|
456
341
|
consumeOnComplete();
|
|
457
342
|
throw err;
|
|
458
343
|
}
|
|
@@ -464,7 +349,6 @@ cwd) {
|
|
|
464
349
|
while (Date.now() < deadline) {
|
|
465
350
|
const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
|
|
466
351
|
if (snapshot && snapshot.status !== "running") {
|
|
467
|
-
// Job finished within deadline — extract result
|
|
468
352
|
const result = runtime.asyncJobManager.getJobResult(job.id);
|
|
469
353
|
if (!result) {
|
|
470
354
|
return { stdout: "", stderr: "Job result unavailable", code: 1 };
|
|
@@ -477,13 +361,6 @@ cwd) {
|
|
|
477
361
|
}
|
|
478
362
|
await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
|
|
479
363
|
}
|
|
480
|
-
// Deadline exceeded — return deferral.
|
|
481
|
-
// R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
|
|
482
|
-
// this call, the manager skips writeFlightComplete on terminal so the
|
|
483
|
-
// sync handler's safeFlightComplete (with rich approvalDecision /
|
|
484
|
-
// optimizationApplied metadata) wins for sync-inline completions. From
|
|
485
|
-
// here on the sync handler returns deferred and will NOT write
|
|
486
|
-
// safeFlightComplete, so the manager must.
|
|
487
364
|
runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
|
|
488
365
|
runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
|
|
489
366
|
return {
|
|
@@ -517,27 +394,6 @@ function buildDeferredToolResponse(deferred, sessionId) {
|
|
|
517
394
|
],
|
|
518
395
|
};
|
|
519
396
|
}
|
|
520
|
-
/**
|
|
521
|
-
* Slice λ: resolve a request's worktree directive into a spawn cwd.
|
|
522
|
-
*
|
|
523
|
-
* - `worktreeOpt` is the Zod-validated input value (boolean |
|
|
524
|
-
* `{ name?, ref? }` | undefined).
|
|
525
|
-
* - When the request has a session AND the session already has a
|
|
526
|
-
* `metadata.worktreePath`, that path is reused (resume semantics).
|
|
527
|
-
* The reused path is returned without touching git; if the directory
|
|
528
|
-
* was externally removed between requests, the next CLI invocation
|
|
529
|
-
* will surface the error naturally.
|
|
530
|
-
* - When no reusable worktree exists, `createWorktree` runs; on success
|
|
531
|
-
* the new path is written to `session.metadata` (only when a session
|
|
532
|
-
* exists — request-scoped worktrees do NOT persist).
|
|
533
|
-
* - Returns `{}` when `worktreeOpt` is undefined/false (preserves
|
|
534
|
-
* pre-λ behaviour at non-worktree call sites).
|
|
535
|
-
* - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
|
|
536
|
-
* in a `createErrorResponse` envelope. Do NOT swallow.
|
|
537
|
-
*
|
|
538
|
-
* Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
|
|
539
|
-
* verify" §5.
|
|
540
|
-
*/
|
|
541
397
|
export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
|
|
542
398
|
if (!worktreeOpt)
|
|
543
399
|
return {};
|
|
@@ -566,30 +422,13 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime)
|
|
|
566
422
|
}
|
|
567
423
|
return { cwd: handle.path, worktreePath: handle.path };
|
|
568
424
|
}
|
|
569
|
-
/**
|
|
570
|
-
* Slice λ §1.d: response-envelope shape decision for `worktreePath`.
|
|
571
|
-
*
|
|
572
|
-
* We surface the worktree path inline as a stdout prefix
|
|
573
|
-
* (`[gateway] worktree=<absolute-path>\n`) rather than as a
|
|
574
|
-
* structuredContent field or JSON wrapper. Rationale:
|
|
575
|
-
* - zero schema change across all 10 tools and their downstream parsers
|
|
576
|
-
* - matches how other slice features (session warnings, cache_state
|
|
577
|
-
* aggregates) surface side-channel metadata today
|
|
578
|
-
* - callers that want the path can split on the first newline; callers
|
|
579
|
-
* that don't care see a single ignorable header line
|
|
580
|
-
*
|
|
581
|
-
* Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
|
|
582
|
-
* the moment a successful response is constructed.
|
|
583
|
-
*/
|
|
584
425
|
export function formatWorktreePrefix(worktreePath) {
|
|
585
426
|
return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
|
|
586
427
|
}
|
|
587
|
-
// Helper function for standardized error responses
|
|
588
428
|
function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
589
429
|
let errorMessage = `Error executing ${cli} CLI`;
|
|
590
430
|
const isLaunchExit = code === 127 || code === -4058;
|
|
591
431
|
if (error) {
|
|
592
|
-
// Command not found or spawn error
|
|
593
432
|
errorMessage += `:\n${error.message}`;
|
|
594
433
|
if (error.message.includes("ENOENT")) {
|
|
595
434
|
errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
|
|
@@ -597,12 +436,10 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
597
436
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI execution failed:`, error.message);
|
|
598
437
|
}
|
|
599
438
|
else if (code === 124) {
|
|
600
|
-
// Wall-clock timeout
|
|
601
439
|
errorMessage += `: Command timed out\n${stderr}`;
|
|
602
440
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI timed out`);
|
|
603
441
|
}
|
|
604
442
|
else if (code === 125) {
|
|
605
|
-
// Idle timeout (stuck process)
|
|
606
443
|
errorMessage += `: Process killed due to inactivity\n${stderr}`;
|
|
607
444
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI killed due to inactivity`);
|
|
608
445
|
}
|
|
@@ -611,7 +448,6 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
611
448
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed to launch`);
|
|
612
449
|
}
|
|
613
450
|
else if (code !== 0) {
|
|
614
|
-
// Other non-zero exit code
|
|
615
451
|
errorMessage += ` (exit code ${code}):\n${stderr}`;
|
|
616
452
|
logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed with exit code ${code}`);
|
|
617
453
|
}
|
|
@@ -634,14 +470,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
634
470
|
},
|
|
635
471
|
};
|
|
636
472
|
}
|
|
637
|
-
export function extractUsageAndCost(cli, output, outputFormat,
|
|
638
|
-
/**
|
|
639
|
-
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
640
|
-
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
641
|
-
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
642
|
-
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
643
|
-
*/
|
|
644
|
-
ctx) {
|
|
473
|
+
export function extractUsageAndCost(cli, output, outputFormat, ctx) {
|
|
645
474
|
if (cli === "claude" && outputFormat === "stream-json") {
|
|
646
475
|
const parsed = parseStreamJson(output);
|
|
647
476
|
if (!parsed.usage) {
|
|
@@ -679,29 +508,12 @@ ctx) {
|
|
|
679
508
|
cacheReadTokens: parsed.usage.cache_read_tokens,
|
|
680
509
|
};
|
|
681
510
|
}
|
|
682
|
-
// Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
|
|
683
|
-
// (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
|
|
684
|
-
// session whose Vibe-assigned UUID we never observed) or the file is
|
|
685
|
-
// missing/malformed, the parser returns `{}` and the FR row simply lacks
|
|
686
|
-
// usage data — matching pre-slice behaviour. No stdout fallback exists.
|
|
687
511
|
if (cli === "mistral") {
|
|
688
512
|
return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
|
|
689
513
|
}
|
|
690
514
|
return {};
|
|
691
515
|
}
|
|
692
|
-
/**
|
|
693
|
-
* Slice 1.5: build the async-job-manager's FR payload from a prep object
|
|
694
|
-
* (which every prepare*Request returns), plus the bound CLI and output
|
|
695
|
-
* format primitives needed by extractUsageAndCost. Returning the closure
|
|
696
|
-
* separately means it captures `cliName` and `fmt` ONLY — never `params`
|
|
697
|
-
* or `prep` — so retention on AsyncJobRecord is O(constant).
|
|
698
|
-
*/
|
|
699
516
|
function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
|
|
700
|
-
// Extract primitives BEFORE building the closure — capturing `prep` or
|
|
701
|
-
// `params` directly would pin large attachments / promptParts on the
|
|
702
|
-
// AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
|
|
703
|
-
// primitives too, threaded through so the Mistral branch of
|
|
704
|
-
// extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
|
|
705
517
|
const cli = cliName;
|
|
706
518
|
const fmt = outputFormat;
|
|
707
519
|
const sid = sessionId;
|
|
@@ -795,11 +607,7 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
|
|
|
795
607
|
}
|
|
796
608
|
return { config: mcpConfig };
|
|
797
609
|
}
|
|
798
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
799
|
-
// MCP Resources
|
|
800
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
801
610
|
function registerBaseResources(server, runtime) {
|
|
802
|
-
// Register skill resources (L2: full docs, read on demand)
|
|
803
611
|
for (const skill of loadedSkills) {
|
|
804
612
|
server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
|
|
805
613
|
title: skill.name,
|
|
@@ -816,7 +624,6 @@ function registerBaseResources(server, runtime) {
|
|
|
816
624
|
}));
|
|
817
625
|
}
|
|
818
626
|
runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
|
|
819
|
-
// Register all sessions resource
|
|
820
627
|
server.registerResource("all-sessions", "sessions://all", {
|
|
821
628
|
title: "📋 All Sessions",
|
|
822
629
|
description: "All conversation sessions across CLIs",
|
|
@@ -826,7 +633,6 @@ function registerBaseResources(server, runtime) {
|
|
|
826
633
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
827
634
|
return { contents: contents ? [contents] : [] };
|
|
828
635
|
});
|
|
829
|
-
// Register Claude sessions resource
|
|
830
636
|
server.registerResource("claude-sessions", "sessions://claude", {
|
|
831
637
|
title: "🤖 Claude Sessions",
|
|
832
638
|
description: "Claude conversation sessions",
|
|
@@ -836,7 +642,6 @@ function registerBaseResources(server, runtime) {
|
|
|
836
642
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
837
643
|
return { contents: contents ? [contents] : [] };
|
|
838
644
|
});
|
|
839
|
-
// Register Codex sessions resource
|
|
840
645
|
server.registerResource("codex-sessions", "sessions://codex", {
|
|
841
646
|
title: "💻 Codex Sessions",
|
|
842
647
|
description: "Codex conversation sessions",
|
|
@@ -846,7 +651,6 @@ function registerBaseResources(server, runtime) {
|
|
|
846
651
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
847
652
|
return { contents: contents ? [contents] : [] };
|
|
848
653
|
});
|
|
849
|
-
// Register Gemini sessions resource
|
|
850
654
|
server.registerResource("gemini-sessions", "sessions://gemini", {
|
|
851
655
|
title: "✨ Gemini Sessions",
|
|
852
656
|
description: "Gemini conversation sessions",
|
|
@@ -856,7 +660,6 @@ function registerBaseResources(server, runtime) {
|
|
|
856
660
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
857
661
|
return { contents: contents ? [contents] : [] };
|
|
858
662
|
});
|
|
859
|
-
// Register Grok sessions resource
|
|
860
663
|
server.registerResource("grok-sessions", "sessions://grok", {
|
|
861
664
|
title: "⚡ Grok Sessions",
|
|
862
665
|
description: "Grok conversation sessions",
|
|
@@ -866,7 +669,6 @@ function registerBaseResources(server, runtime) {
|
|
|
866
669
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
867
670
|
return { contents: contents ? [contents] : [] };
|
|
868
671
|
});
|
|
869
|
-
// Register Mistral sessions resource
|
|
870
672
|
server.registerResource("mistral-sessions", "sessions://mistral", {
|
|
871
673
|
title: "🌬 Mistral Sessions",
|
|
872
674
|
description: "Mistral Vibe conversation sessions",
|
|
@@ -876,7 +678,6 @@ function registerBaseResources(server, runtime) {
|
|
|
876
678
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
877
679
|
return { contents: contents ? [contents] : [] };
|
|
878
680
|
});
|
|
879
|
-
// Register Claude models resource
|
|
880
681
|
server.registerResource("claude-models", "models://claude", {
|
|
881
682
|
title: "🧠 Claude Models",
|
|
882
683
|
description: "Claude models and capabilities",
|
|
@@ -886,7 +687,6 @@ function registerBaseResources(server, runtime) {
|
|
|
886
687
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
887
688
|
return { contents: contents ? [contents] : [] };
|
|
888
689
|
});
|
|
889
|
-
// Register Codex models resource
|
|
890
690
|
server.registerResource("codex-models", "models://codex", {
|
|
891
691
|
title: "🔧 Codex Models",
|
|
892
692
|
description: "Codex models and capabilities",
|
|
@@ -896,7 +696,6 @@ function registerBaseResources(server, runtime) {
|
|
|
896
696
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
897
697
|
return { contents: contents ? [contents] : [] };
|
|
898
698
|
});
|
|
899
|
-
// Register Gemini models resource
|
|
900
699
|
server.registerResource("gemini-models", "models://gemini", {
|
|
901
700
|
title: "🌟 Gemini Models",
|
|
902
701
|
description: "Gemini models and capabilities",
|
|
@@ -906,7 +705,6 @@ function registerBaseResources(server, runtime) {
|
|
|
906
705
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
907
706
|
return { contents: contents ? [contents] : [] };
|
|
908
707
|
});
|
|
909
|
-
// Register Grok models resource
|
|
910
708
|
server.registerResource("grok-models", "models://grok", {
|
|
911
709
|
title: "⚡ Grok Models",
|
|
912
710
|
description: "Grok models and capabilities",
|
|
@@ -916,7 +714,6 @@ function registerBaseResources(server, runtime) {
|
|
|
916
714
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
917
715
|
return { contents: contents ? [contents] : [] };
|
|
918
716
|
});
|
|
919
|
-
// Register Mistral models resource
|
|
920
717
|
server.registerResource("mistral-models", "models://mistral", {
|
|
921
718
|
title: "🌬 Mistral Models",
|
|
922
719
|
description: "Mistral Vibe models and capabilities",
|
|
@@ -926,7 +723,6 @@ function registerBaseResources(server, runtime) {
|
|
|
926
723
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
927
724
|
return { contents: contents ? [contents] : [] };
|
|
928
725
|
});
|
|
929
|
-
// Register performance metrics resource
|
|
930
726
|
server.registerResource("performance-metrics", "metrics://performance", {
|
|
931
727
|
title: "📈 Performance Metrics",
|
|
932
728
|
description: "Request counts, latency, success/failure rates",
|
|
@@ -936,11 +732,6 @@ function registerBaseResources(server, runtime) {
|
|
|
936
732
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
937
733
|
return { contents: contents ? [contents] : [] };
|
|
938
734
|
});
|
|
939
|
-
// Cache-state resources (slice 2). Static URI for global, templated for
|
|
940
|
-
// session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
|
|
941
|
-
// ONLY — never raw prompt or response text. The structural guarantee is in
|
|
942
|
-
// the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
|
|
943
|
-
// themselves: those shapes have no prompt/response/system/task fields.
|
|
944
735
|
server.registerResource("cache-state-global", "cache_state://global", {
|
|
945
736
|
title: "💾 Cache State (Global)",
|
|
946
737
|
description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
|
|
@@ -999,11 +790,6 @@ function registerBaseResources(server, runtime) {
|
|
|
999
790
|
};
|
|
1000
791
|
});
|
|
1001
792
|
}
|
|
1002
|
-
/**
|
|
1003
|
-
* Slice 1: validate the prompt / promptParts mutex at the prep boundary and
|
|
1004
|
-
* return either an error response or the resolved input. The exact error
|
|
1005
|
-
* messages are part of the public contract — tests assert them verbatim.
|
|
1006
|
-
*/
|
|
1007
793
|
function resolvePromptOrPartsForPrep(args) {
|
|
1008
794
|
const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
|
|
1009
795
|
const hasParts = args.promptParts !== undefined;
|
|
@@ -1045,7 +831,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1045
831
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1046
832
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1047
833
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1048
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1049
834
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1050
835
|
prompt: assembledPrompt,
|
|
1051
836
|
allowedTools: params.allowedTools,
|
|
@@ -1058,13 +843,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1058
843
|
score: reviewIntegrity.totalScore,
|
|
1059
844
|
});
|
|
1060
845
|
}
|
|
1061
|
-
// Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
|
|
1062
|
-
// before running optimization. Optimization rewrites the assembled
|
|
1063
|
-
// prompt text the flight-recorder logs, but the κ stdin payload is
|
|
1064
|
-
// built from raw `promptParts` content blocks — letting both run
|
|
1065
|
-
// produces a FR row whose `prompt` no longer matches what Claude
|
|
1066
|
-
// actually received, AND any optimisation-driven text change would
|
|
1067
|
-
// silently break Anthropic prefix-cache reuse on the next call.
|
|
1068
846
|
const ccEarly = params.promptParts?.cacheControl;
|
|
1069
847
|
const cacheControlRequestedEarly = !!(ccEarly &&
|
|
1070
848
|
(ccEarly.system || ccEarly.tools || ccEarly.context));
|
|
@@ -1088,7 +866,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1088
866
|
approvalDecision = runtime.approvalManager.decide({
|
|
1089
867
|
cli: "claude",
|
|
1090
868
|
operation: params.operation,
|
|
1091
|
-
prompt: assembledPrompt,
|
|
869
|
+
prompt: assembledPrompt,
|
|
1092
870
|
bypassRequested: params.dangerouslySkipPermissions,
|
|
1093
871
|
fullAuto: false,
|
|
1094
872
|
requestedMcpServers,
|
|
@@ -1102,18 +880,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1102
880
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1103
881
|
}
|
|
1104
882
|
}
|
|
1105
|
-
// Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
|
|
1106
|
-
// `promptParts` whose stable prefix exceeds the per-model minimum,
|
|
1107
|
-
// the caller has NOT explicitly set `cacheControl`, the gateway
|
|
1108
|
-
// config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
|
|
1109
|
-
// and outputFormat is stream-json. Auto-emit marks the LAST non-empty
|
|
1110
|
-
// stable block (context → tools → system priority — the rightmost
|
|
1111
|
-
// stable block covers the widest prefix). Skipped when optimizePrompt
|
|
1112
|
-
// is on (same rec #5 desync risk).
|
|
1113
|
-
//
|
|
1114
|
-
// The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
|
|
1115
|
-
// breakpoints from caller content are rejected by Anthropic once
|
|
1116
|
-
// Claude Code's own 1h-marked session-wrap blocks land ahead of them.
|
|
1117
883
|
let autoEmittedCacheControlBlock = null;
|
|
1118
884
|
if (!cacheControlRequestedEarly &&
|
|
1119
885
|
runtime.cacheAwareness.emitAnthropicCacheControl &&
|
|
@@ -1124,9 +890,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1124
890
|
const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
|
|
1125
891
|
if (stablePrefixTokens >= threshold) {
|
|
1126
892
|
const pp = params.promptParts;
|
|
1127
|
-
// Rightmost non-empty stable block — its cache_control breakpoint
|
|
1128
|
-
// covers everything above it in the message (the API matches
|
|
1129
|
-
// breakpoints in order).
|
|
1130
893
|
if (pp.context && pp.context.length > 0)
|
|
1131
894
|
autoEmittedCacheControlBlock = "context";
|
|
1132
895
|
else if (pp.tools && pp.tools.length > 0)
|
|
@@ -1141,12 +904,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1141
904
|
}
|
|
1142
905
|
}
|
|
1143
906
|
}
|
|
1144
|
-
// Rec #4: warn when promptParts has a cacheable stable prefix but no
|
|
1145
|
-
// cache_control breakpoint is being emitted (neither explicit nor
|
|
1146
|
-
// auto). Either the caller forgot to set `cacheControl` or
|
|
1147
|
-
// `[cache_awareness].emit_anthropic_cache_control` is off — both
|
|
1148
|
-
// leave the stable prefix bytes unreused across calls, defeating the
|
|
1149
|
-
// point of using `promptParts`.
|
|
1150
907
|
const warnings = [];
|
|
1151
908
|
if (!cacheControlRequestedEarly &&
|
|
1152
909
|
autoEmittedCacheControlBlock === null &&
|
|
@@ -1168,13 +925,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1168
925
|
});
|
|
1169
926
|
}
|
|
1170
927
|
}
|
|
1171
|
-
// Slice κ: switch from the legacy positional `-p <prompt>` emission
|
|
1172
|
-
// to `claude -p --input-format stream-json` and feed a JSON
|
|
1173
|
-
// content-blocks payload via stdin. Non-κ callers (no cacheControl,
|
|
1174
|
-
// or cacheControl with all flags false) take the existing positional
|
|
1175
|
-
// path bit-for-bit. The κ path activates on EITHER an explicit caller
|
|
1176
|
-
// opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
|
|
1177
|
-
// (`autoEmittedCacheControlBlock`).
|
|
1178
928
|
const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
|
|
1179
929
|
let stdinPayload;
|
|
1180
930
|
let cacheControlBlocks;
|
|
@@ -1182,9 +932,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1182
932
|
if (params.outputFormat !== "stream-json") {
|
|
1183
933
|
return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
|
|
1184
934
|
}
|
|
1185
|
-
// promptParts is non-null whenever cacheControlRequested is true
|
|
1186
|
-
// (explicit opt-in lives in PromptParts; auto-emit guard requires
|
|
1187
|
-
// promptParts to be defined).
|
|
1188
935
|
const effectiveParts = autoEmittedCacheControlBlock !== null
|
|
1189
936
|
? {
|
|
1190
937
|
...params.promptParts,
|
|
@@ -1216,11 +963,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1216
963
|
args.push("--output-format", "json");
|
|
1217
964
|
}
|
|
1218
965
|
else if (params.outputFormat === "stream-json") {
|
|
1219
|
-
// Claude CLI 2.x rejects `--print --output-format stream-json` without
|
|
1220
|
-
// `--verbose`: "When using --print, --output-format=stream-json requires
|
|
1221
|
-
// --verbose". --verbose only affects what claude logs to stderr; the
|
|
1222
|
-
// stream-json stdout payload is unchanged, so the gateway's NDJSON
|
|
1223
|
-
// parser is unaffected.
|
|
1224
966
|
args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
|
|
1225
967
|
}
|
|
1226
968
|
}
|
|
@@ -1251,7 +993,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1251
993
|
args.push("--strict-mcp-config");
|
|
1252
994
|
}
|
|
1253
995
|
}
|
|
1254
|
-
// U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
|
|
1255
996
|
let validatedAgents;
|
|
1256
997
|
if (params.agents && Object.keys(params.agents).length > 0) {
|
|
1257
998
|
const result = validateClaudeAgentsMap(params.agents);
|
|
@@ -1309,7 +1050,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1309
1050
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1310
1051
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1311
1052
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1312
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1313
1053
|
const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
|
|
1314
1054
|
if (reviewIntegrity.violations.length > 0) {
|
|
1315
1055
|
runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
|
|
@@ -1330,7 +1070,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1330
1070
|
approvalDecision = runtime.approvalManager.decide({
|
|
1331
1071
|
cli: "codex",
|
|
1332
1072
|
operation: params.operation,
|
|
1333
|
-
prompt: assembledPrompt,
|
|
1073
|
+
prompt: assembledPrompt,
|
|
1334
1074
|
bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
|
|
1335
1075
|
fullAuto: params.fullAuto,
|
|
1336
1076
|
requestedMcpServers,
|
|
@@ -1342,9 +1082,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1342
1082
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1343
1083
|
}
|
|
1344
1084
|
}
|
|
1345
|
-
// Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
|
|
1346
|
-
// Note: `codex exec resume` does NOT accept sandbox policy flags; the original
|
|
1347
|
-
// session's approval policy is inherited. We silently drop fullAuto on resume.
|
|
1348
1085
|
let sessionPlan;
|
|
1349
1086
|
try {
|
|
1350
1087
|
sessionPlan = resolveCodexSessionArgs({
|
|
@@ -1365,9 +1102,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1365
1102
|
}
|
|
1366
1103
|
if (resolvedModel)
|
|
1367
1104
|
args.push("--model", resolvedModel);
|
|
1368
|
-
// Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
|
|
1369
|
-
// `codex exec resume` rejects all of these (the original session's policy is
|
|
1370
|
-
// inherited), so we only emit them when starting a NEW session.
|
|
1371
1105
|
const sandboxFlags = resolveCodexSandboxFlags({
|
|
1372
1106
|
sandboxMode: params.sandboxMode,
|
|
1373
1107
|
askForApproval: params.askForApproval,
|
|
@@ -1383,26 +1117,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1383
1117
|
if (params.dangerouslyBypassApprovalsAndSandbox) {
|
|
1384
1118
|
args.push("--dangerously-bypass-approvals-and-sandbox");
|
|
1385
1119
|
}
|
|
1386
|
-
// U23 fix: emit `--json` when the caller asked for JSON output so the
|
|
1387
|
-
// codex-json-parser actually receives JSONL events. This is what makes
|
|
1388
|
-
// extractUsageAndCost() reachable from the tool surface; without it, the
|
|
1389
|
-
// U23 parser is dead code.
|
|
1390
1120
|
if (params.outputFormat === "json") {
|
|
1391
1121
|
args.push("--json");
|
|
1392
1122
|
}
|
|
1393
1123
|
args.push("--skip-git-repo-check");
|
|
1394
|
-
// U26: High-impact feature flags. `--search` is retained as a compatibility
|
|
1395
|
-
// input but current `codex exec` no longer accepts it, so the helper warns
|
|
1396
|
-
// and emits no argv. `--profile` is accepted for new sessions only. The other
|
|
1397
|
-
// flags here are accepted on resume per `codex exec resume --help` and are
|
|
1398
|
-
// emitted in both branches.
|
|
1399
1124
|
let highImpactCleanup;
|
|
1400
1125
|
if (sessionPlan.mode === "new") {
|
|
1401
|
-
// Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
|
|
1402
|
-
// Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
|
|
1403
|
-
// the original session's cwd and writable-dir policy, so emitting them
|
|
1404
|
-
// on resume would be silently stripped (wasteful + misleading on argv
|
|
1405
|
-
// logs). Gating here mirrors `--search` / `--sandbox`.
|
|
1406
1126
|
if (params.workingDir) {
|
|
1407
1127
|
args.push("-C", params.workingDir);
|
|
1408
1128
|
}
|
|
@@ -1485,7 +1205,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1485
1205
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1486
1206
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1487
1207
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1488
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1489
1208
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1490
1209
|
prompt: assembledPrompt,
|
|
1491
1210
|
allowedTools: params.allowedTools,
|
|
@@ -1509,7 +1228,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1509
1228
|
approvalDecision = runtime.approvalManager.decide({
|
|
1510
1229
|
cli: "gemini",
|
|
1511
1230
|
operation: params.operation,
|
|
1512
|
-
prompt: assembledPrompt,
|
|
1231
|
+
prompt: assembledPrompt,
|
|
1513
1232
|
bypassRequested: params.approvalMode === "yolo" || params.yolo === true,
|
|
1514
1233
|
fullAuto: false,
|
|
1515
1234
|
requestedMcpServers,
|
|
@@ -1523,8 +1242,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1523
1242
|
}
|
|
1524
1243
|
}
|
|
1525
1244
|
const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
|
|
1526
|
-
// U27: Validate high-impact policy paths and prepend attachment tokens
|
|
1527
|
-
// BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
|
|
1528
1245
|
const highImpact = prepareGeminiHighImpactFlags({
|
|
1529
1246
|
sandbox: params.sandbox,
|
|
1530
1247
|
policyFiles: params.policyFiles,
|
|
@@ -1541,19 +1258,11 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1541
1258
|
return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
|
|
1542
1259
|
}
|
|
1543
1260
|
}
|
|
1544
|
-
// U21: Emit the prompt via -p/--prompt rather than as a positional argument.
|
|
1545
|
-
// Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
|
|
1546
|
-
// the documented non-interactive flag and is robust against future CLI mode
|
|
1547
|
-
// changes.
|
|
1548
1261
|
const args = ["-p", effectivePrompt];
|
|
1549
1262
|
if (resolvedModel)
|
|
1550
1263
|
args.push("--model", resolvedModel);
|
|
1551
1264
|
if (effectiveApprovalMode)
|
|
1552
1265
|
args.push("--approval-mode", effectiveApprovalMode);
|
|
1553
|
-
// `--yolo` is functionally identical to `--approval-mode yolo`; emit it only
|
|
1554
|
-
// when the caller asked for yolo AND we are not already emitting
|
|
1555
|
-
// `--approval-mode yolo` (under mcp_managed the gate forces that mode), so
|
|
1556
|
-
// there is never a redundant double auto-approve flag.
|
|
1557
1266
|
if (params.yolo && effectiveApprovalMode !== "yolo") {
|
|
1558
1267
|
args.push("--yolo");
|
|
1559
1268
|
}
|
|
@@ -1569,26 +1278,13 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1569
1278
|
sanitizeCliArgValues(params.includeDirs, "includeDirs");
|
|
1570
1279
|
params.includeDirs.forEach(dir => args.push("--include-directories", dir));
|
|
1571
1280
|
}
|
|
1572
|
-
// U27 high-impact flags (-s / --policy / --admin-policy) appended after the
|
|
1573
|
-
// existing flag set so positional ordering relative to `-p` is preserved.
|
|
1574
1281
|
args.push(...highImpact.args);
|
|
1575
|
-
// U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
|
|
1576
|
-
// JSON parser is otherwise unreachable from the tool surface and the
|
|
1577
|
-
// structured usageMetadata is silently dropped.
|
|
1578
|
-
//
|
|
1579
|
-
// Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
|
|
1580
|
-
// Gemini already streams stdout in real-time so the existing 10-minute
|
|
1581
|
-
// idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
|
|
1582
|
-
// adjustment — unlike Claude, no `--include-partial-messages` companion
|
|
1583
|
-
// flag is required because Gemini emits assistant `delta` events as part
|
|
1584
|
-
// of the default stream-json shape.
|
|
1585
1282
|
if (params.outputFormat === "json") {
|
|
1586
1283
|
args.push("-o", "json");
|
|
1587
1284
|
}
|
|
1588
1285
|
else if (params.outputFormat === "stream-json") {
|
|
1589
1286
|
args.push("-o", "stream-json");
|
|
1590
1287
|
}
|
|
1591
|
-
// Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
|
|
1592
1288
|
if (params.skipTrust) {
|
|
1593
1289
|
args.push("--skip-trust");
|
|
1594
1290
|
}
|
|
@@ -1619,7 +1315,6 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
|
|
|
1619
1315
|
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1620
1316
|
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1621
1317
|
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1622
|
-
// Review integrity check on raw prompt (before optimization)
|
|
1623
1318
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1624
1319
|
prompt: assembledPrompt,
|
|
1625
1320
|
allowedTools: params.allowedTools,
|
|
@@ -1644,7 +1339,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
|
|
|
1644
1339
|
approvalDecision = runtime.approvalManager.decide({
|
|
1645
1340
|
cli: "grok",
|
|
1646
1341
|
operation: params.operation,
|
|
1647
|
-
prompt: assembledPrompt,
|
|
1342
|
+
prompt: assembledPrompt,
|
|
1648
1343
|
bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
|
|
1649
1344
|
fullAuto: false,
|
|
1650
1345
|
requestedMcpServers,
|
|
@@ -1779,9 +1474,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1779
1474
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
1780
1475
|
}
|
|
1781
1476
|
}
|
|
1782
|
-
// Under mcp_managed, force --agent auto-approve so the approval gate's
|
|
1783
|
-
// verdict carries through to the CLI invocation (mirrors Grok's --always-approve
|
|
1784
|
-
// forcing under mcp_managed).
|
|
1785
1477
|
const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
|
|
1786
1478
|
? "auto-approve"
|
|
1787
1479
|
: (params.permissionMode ?? "auto-approve");
|
|
@@ -1828,15 +1520,6 @@ function selectMistralRecoveryModel(failedModel) {
|
|
|
1828
1520
|
].filter((model) => Boolean(model && model !== failedModel));
|
|
1829
1521
|
return candidates.find(model => model !== "local");
|
|
1830
1522
|
}
|
|
1831
|
-
/**
|
|
1832
|
-
* Phase 4 slice δ post-review: pure helper extracted from
|
|
1833
|
-
* `handleMistralRequest` so the retry-path arg-preservation invariants
|
|
1834
|
-
* (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
|
|
1835
|
-
* without mocking awaitJobOrDefer. Any param the wrapper threads into
|
|
1836
|
-
* the FIRST `buildMistralCliInvocation` call MUST also be threaded
|
|
1837
|
-
* through here, or a fresh-workspace / budgeted run can degrade on
|
|
1838
|
-
* the second attempt.
|
|
1839
|
-
*/
|
|
1840
1523
|
export function buildMistralRetryPrep(params, recoveryModel) {
|
|
1841
1524
|
return buildMistralCliInvocation({
|
|
1842
1525
|
prompt: params.effectivePrompt,
|
|
@@ -1857,13 +1540,11 @@ export function buildMistralRetryPrep(params, recoveryModel) {
|
|
|
1857
1540
|
}
|
|
1858
1541
|
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
|
|
1859
1542
|
let finalStdout = stdout;
|
|
1860
|
-
// Skip response optimization for JSON output to prevent corrupting structured data
|
|
1861
1543
|
if (optimizeResponse && outputFormat !== "json") {
|
|
1862
1544
|
const optimized = optimizeResponseText(finalStdout);
|
|
1863
1545
|
logOptimizationTokens("response", corrId, finalStdout, optimized);
|
|
1864
1546
|
finalStdout = optimized;
|
|
1865
1547
|
}
|
|
1866
|
-
// Append review integrity warnings to response text (skip for JSON output to avoid corruption)
|
|
1867
1548
|
if (prep.reviewIntegrity &&
|
|
1868
1549
|
prep.reviewIntegrity.violations.length > 0 &&
|
|
1869
1550
|
outputFormat !== "json") {
|
|
@@ -1880,9 +1561,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1880
1561
|
correlationId: corrId,
|
|
1881
1562
|
sessionId: sessionId || null,
|
|
1882
1563
|
durationMs,
|
|
1883
|
-
// Phase 4 slice β: thread sessionId + home so the Mistral branch of
|
|
1884
|
-
// extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
|
|
1885
|
-
// Other CLIs ignore the ctx (their usage source is stdout).
|
|
1886
1564
|
...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
|
|
1887
1565
|
exitCode: 0,
|
|
1888
1566
|
retryCount: 0,
|
|
@@ -1912,12 +1590,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1912
1590
|
}
|
|
1913
1591
|
return response;
|
|
1914
1592
|
}
|
|
1915
|
-
/**
|
|
1916
|
-
* Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
|
|
1917
|
-
* claude session, if the feature is enabled, the session has prior cache
|
|
1918
|
-
* writes, and ttlRemainingMs is below the threshold (30s by default).
|
|
1919
|
-
* Returns null when no warning applies.
|
|
1920
|
-
*/
|
|
1921
1593
|
function maybeBuildCacheTtlWarning(args) {
|
|
1922
1594
|
if (args.cli !== "claude")
|
|
1923
1595
|
return null;
|
|
@@ -1946,7 +1618,6 @@ function resolveHandlerRuntime(deps) {
|
|
|
1946
1618
|
if (deps.runtime)
|
|
1947
1619
|
return deps.runtime;
|
|
1948
1620
|
const asyncDeps = deps;
|
|
1949
|
-
// Older HandlerDeps callers may not provide `warn`; default-route to `info`.
|
|
1950
1621
|
const depLogger = deps.logger;
|
|
1951
1622
|
const normalizedLogger = {
|
|
1952
1623
|
info: depLogger.info,
|
|
@@ -2000,8 +1671,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
2000
1671
|
}, runtime);
|
|
2001
1672
|
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
2002
1673
|
try {
|
|
2003
|
-
// Gemini CLI 0.43 supports `--resume`, but not a supported fresh
|
|
2004
|
-
// `--session-id` flag. Fresh sessions emit no session flag.
|
|
2005
1674
|
const sessionPlan = resolveGeminiSessionPlan({
|
|
2006
1675
|
sessionId: params.sessionId,
|
|
2007
1676
|
resumeLatest: params.resumeLatest,
|
|
@@ -2019,7 +1688,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
2019
1688
|
}
|
|
2020
1689
|
const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
|
|
2021
1690
|
const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
|
|
2022
|
-
// Deferred — job still running, return async reference
|
|
2023
1691
|
if (isDeferredResponse(result)) {
|
|
2024
1692
|
return buildDeferredToolResponse(result, effectiveSessionIdHint);
|
|
2025
1693
|
}
|
|
@@ -2040,9 +1708,6 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
2040
1708
|
return createErrorResponse("gemini", code, stderr, corrId);
|
|
2041
1709
|
}
|
|
2042
1710
|
wasSuccessful = true;
|
|
2043
|
-
// Post-success session I/O for explicit resume flows. Fresh Gemini sessions
|
|
2044
|
-
// are owned by the CLI because the current CLI has no supported fresh
|
|
2045
|
-
// session-id flag the gateway can inject.
|
|
2046
1711
|
let effectiveSessionId = effectiveSessionIdHint;
|
|
2047
1712
|
if (effectiveSessionId) {
|
|
2048
1713
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2131,14 +1796,12 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
2131
1796
|
return prep;
|
|
2132
1797
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2133
1798
|
try {
|
|
2134
|
-
// Gemini CLI 0.43 supports `--resume`, but fresh sessions emit no session flag.
|
|
2135
1799
|
const sessionPlan = resolveGeminiSessionPlan({
|
|
2136
1800
|
sessionId: params.sessionId,
|
|
2137
1801
|
resumeLatest: params.resumeLatest,
|
|
2138
1802
|
createNewSession: params.createNewSession,
|
|
2139
1803
|
});
|
|
2140
1804
|
args.push(...sessionPlan.args);
|
|
2141
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2142
1805
|
let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
|
|
2143
1806
|
if (effectiveSessionId) {
|
|
2144
1807
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2161,13 +1824,8 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
2161
1824
|
catch (err) {
|
|
2162
1825
|
return createErrorResponse("gemini_request_async", 1, "", corrId, err);
|
|
2163
1826
|
}
|
|
2164
|
-
// Start job only after all session I/O succeeds. U23: forward outputFormat
|
|
2165
|
-
// so AsyncJobManager records it in the durable store (the manager also
|
|
2166
|
-
// surfaces it in the snapshot).
|
|
2167
1827
|
assertUpstreamCliArgs("gemini", args);
|
|
2168
1828
|
assertUpstreamCliEnv("gemini", undefined);
|
|
2169
|
-
// Slice 1.5: pure async path — no upstream safeFlightStart, so the
|
|
2170
|
-
// manager owns both logStart and logComplete for this corrId.
|
|
2171
1829
|
const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
|
|
2172
1830
|
const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
|
|
2173
1831
|
deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
|
|
@@ -2244,7 +1902,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2244
1902
|
}, runtime);
|
|
2245
1903
|
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
2246
1904
|
try {
|
|
2247
|
-
// Session arg planning (pure, no I/O)
|
|
2248
1905
|
const sessionResult = resolveGrokSessionArgs({
|
|
2249
1906
|
sessionId: params.sessionId,
|
|
2250
1907
|
resumeLatest: params.resumeLatest,
|
|
@@ -2260,7 +1917,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2260
1917
|
}
|
|
2261
1918
|
const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
|
|
2262
1919
|
const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2263
|
-
// Deferred — job still running, return async reference
|
|
2264
1920
|
if (isDeferredResponse(result)) {
|
|
2265
1921
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
2266
1922
|
}
|
|
@@ -2281,7 +1937,6 @@ export async function handleGrokRequest(deps, params) {
|
|
|
2281
1937
|
return createErrorResponse("grok", code, stderr, corrId);
|
|
2282
1938
|
}
|
|
2283
1939
|
wasSuccessful = true;
|
|
2284
|
-
// Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
|
|
2285
1940
|
let effectiveSessionId = sessionResult.effectiveSessionId;
|
|
2286
1941
|
if (sessionResult.userProvidedSession && effectiveSessionId) {
|
|
2287
1942
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2374,14 +2029,12 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2374
2029
|
return prep;
|
|
2375
2030
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2376
2031
|
try {
|
|
2377
|
-
// Session arg planning (pure, no I/O)
|
|
2378
2032
|
const sessionResult = resolveGrokSessionArgs({
|
|
2379
2033
|
sessionId: params.sessionId,
|
|
2380
2034
|
resumeLatest: params.resumeLatest,
|
|
2381
2035
|
createNewSession: params.createNewSession,
|
|
2382
2036
|
});
|
|
2383
2037
|
args.push(...sessionResult.resumeArgs);
|
|
2384
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2385
2038
|
let effectiveSessionId = sessionResult.effectiveSessionId;
|
|
2386
2039
|
if (sessionResult.userProvidedSession && effectiveSessionId) {
|
|
2387
2040
|
const existing = await deps.sessionManager.getSession(effectiveSessionId);
|
|
@@ -2408,7 +2061,6 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2408
2061
|
catch (err) {
|
|
2409
2062
|
return createErrorResponse("grok_request_async", 1, "", corrId, err);
|
|
2410
2063
|
}
|
|
2411
|
-
// Start job only after all session I/O succeeds
|
|
2412
2064
|
assertUpstreamCliArgs("grok", args);
|
|
2413
2065
|
assertUpstreamCliEnv("grok", undefined);
|
|
2414
2066
|
const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
|
|
@@ -2505,8 +2157,6 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2505
2157
|
deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
|
|
2506
2158
|
const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
|
|
2507
2159
|
const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
|
|
2508
|
-
// Reuse the FR handoff built above — the retry preserves corrId,
|
|
2509
|
-
// so the manager's logComplete still updates the original row.
|
|
2510
2160
|
result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2511
2161
|
if (isDeferredResponse(result)) {
|
|
2512
2162
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
@@ -2717,11 +2367,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2717
2367
|
if (!("args" in prep))
|
|
2718
2368
|
return prep;
|
|
2719
2369
|
const { corrId, args, requestedMcpServers, approvalDecision } = prep;
|
|
2720
|
-
// U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
|
|
2721
|
-
// exactly one place at a time: this scope until startJob succeeds, then
|
|
2722
|
-
// AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
|
|
2723
|
-
// the job is registered. Any code path that fails to hand it off MUST run
|
|
2724
|
-
// it locally.
|
|
2725
2370
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
2726
2371
|
let prepCleanupOwnedHere = prepCleanup !== undefined;
|
|
2727
2372
|
const runPrepCleanupLocally = () => {
|
|
@@ -2736,7 +2381,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2736
2381
|
}
|
|
2737
2382
|
};
|
|
2738
2383
|
try {
|
|
2739
|
-
// Pre-start session I/O (async handlers: prevent orphaned jobs)
|
|
2740
2384
|
let effectiveSessionId = params.sessionId;
|
|
2741
2385
|
if (!params.createNewSession && !params.sessionId) {
|
|
2742
2386
|
const activeSession = await deps.sessionManager.getActiveSession("codex");
|
|
@@ -2755,9 +2399,6 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2755
2399
|
const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
|
|
2756
2400
|
effectiveSessionId = newSession.id;
|
|
2757
2401
|
}
|
|
2758
|
-
// Slice λ: resolve worktree directive after session I/O so resume reuse
|
|
2759
|
-
// can read metadata.worktreePath. A pre-startJob failure here means
|
|
2760
|
-
// prepCleanup is still owned locally; run it before returning.
|
|
2761
2402
|
let worktreeResolution = {};
|
|
2762
2403
|
try {
|
|
2763
2404
|
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
@@ -2766,22 +2407,15 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2766
2407
|
runPrepCleanupLocally();
|
|
2767
2408
|
return createErrorResponse("codex_request_async", 1, "", corrId, err);
|
|
2768
2409
|
}
|
|
2769
|
-
// Start job only after all session I/O succeeds. If startJob throws before
|
|
2770
|
-
// registering the record, ownership stays here and we run it in the catch.
|
|
2771
2410
|
assertUpstreamCliArgs("codex", args);
|
|
2772
2411
|
assertUpstreamCliEnv("codex", undefined);
|
|
2773
2412
|
const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
|
|
2774
2413
|
let job;
|
|
2775
2414
|
try {
|
|
2776
2415
|
job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
|
|
2777
|
-
// Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
|
|
2778
|
-
// status. Release our local ownership claim so the catch path doesn't
|
|
2779
|
-
// double-fire.
|
|
2780
2416
|
prepCleanupOwnedHere = false;
|
|
2781
2417
|
}
|
|
2782
2418
|
catch (startErr) {
|
|
2783
|
-
// startJob never stored the record → manager won't call onComplete. We
|
|
2784
|
-
// still own the cleanup; let the outer catch run it.
|
|
2785
2419
|
throw startErr;
|
|
2786
2420
|
}
|
|
2787
2421
|
deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
|
|
@@ -2808,42 +2442,15 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2808
2442
|
};
|
|
2809
2443
|
}
|
|
2810
2444
|
catch (error) {
|
|
2811
|
-
// Pre-start failure: either session I/O threw, or startJob threw before
|
|
2812
|
-
// registering the record. In either case the manager will NOT fire
|
|
2813
|
-
// prepCleanup, so we must run it here.
|
|
2814
2445
|
runPrepCleanupLocally();
|
|
2815
2446
|
return createErrorResponse("codex_request_async", 1, "", corrId, error);
|
|
2816
2447
|
}
|
|
2817
2448
|
}
|
|
2818
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
2819
|
-
// Claude Code Tool
|
|
2820
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
2821
2449
|
export function createGatewayServer(deps = {}) {
|
|
2822
2450
|
const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
|
|
2823
2451
|
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
|
|
2824
|
-
// `flightRecorder` is destructured into closure scope so the session_get
|
|
2825
|
-
// handler (see ~line 5590) has the FlightRecorderQuery read capability
|
|
2826
|
-
// available without re-resolving runtime. Slice 2 will populate the
|
|
2827
|
-
// `cacheState` field of session_get's response from this read surface.
|
|
2828
|
-
// `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
|
|
2829
2452
|
void flightRecorder;
|
|
2830
2453
|
void cacheAwareness;
|
|
2831
|
-
// Structural invariant: tools register iff ALL THREE conditions hold:
|
|
2832
|
-
// (1) persistence.backend !== "none" — the operator/config has not
|
|
2833
|
-
// explicitly disabled durable persistence;
|
|
2834
|
-
// (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
|
|
2835
|
-
// agrees (loadPersistenceConfig sets this iff backend is one of
|
|
2836
|
-
// sqlite/postgres/memory);
|
|
2837
|
-
// (3) asyncJobManager.hasStore() === true — the runtime manager
|
|
2838
|
-
// actually has a store attached (isolate-mode runtimes use null).
|
|
2839
|
-
//
|
|
2840
|
-
// Each guard closes a distinct re-entry path for the silent-loss footgun:
|
|
2841
|
-
// - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
|
|
2842
|
-
// and re-advertise the async tools while reporting backend='none' in
|
|
2843
|
-
// llm_process_health — exactly contradicting SPEC CLAIM 4f.
|
|
2844
|
-
// - Without (2), config that opts out is ignored.
|
|
2845
|
-
// - Without (3), a null-store manager (isolate-mode / HTTP per-session)
|
|
2846
|
-
// accepts registrations that have nowhere to persist results.
|
|
2847
2454
|
const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
|
|
2848
2455
|
const server = newGatewayMcpServer();
|
|
2849
2456
|
registerBaseResources(server, runtime);
|
|
@@ -2880,7 +2487,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
2880
2487
|
.enum(CLAUDE_PERMISSION_MODES)
|
|
2881
2488
|
.optional()
|
|
2882
2489
|
.describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
|
|
2883
|
-
// U25 — Claude high-impact features
|
|
2884
2490
|
agent: z
|
|
2885
2491
|
.string()
|
|
2886
2492
|
.optional()
|
|
@@ -2920,7 +2526,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
2920
2526
|
.boolean()
|
|
2921
2527
|
.optional()
|
|
2922
2528
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
2923
|
-
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
2924
2529
|
fallbackModel: z
|
|
2925
2530
|
.string()
|
|
2926
2531
|
.min(1)
|
|
@@ -2930,12 +2535,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2930
2535
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
2931
2536
|
.optional()
|
|
2932
2537
|
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
2933
|
-
// Phase 4 slice ζ — Claude additional-workspace-dirs parity
|
|
2934
2538
|
addDir: z
|
|
2935
2539
|
.array(z.string())
|
|
2936
2540
|
.optional()
|
|
2937
2541
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
2938
|
-
// Claude session / settings / tools surface (2.x)
|
|
2939
2542
|
noSessionPersistence: z
|
|
2940
2543
|
.boolean()
|
|
2941
2544
|
.optional()
|
|
@@ -3028,18 +2631,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3028
2631
|
const { corrId, args } = prep;
|
|
3029
2632
|
let durationMs = 0;
|
|
3030
2633
|
let wasSuccessful = false;
|
|
3031
|
-
// Session resolution happens BEFORE safeFlightStart so that:
|
|
3032
|
-
// (1) the TTL warning reads the PRIOR session's lastWriteAt
|
|
3033
|
-
// rather than the row about to be inserted (codex-r1/F1).
|
|
3034
|
-
// (2) the flight-recorder row is tagged with effectiveSessionId
|
|
3035
|
-
// (the session the CLI will actually resume), not the raw
|
|
3036
|
-
// user-provided sessionId.
|
|
3037
2634
|
let effectiveSessionId = sessionId;
|
|
3038
2635
|
let useContinue = continueSession;
|
|
3039
|
-
// Guard the active-session lookup: in some test harnesses the
|
|
3040
|
-
// sessionManager is undefined; the original try-catch wrapped this
|
|
3041
|
-
// block, so we replicate that tolerance here. Failure leaves
|
|
3042
|
-
// effectiveSessionId as the user-provided sessionId.
|
|
3043
2636
|
let activeSession = null;
|
|
3044
2637
|
try {
|
|
3045
2638
|
activeSession = await sessionManager.getActiveSession("claude");
|
|
@@ -3054,16 +2647,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3054
2647
|
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
3055
2648
|
useContinue = true;
|
|
3056
2649
|
}
|
|
3057
|
-
// Slice 3: if the resolved session has a near-expiry Anthropic
|
|
3058
|
-
// cache breakpoint, attach a structured warning (NOT a hard error)
|
|
3059
|
-
// to the response. Computed BEFORE safeFlightStart so the current
|
|
3060
|
-
// row does not skew lastRequestAt.
|
|
3061
2650
|
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
3062
2651
|
runtime,
|
|
3063
2652
|
sessionId: effectiveSessionId,
|
|
3064
2653
|
cli: "claude",
|
|
3065
2654
|
});
|
|
3066
|
-
// Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
|
|
3067
2655
|
const warnings = [
|
|
3068
2656
|
...(ttlWarning ? [ttlWarning] : []),
|
|
3069
2657
|
...(prep.warnings ?? []),
|
|
@@ -3087,8 +2675,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3087
2675
|
args.push("--session-id", effectiveSessionId);
|
|
3088
2676
|
await sessionManager.updateSessionUsage(effectiveSessionId);
|
|
3089
2677
|
}
|
|
3090
|
-
// Slice λ: resolve worktree directive into spawn cwd. Done after
|
|
3091
|
-
// session resolution so resume reuse can read metadata.worktreePath.
|
|
3092
2678
|
let worktreeResolution = {};
|
|
3093
2679
|
try {
|
|
3094
2680
|
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
@@ -3096,11 +2682,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3096
2682
|
catch (err) {
|
|
3097
2683
|
return createErrorResponse("claude_request", 1, "", corrId, err);
|
|
3098
2684
|
}
|
|
3099
|
-
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
3100
2685
|
const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
|
|
3101
2686
|
const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
3102
2687
|
const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
|
|
3103
|
-
// Deferred — job still running, return async reference
|
|
3104
2688
|
if (isDeferredResponse(result)) {
|
|
3105
2689
|
return buildDeferredToolResponse(result, effectiveSessionId);
|
|
3106
2690
|
}
|
|
@@ -3118,9 +2702,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3118
2702
|
errorMessage: stderr || `Exit code ${code}`,
|
|
3119
2703
|
status: "failed",
|
|
3120
2704
|
}, runtime);
|
|
3121
|
-
// Slice 3: attach any computed warnings to the error response so
|
|
3122
|
-
// the caller still sees cache_ttl_expiring_soon when the CLI
|
|
3123
|
-
// happens to fail for an unrelated reason.
|
|
3124
2705
|
const errResp = createErrorResponse("claude", code, stderr, corrId);
|
|
3125
2706
|
if (warnings.length > 0) {
|
|
3126
2707
|
errResp.warnings = warnings;
|
|
@@ -3128,7 +2709,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3128
2709
|
return errResp;
|
|
3129
2710
|
}
|
|
3130
2711
|
wasSuccessful = true;
|
|
3131
|
-
// If we used a session ID and it's not tracked yet, create a session record
|
|
3132
2712
|
if (effectiveSessionId) {
|
|
3133
2713
|
const existingSession = await sessionManager.getSession(effectiveSessionId);
|
|
3134
2714
|
if (!existingSession) {
|
|
@@ -3136,7 +2716,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3136
2716
|
}
|
|
3137
2717
|
}
|
|
3138
2718
|
logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
|
|
3139
|
-
// Parse stream-json NDJSON output to extract result text
|
|
3140
2719
|
if (outputFormat === "stream-json") {
|
|
3141
2720
|
const parsed = parseStreamJson(stdout);
|
|
3142
2721
|
if (parsed.costUsd !== null) {
|
|
@@ -3203,9 +2782,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3203
2782
|
performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
|
|
3204
2783
|
}
|
|
3205
2784
|
});
|
|
3206
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3207
|
-
// Codex Tool
|
|
3208
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3209
2785
|
server.tool("codex_request", {
|
|
3210
2786
|
prompt: z
|
|
3211
2787
|
.string()
|
|
@@ -3270,14 +2846,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3270
2846
|
.boolean()
|
|
3271
2847
|
.default(false)
|
|
3272
2848
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3273
|
-
// U23: emit `--json` so the codex-json-parser surfaces input/output/cache
|
|
3274
|
-
// tokens (and any cost) through extractUsageAndCost. Without "json", the
|
|
3275
|
-
// parser is unreachable and Codex usage is never reported.
|
|
3276
2849
|
outputFormat: z
|
|
3277
2850
|
.enum(["text", "json"])
|
|
3278
2851
|
.default("text")
|
|
3279
2852
|
.describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
|
|
3280
|
-
// U26: high-impact feature flags. All optional.
|
|
3281
2853
|
outputSchema: z
|
|
3282
2854
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
3283
2855
|
.optional()
|
|
@@ -3307,7 +2879,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3307
2879
|
.boolean()
|
|
3308
2880
|
.optional()
|
|
3309
2881
|
.describe("Codex --ignore-rules: skip project rule files for this run."),
|
|
3310
|
-
// Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
|
|
3311
2882
|
workingDir: z
|
|
3312
2883
|
.string()
|
|
3313
2884
|
.min(1)
|
|
@@ -3365,15 +2936,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3365
2936
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
3366
2937
|
}, runtime);
|
|
3367
2938
|
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
|
|
3368
|
-
// U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
|
|
3369
|
-
// guarantees the cleanup runs exactly once — inline for direct
|
|
3370
|
-
// execution, on terminal status for the job-backed path (sync
|
|
3371
|
-
// completion or deferred). The outer finally MUST NOT clean again.
|
|
3372
2939
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
3373
|
-
// Slice λ: resolve worktree directive into spawn cwd. Codex has no
|
|
3374
|
-
// in-handler session resolution prior to spawn (session lookup is
|
|
3375
|
-
// lazy via `codex exec resume`), so the user-supplied sessionId is
|
|
3376
|
-
// the only reuse key.
|
|
3377
2940
|
let worktreeResolution = {};
|
|
3378
2941
|
try {
|
|
3379
2942
|
worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
|
|
@@ -3384,8 +2947,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3384
2947
|
try {
|
|
3385
2948
|
const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
|
|
3386
2949
|
const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
3387
|
-
// Deferred — job still running, return async reference. Cleanup
|
|
3388
|
-
// ownership belongs to AsyncJobManager via onComplete.
|
|
3389
2950
|
if (isDeferredResponse(result)) {
|
|
3390
2951
|
return buildDeferredToolResponse(result, sessionId);
|
|
3391
2952
|
}
|
|
@@ -3406,7 +2967,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3406
2967
|
return createErrorResponse("codex", code, stderr, corrId);
|
|
3407
2968
|
}
|
|
3408
2969
|
wasSuccessful = true;
|
|
3409
|
-
// Track session usage
|
|
3410
2970
|
let effectiveSessionId = sessionId;
|
|
3411
2971
|
if (!createNewSession && !sessionId) {
|
|
3412
2972
|
const activeSession = await sessionManager.getActiveSession("codex");
|
|
@@ -3468,12 +3028,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3468
3028
|
finally {
|
|
3469
3029
|
const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
|
|
3470
3030
|
performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
|
|
3471
|
-
// Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
|
|
3472
3031
|
}
|
|
3473
3032
|
});
|
|
3474
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3475
|
-
// U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
|
|
3476
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3477
3033
|
server.tool("codex_fork_session", {
|
|
3478
3034
|
prompt: z
|
|
3479
3035
|
.string()
|
|
@@ -3510,8 +3066,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3510
3066
|
const startTime = Date.now();
|
|
3511
3067
|
let durationMs = 0;
|
|
3512
3068
|
let wasSuccessful = false;
|
|
3513
|
-
// Enforce mutual exclusion at tool boundary (Zod records the params but
|
|
3514
|
-
// the SDK's `.tool(...)` does not accept top-level refines).
|
|
3515
3069
|
if (sessionId && forkLast) {
|
|
3516
3070
|
return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
|
|
3517
3071
|
}
|
|
@@ -3527,11 +3081,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3527
3081
|
}
|
|
3528
3082
|
const cliInfo = getCliInfo();
|
|
3529
3083
|
const resolvedModel = resolveModelAlias("codex", model, cliInfo);
|
|
3530
|
-
// Compose argv: forkArgs already starts with `fork`. Inject model and
|
|
3531
|
-
// sandbox/approval flags BEFORE the positional <sessionId|--last> +
|
|
3532
|
-
// prompt to keep them as flags rather than positionals. forkArgs layout
|
|
3533
|
-
// is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
|
|
3534
|
-
// we splice flags right after "fork".
|
|
3535
3084
|
const flagSegment = [];
|
|
3536
3085
|
if (resolvedModel)
|
|
3537
3086
|
flagSegment.push("--model", resolvedModel);
|
|
@@ -3568,9 +3117,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3568
3117
|
performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
|
|
3569
3118
|
}
|
|
3570
3119
|
});
|
|
3571
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3572
|
-
// Gemini Tool
|
|
3573
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3574
3120
|
server.tool("gemini_request", {
|
|
3575
3121
|
prompt: z
|
|
3576
3122
|
.string()
|
|
@@ -3621,11 +3167,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3621
3167
|
.boolean()
|
|
3622
3168
|
.default(false)
|
|
3623
3169
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3624
|
-
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3625
|
-
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
3626
|
-
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
3627
|
-
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
3628
|
-
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
3629
3170
|
outputFormat: z
|
|
3630
3171
|
.enum(["text", "json", "stream-json"])
|
|
3631
3172
|
.default("text")
|
|
@@ -3672,9 +3213,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3672
3213
|
worktree,
|
|
3673
3214
|
});
|
|
3674
3215
|
});
|
|
3675
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3676
|
-
// Grok Tool
|
|
3677
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3678
3216
|
server.tool("grok_request", {
|
|
3679
3217
|
prompt: z
|
|
3680
3218
|
.string()
|
|
@@ -3745,13 +3283,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3745
3283
|
.default(false)
|
|
3746
3284
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3747
3285
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3748
|
-
// Phase 4 slice ζ — Grok working-directory parity.
|
|
3749
3286
|
workingDir: z
|
|
3750
3287
|
.string()
|
|
3751
3288
|
.min(1)
|
|
3752
3289
|
.optional()
|
|
3753
3290
|
.describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
|
|
3754
|
-
// Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
|
|
3755
3291
|
sandbox: z
|
|
3756
3292
|
.string()
|
|
3757
3293
|
.min(1)
|
|
@@ -3819,9 +3355,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3819
3355
|
worktree,
|
|
3820
3356
|
});
|
|
3821
3357
|
});
|
|
3822
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3823
|
-
// Mistral Vibe Tool
|
|
3824
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3825
3358
|
server.tool("mistral_request", {
|
|
3826
3359
|
prompt: z
|
|
3827
3360
|
.string()
|
|
@@ -3892,7 +3425,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3892
3425
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3893
3426
|
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
3894
3427
|
maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
|
|
3895
|
-
// Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
|
|
3896
3428
|
workingDir: z
|
|
3897
3429
|
.string()
|
|
3898
3430
|
.min(1)
|
|
@@ -3932,16 +3464,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3932
3464
|
worktree,
|
|
3933
3465
|
});
|
|
3934
3466
|
});
|
|
3935
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3936
|
-
// Async Long-Running Job Tools (No Time-Bound LLM Execution)
|
|
3937
|
-
//
|
|
3938
|
-
// STRUCTURAL INVARIANT: these tools are only registered when a real job
|
|
3939
|
-
// store is attached (`persistence.asyncJobsEnabled === true`). When the
|
|
3940
|
-
// operator has configured `[persistence].backend = "none"`, none of the
|
|
3941
|
-
// *_request_async / llm_job_* tools exist in the MCP tool list at all —
|
|
3942
|
-
// orchestrating agents get a clean "tool not found" signal at connect
|
|
3943
|
-
// time instead of silent in-memory loss after the 1-hour TTL.
|
|
3944
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
3945
3467
|
if (asyncJobsEnabled) {
|
|
3946
3468
|
server.tool("claude_request_async", {
|
|
3947
3469
|
prompt: z
|
|
@@ -3975,7 +3497,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
3975
3497
|
.enum(CLAUDE_PERMISSION_MODES)
|
|
3976
3498
|
.optional()
|
|
3977
3499
|
.describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
|
|
3978
|
-
// U25 — Claude high-impact features
|
|
3979
3500
|
agent: z
|
|
3980
3501
|
.string()
|
|
3981
3502
|
.optional()
|
|
@@ -4015,7 +3536,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4015
3536
|
.boolean()
|
|
4016
3537
|
.optional()
|
|
4017
3538
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
4018
|
-
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
4019
3539
|
fallbackModel: z
|
|
4020
3540
|
.string()
|
|
4021
3541
|
.min(1)
|
|
@@ -4025,12 +3545,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
4025
3545
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
4026
3546
|
.optional()
|
|
4027
3547
|
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
4028
|
-
// Phase 4 slice ζ — Claude additional-workspace-dirs parity
|
|
4029
3548
|
addDir: z
|
|
4030
3549
|
.array(z.string())
|
|
4031
3550
|
.optional()
|
|
4032
3551
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
4033
|
-
// Claude session / settings / tools surface (2.x)
|
|
4034
3552
|
noSessionPersistence: z
|
|
4035
3553
|
.boolean()
|
|
4036
3554
|
.optional()
|
|
@@ -4120,7 +3638,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4120
3638
|
return prep;
|
|
4121
3639
|
const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
|
|
4122
3640
|
try {
|
|
4123
|
-
// Session management (before job start for async)
|
|
4124
3641
|
let effectiveSessionId = sessionId;
|
|
4125
3642
|
let useContinue = continueSession;
|
|
4126
3643
|
const activeSession = await sessionManager.getActiveSession("claude");
|
|
@@ -4144,14 +3661,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
4144
3661
|
await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
|
|
4145
3662
|
}
|
|
4146
3663
|
}
|
|
4147
|
-
// Slice 3: TTL warning on resume (async path too).
|
|
4148
3664
|
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
4149
3665
|
runtime,
|
|
4150
3666
|
sessionId: effectiveSessionId,
|
|
4151
3667
|
cli: "claude",
|
|
4152
3668
|
});
|
|
4153
|
-
// Slice λ: resolve worktree directive after session metadata is
|
|
4154
|
-
// settled so resume reuse can read metadata.worktreePath.
|
|
4155
3669
|
let worktreeResolution = {};
|
|
4156
3670
|
try {
|
|
4157
3671
|
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
@@ -4159,7 +3673,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4159
3673
|
catch (err) {
|
|
4160
3674
|
return createErrorResponse("claude_request_async", 1, "", corrId, err);
|
|
4161
3675
|
}
|
|
4162
|
-
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
4163
3676
|
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
4164
3677
|
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
4165
3678
|
: undefined;
|
|
@@ -4185,8 +3698,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4185
3698
|
if (worktreeResolution.worktreePath) {
|
|
4186
3699
|
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
4187
3700
|
}
|
|
4188
|
-
// Rec #4: include any prep-time warnings (e.g.
|
|
4189
|
-
// cacheable_prefix_uncached) alongside ttlWarning.
|
|
4190
3701
|
const mergedWarnings = [
|
|
4191
3702
|
...(ttlWarning ? [ttlWarning] : []),
|
|
4192
3703
|
...(prep.warnings ?? []),
|
|
@@ -4270,12 +3781,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
4270
3781
|
.boolean()
|
|
4271
3782
|
.default(false)
|
|
4272
3783
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4273
|
-
// U23: emit `--json` to enable JSONL event-stream parsing for token usage.
|
|
4274
3784
|
outputFormat: z
|
|
4275
3785
|
.enum(["text", "json"])
|
|
4276
3786
|
.default("text")
|
|
4277
3787
|
.describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
|
|
4278
|
-
// U26: high-impact feature flags. All optional.
|
|
4279
3788
|
outputSchema: z
|
|
4280
3789
|
.union([z.string(), z.record(z.string(), z.unknown())])
|
|
4281
3790
|
.optional()
|
|
@@ -4290,7 +3799,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4290
3799
|
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
4291
3800
|
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
4292
3801
|
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
4293
|
-
// Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
|
|
4294
3802
|
workingDir: z
|
|
4295
3803
|
.string()
|
|
4296
3804
|
.min(1)
|
|
@@ -4387,11 +3895,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4387
3895
|
.boolean()
|
|
4388
3896
|
.default(false)
|
|
4389
3897
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4390
|
-
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
4391
|
-
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
4392
|
-
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
4393
|
-
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
4394
|
-
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
4395
3898
|
outputFormat: z
|
|
4396
3899
|
.enum(["text", "json", "stream-json"])
|
|
4397
3900
|
.default("text")
|
|
@@ -4506,13 +4009,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
4506
4009
|
.default(false)
|
|
4507
4010
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
4508
4011
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
4509
|
-
// Phase 4 slice ζ — Grok working-directory parity.
|
|
4510
4012
|
workingDir: z
|
|
4511
4013
|
.string()
|
|
4512
4014
|
.min(1)
|
|
4513
4015
|
.optional()
|
|
4514
4016
|
.describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
|
|
4515
|
-
// Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
|
|
4516
4017
|
sandbox: z
|
|
4517
4018
|
.string()
|
|
4518
4019
|
.min(1)
|
|
@@ -4648,7 +4149,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4648
4149
|
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
4649
4150
|
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
4650
4151
|
maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
|
|
4651
|
-
// Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
|
|
4652
4152
|
workingDir: z
|
|
4653
4153
|
.string()
|
|
4654
4154
|
.min(1)
|
|
@@ -4744,7 +4244,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4744
4244
|
isError: true,
|
|
4745
4245
|
};
|
|
4746
4246
|
}
|
|
4747
|
-
// Parse stream-json output for Claude async jobs
|
|
4748
4247
|
const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
|
|
4749
4248
|
let parsed;
|
|
4750
4249
|
if (outputFormat === "stream-json" && result.stdout) {
|
|
@@ -4804,14 +4303,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4804
4303
|
],
|
|
4805
4304
|
};
|
|
4806
4305
|
});
|
|
4807
|
-
}
|
|
4808
|
-
// Read back any persisted request (sync OR async) by its correlation id.
|
|
4809
|
-
// Registered unconditionally — it reads the flight recorder, which is
|
|
4810
|
-
// independent of async-job persistence. Every sync/async response echoes
|
|
4811
|
-
// its id in `structuredContent.correlationId`; pass that id here to recover
|
|
4812
|
-
// the persisted prompt/response after the inline result is gone. With flight
|
|
4813
|
-
// recording disabled (LLM_GATEWAY_LOGS_DB=none → NoopFlightRecorder) the
|
|
4814
|
-
// query yields no rows and this returns the "not found" shape.
|
|
4306
|
+
}
|
|
4815
4307
|
server.tool("llm_request_result", {
|
|
4816
4308
|
correlationId: z
|
|
4817
4309
|
.string()
|
|
@@ -4882,9 +4374,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4882
4374
|
],
|
|
4883
4375
|
};
|
|
4884
4376
|
});
|
|
4885
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4886
|
-
// Approval Audit Tools
|
|
4887
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4888
4377
|
server.tool("approval_list", {
|
|
4889
4378
|
limit: z
|
|
4890
4379
|
.number()
|
|
@@ -4912,9 +4401,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4912
4401
|
],
|
|
4913
4402
|
};
|
|
4914
4403
|
});
|
|
4915
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4916
|
-
// List Models Tool
|
|
4917
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4918
4404
|
server.tool("list_models", {
|
|
4919
4405
|
cli: z
|
|
4920
4406
|
.preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
|
|
@@ -4993,9 +4479,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
4993
4479
|
};
|
|
4994
4480
|
}
|
|
4995
4481
|
});
|
|
4996
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4997
|
-
// Session Management Tools
|
|
4998
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
4999
4482
|
server.tool("session_create", {
|
|
5000
4483
|
cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
|
|
5001
4484
|
description: z.string().optional().describe("Session description"),
|
|
@@ -5171,15 +4654,6 @@ export function createGatewayServer(deps = {}) {
|
|
|
5171
4654
|
};
|
|
5172
4655
|
}
|
|
5173
4656
|
const activeSession = await sessionManager.getActiveSession(session.cli);
|
|
5174
|
-
// Slice 2: project a compact cacheState view from the flight
|
|
5175
|
-
// recorder at read time. NOT persisted on the Session interface
|
|
5176
|
-
// (sessions.json stays content-free per the project invariant).
|
|
5177
|
-
// The field is OMITTED entirely (not null, not empty object) when
|
|
5178
|
-
// the session has zero rows in the flight recorder so the response
|
|
5179
|
-
// stays compact for fresh sessions.
|
|
5180
|
-
//
|
|
5181
|
-
// Slice 3: include ttlRemainingMs derived from the gateway's
|
|
5182
|
-
// configured TTL policy. Null for non-claude sessions.
|
|
5183
4657
|
let cacheState;
|
|
5184
4658
|
try {
|
|
5185
4659
|
const stats = computeSessionCacheStats(flightRecorder, session.id);
|
|
@@ -5248,16 +4722,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
5248
4722
|
});
|
|
5249
4723
|
return server;
|
|
5250
4724
|
}
|
|
5251
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5252
|
-
// Async Initialization
|
|
5253
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5254
4725
|
async function initializeSessionManager() {
|
|
5255
4726
|
const config = loadConfig();
|
|
5256
|
-
// Slice λ: file-backed sessions get a cleanup hook that tears down any
|
|
5257
|
-
// git worktrees recorded on session.metadata.worktreePath. PG-backed
|
|
5258
|
-
// sessions skip the hook (multi-tenant deployments don't necessarily
|
|
5259
|
-
// own a single filesystem); revisit if/when worktree support extends
|
|
5260
|
-
// there.
|
|
5261
4727
|
const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
|
|
5262
4728
|
if (config.database) {
|
|
5263
4729
|
logger.info("Initializing PostgreSQL session manager");
|
|
@@ -5275,9 +4741,6 @@ async function initializeSessionManager() {
|
|
|
5275
4741
|
}
|
|
5276
4742
|
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
|
|
5277
4743
|
}
|
|
5278
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5279
|
-
// Health Check Resource (only if using PostgreSQL)
|
|
5280
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5281
4744
|
function registerHealthResource(server) {
|
|
5282
4745
|
if (db) {
|
|
5283
4746
|
server.registerResource("health", "health://status", {
|
|
@@ -5298,7 +4761,6 @@ function registerHealthResource(server) {
|
|
|
5298
4761
|
});
|
|
5299
4762
|
logger.info("Health check resource registered");
|
|
5300
4763
|
}
|
|
5301
|
-
// Process health resource (always available, not dependent on DB)
|
|
5302
4764
|
server.registerResource("process-health", "metrics://process-health", {
|
|
5303
4765
|
title: "Process Health",
|
|
5304
4766
|
description: "Async job health (CPU, memory, zombie detection)",
|
|
@@ -5317,13 +4779,9 @@ function registerHealthResource(server) {
|
|
|
5317
4779
|
});
|
|
5318
4780
|
logger.info("Process health resource registered");
|
|
5319
4781
|
}
|
|
5320
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5321
|
-
// Graceful Shutdown
|
|
5322
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5323
4782
|
async function shutdown(signal) {
|
|
5324
4783
|
logger.info(`Received ${signal}, shutting down gracefully...`);
|
|
5325
4784
|
try {
|
|
5326
|
-
// Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
|
|
5327
4785
|
await killAllProcessGroups();
|
|
5328
4786
|
logger.info("All process groups terminated");
|
|
5329
4787
|
if (activeHttpGateway) {
|
|
@@ -5353,9 +4811,6 @@ async function shutdown(signal) {
|
|
|
5353
4811
|
}
|
|
5354
4812
|
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
|
5355
4813
|
process.on("SIGINT", () => shutdown("SIGINT"));
|
|
5356
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5357
|
-
// Server Startup
|
|
5358
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
5359
4814
|
async function main() {
|
|
5360
4815
|
startWindowsBootstrapperSelfHeal();
|
|
5361
4816
|
const args = process.argv.slice(2);
|
|
@@ -5419,7 +4874,6 @@ async function main() {
|
|
|
5419
4874
|
process.env.MCP_TRANSPORT ||
|
|
5420
4875
|
"stdio";
|
|
5421
4876
|
logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
|
|
5422
|
-
// Initialize session manager first
|
|
5423
4877
|
await initializeSessionManager();
|
|
5424
4878
|
const serverDeps = {
|
|
5425
4879
|
sessionManager,
|
|
@@ -5446,14 +4900,11 @@ async function main() {
|
|
|
5446
4900
|
activeServer = createGatewayServer({
|
|
5447
4901
|
...serverDeps,
|
|
5448
4902
|
});
|
|
5449
|
-
// Register health check resource if using PostgreSQL
|
|
5450
4903
|
registerHealthResource(activeServer);
|
|
5451
4904
|
const transport = new StdioServerTransport();
|
|
5452
4905
|
await activeServer.connect(transport);
|
|
5453
4906
|
logger.info("llm-cli-gateway MCP server connected and ready");
|
|
5454
4907
|
}
|
|
5455
|
-
// Guard: only auto-start when run directly (not imported for testing)
|
|
5456
|
-
// Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
|
|
5457
4908
|
const __entryUrl = entrypointFileURL(process.argv[1]);
|
|
5458
4909
|
if (__entryUrl === import.meta.url) {
|
|
5459
4910
|
main().catch(error => {
|