llm-cli-gateway 1.13.2 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +371 -44
- package/dist/async-job-manager.d.ts +15 -1
- package/dist/async-job-manager.js +31 -6
- package/dist/cache-stats.d.ts +26 -0
- package/dist/cache-stats.js +45 -2
- package/dist/executor.d.ts +8 -0
- package/dist/executor.js +7 -2
- package/dist/flight-recorder.d.ts +7 -0
- package/dist/flight-recorder.js +27 -2
- package/dist/index.d.ts +126 -1
- package/dist/index.js +480 -50
- package/dist/prompt-parts.d.ts +74 -0
- package/dist/prompt-parts.js +47 -0
- package/dist/session-manager.d.ts +20 -2
- package/dist/session-manager.js +28 -3
- package/dist/upstream-contracts.d.ts +8 -1
- package/dist/upstream-contracts.js +37 -1
- package/dist/worktree-manager.d.ts +41 -0
- package/dist/worktree-manager.js +214 -0
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -13,10 +13,11 @@ import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js"
|
|
|
13
13
|
import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
|
|
14
14
|
import { homedir } from "os";
|
|
15
15
|
import { createSessionManager } from "./session-manager.js";
|
|
16
|
+
import { createWorktree, createWorktreeSessionCleanupHook, } from "./worktree-manager.js";
|
|
16
17
|
import { ResourceProvider } from "./resources.js";
|
|
17
18
|
import { PerformanceMetrics } from "./metrics.js";
|
|
18
19
|
import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
|
|
19
|
-
import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
|
|
20
|
+
import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, minStableTokensForModel, } from "./config.js";
|
|
20
21
|
import { checkHealth } from "./health.js";
|
|
21
22
|
import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
|
|
22
23
|
import { AsyncJobManager, } from "./async-job-manager.js";
|
|
@@ -26,7 +27,7 @@ import { checkReviewIntegrity } from "./review-integrity.js";
|
|
|
26
27
|
import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
|
|
27
28
|
import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
|
|
28
29
|
import { createFlightRecorder } from "./flight-recorder.js";
|
|
29
|
-
import { resolvePromptInput, PromptPartsSchema } from "./prompt-parts.js";
|
|
30
|
+
import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
|
|
30
31
|
import { computeSessionCacheStats, computeTtlRemaining } from "./cache-stats.js";
|
|
31
32
|
import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
|
|
32
33
|
import { startHttpGateway } from "./http-transport.js";
|
|
@@ -246,6 +247,50 @@ export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
|
|
|
246
247
|
// upstream CLIs would reject. 1µUSD per request is fine-grained enough
|
|
247
248
|
// for any plausible budget-cap use.
|
|
248
249
|
export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
|
|
250
|
+
/**
 * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
 * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
 * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
 * name and/or git ref (default ref: HEAD).
 *
 * Accepted shapes (enforced by the schema below):
 *   - a boolean, or
 *   - a strict object whose only permitted keys are the optional strings
 *     `name` (1-64 chars) and `ref` (1-255 chars); unknown keys are rejected.
 *
 * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
 * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
 * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
 * the request carries a sessionId and the session already has a worktree,
 * that worktree is reused. On session_delete or TTL eviction the gateway
 * runs `git worktree remove --force`.
 *
 * Tool response: when a worktree was used, the successful response stdout
 * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
 * parse/use the path without a schema change (slice λ §1.d).
 *
 * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
 * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
 */
export const WORKTREE_SCHEMA = z
    .union([
        z.boolean(),
        z
            .object({
                name: z.string().min(1).max(64).optional(),
                ref: z.string().min(1).max(255).optional(),
            })
            .strict(),
    ])
    .describe("Slice λ: run this request inside a dedicated git worktree owned by " +
        "the gateway. `true` creates a fresh worktree at " +
        "`<repoRoot>/.worktrees/<uuid>` branched from HEAD. " +
        "`{ name?, ref? }` lets the caller supply a sanitized name and/or a " +
        "git ref (default: HEAD). When the request carries a sessionId and " +
        "the session already has a worktree, that worktree is reused. The " +
        "gateway spawns the child CLI with `cwd: <worktree-path>` — no " +
        "`-w`/`--worktree` flag is ever emitted to the underlying CLI. On " +
        "session_delete or TTL eviction the gateway runs `git worktree " +
        "remove --force`. Successful responses are prefixed with " +
        "`[gateway] worktree=<absolute-path>\\n` so callers can use the " +
        "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
        "directory in their repo (the gateway does NOT auto-gitignore — " +
        "see slice λ spec Q4).");
|
|
249
294
|
// U22: Session-provider enum extended to five providers. The storage layer's
|
|
250
295
|
// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
|
|
251
296
|
// session_create / session_list / session_clear_all accept the fifth provider.
|
|
@@ -253,7 +298,7 @@ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mi
|
|
|
253
298
|
export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
|
|
254
299
|
let activeServer = null;
|
|
255
300
|
let activeHttpGateway = null;
|
|
256
|
-
function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
301
|
+
export function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
257
302
|
const runtimeLogger = deps.logger ?? logger;
|
|
258
303
|
const runtimeSessionManager = deps.sessionManager ?? sessionManager;
|
|
259
304
|
const runtimePerformanceMetrics = deps.performanceMetrics ??
|
|
@@ -316,7 +361,24 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
|
|
|
316
361
|
* `writeFlightStart` is NEVER true on this path: the sync handler is
|
|
317
362
|
* always the upstream logStart writer.
|
|
318
363
|
*/
|
|
319
|
-
flightRecorderEntry, extractUsage
|
|
364
|
+
flightRecorderEntry, extractUsage,
|
|
365
|
+
/**
|
|
366
|
+
* Slice κ: optional stdin payload piped to the child CLI. Currently
|
|
367
|
+
* only Claude's `--input-format stream-json` path sets this. Threaded
|
|
368
|
+
* through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
|
|
369
|
+
* the AsyncJobManager spawn path, and participates in the dedup key.
|
|
370
|
+
*/
|
|
371
|
+
stdin,
|
|
372
|
+
/**
|
|
373
|
+
* Slice λ: optional working directory for the spawned child process,
|
|
374
|
+
* derived from a gateway-owned git worktree. Threaded to both the
|
|
375
|
+
* direct-execute fallback (`executeCli({ cwd })`) and the
|
|
376
|
+
* AsyncJobManager dedup-aware spawn path
|
|
377
|
+
* (`startJobWithDedup({ cwd })`). `cwd` also participates in the
|
|
378
|
+
* dedup key (see async-job-manager.buildRequestKey) so two requests
|
|
379
|
+
* with identical argv in different worktrees do not collide.
|
|
380
|
+
*/
|
|
381
|
+
cwd) {
|
|
320
382
|
// U26 fix: ownership of onComplete is a contract. Once this function returns
|
|
321
383
|
// OR throws, the caller MUST consider onComplete consumed — i.e. it has
|
|
322
384
|
// either been run, or the AsyncJobManager has taken ownership of it. The
|
|
@@ -350,6 +412,8 @@ flightRecorderEntry, extractUsage) {
|
|
|
350
412
|
idleTimeout: idleTimeoutMs,
|
|
351
413
|
logger: runtime.logger,
|
|
352
414
|
env: env ? { ...process.env, ...env } : undefined,
|
|
415
|
+
stdin,
|
|
416
|
+
cwd,
|
|
353
417
|
});
|
|
354
418
|
}
|
|
355
419
|
finally {
|
|
@@ -361,10 +425,12 @@ flightRecorderEntry, extractUsage) {
|
|
|
361
425
|
let outcome;
|
|
362
426
|
try {
|
|
363
427
|
outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
|
|
428
|
+
cwd,
|
|
364
429
|
idleTimeoutMs,
|
|
365
430
|
outputFormat,
|
|
366
431
|
forceRefresh,
|
|
367
432
|
env,
|
|
433
|
+
stdin,
|
|
368
434
|
onComplete,
|
|
369
435
|
// Sync-deferred path: the upstream sync handler already wrote
|
|
370
436
|
// logStart for this corrId, so writeFlightStart stays false. The
|
|
@@ -446,6 +512,73 @@ function buildDeferredToolResponse(deferred, sessionId) {
|
|
|
446
512
|
],
|
|
447
513
|
};
|
|
448
514
|
}
|
|
515
|
+
/**
 * Slice λ: resolve a request's worktree directive into a spawn cwd.
 *
 * - `worktreeOpt` is the Zod-validated input value (boolean |
 *   `{ name?, ref? }` | undefined).
 * - When the request has a session AND the session already has a
 *   `metadata.worktreePath`, that path is reused (resume semantics).
 *   The reused path is returned without touching git; if the directory
 *   was externally removed between requests, the next CLI invocation
 *   will surface the error naturally.
 * - When no reusable worktree exists, `createWorktree` runs against
 *   `process.cwd()` as the repo root; on success the new path is written
 *   to `session.metadata` (only when a session exists — request-scoped
 *   worktrees do NOT persist).
 * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
 *   pre-λ behaviour at non-worktree call sites).
 * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
 *   in a `createErrorResponse` envelope. Do NOT swallow. If persisting
 *   the session metadata fails after the worktree was created, the
 *   worktree path is logged (so it can be found and removed) and the
 *   original error is rethrown unchanged.
 *
 * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
 * verify" §5.
 *
 * @param {boolean | { name?: string, ref?: string } | undefined} worktreeOpt
 *   Worktree directive from the tool request.
 * @param {string | undefined} sessionId
 *   Session whose metadata may already carry / should receive the path.
 * @param {{ sessionManager: object, logger?: object }} runtime
 *   Gateway runtime; only `sessionManager` and `logger` are read here.
 * @returns {Promise<{ cwd?: string, worktreePath?: string }>}
 *   Empty object when no worktree applies; otherwise both fields hold the
 *   same worktree path.
 */
export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
    // `undefined` / `false`: no worktree requested, leave the spawn cwd alone.
    if (!worktreeOpt)
        return {};
    const sessionManager = runtime.sessionManager;
    if (sessionId) {
        // getSession may be sync or async depending on the storage backend;
        // `await` handles both.
        const session = await sessionManager.getSession(sessionId);
        const existingPath = session?.metadata?.worktreePath;
        if (typeof existingPath === "string" && existingPath.length > 0) {
            return { cwd: existingPath, worktreePath: existingPath };
        }
    }
    // `true` means "all defaults"; the object form may carry name and/or ref.
    const { name, ref } = worktreeOpt === true ? {} : worktreeOpt;
    const handle = await createWorktree({
        repoRoot: process.cwd(),
        name,
        ref,
        logger: runtime.logger,
    });
    if (sessionId) {
        try {
            await sessionManager.updateSessionMetadata(sessionId, {
                worktreePath: handle.path,
                worktreeName: handle.name,
            });
        }
        catch (err) {
            // The worktree exists on disk but the session does not know about
            // it, so later requests will not reuse it and session-driven
            // cleanup may not find it. Leave a trace, then propagate.
            runtime.logger?.warn?.(`worktree ${handle.path} was created but could not be recorded on session ${sessionId}; it may need manual removal`);
            throw err;
        }
    }
    return { cwd: handle.path, worktreePath: handle.path };
}
|
|
564
|
+
/**
 * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
 *
 * We surface the worktree path inline as a stdout prefix
 * (`[gateway] worktree=<absolute-path>\n`) rather than as a
 * structuredContent field or JSON wrapper. Rationale:
 *   - zero schema change across all 10 tools and their downstream parsers
 *   - matches how other slice features (session warnings, cache_state
 *     aggregates) surface side-channel metadata today
 *   - callers that want the path can split on the first newline; callers
 *     that don't care see a single ignorable header line
 *
 * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
 * the moment a successful response is constructed.
 *
 * @param {string | undefined | null} worktreePath
 *   Worktree path to advertise; any falsy value means "no worktree used".
 * @returns {string}
 *   The newline-terminated header line, or `""` when `worktreePath` is falsy
 *   so the result can be prepended unconditionally.
 */
export function formatWorktreePrefix(worktreePath) {
    return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
}
|
|
449
582
|
// Helper function for standardized error responses
|
|
450
583
|
function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
451
584
|
let errorMessage = `Error executing ${cli} CLI`;
|
|
@@ -575,6 +708,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
|
|
|
575
708
|
sessionId,
|
|
576
709
|
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
577
710
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
711
|
+
cacheControlBlocks: prep.cacheControlBlocks,
|
|
578
712
|
},
|
|
579
713
|
extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
|
|
580
714
|
};
|
|
@@ -919,6 +1053,19 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
919
1053
|
score: reviewIntegrity.totalScore,
|
|
920
1054
|
});
|
|
921
1055
|
}
|
|
1056
|
+
// Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
|
|
1057
|
+
// before running optimization. Optimization rewrites the assembled
|
|
1058
|
+
// prompt text the flight-recorder logs, but the κ stdin payload is
|
|
1059
|
+
// built from raw `promptParts` content blocks — letting both run
|
|
1060
|
+
// produces a FR row whose `prompt` no longer matches what Claude
|
|
1061
|
+
// actually received, AND any optimisation-driven text change would
|
|
1062
|
+
// silently break Anthropic prefix-cache reuse on the next call.
|
|
1063
|
+
const ccEarly = params.promptParts?.cacheControl;
|
|
1064
|
+
const cacheControlRequestedEarly = !!(ccEarly &&
|
|
1065
|
+
(ccEarly.system || ccEarly.tools || ccEarly.context));
|
|
1066
|
+
if (params.optimizePrompt && cacheControlRequestedEarly) {
|
|
1067
|
+
return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
|
|
1068
|
+
}
|
|
922
1069
|
let effectivePrompt = assembledPrompt;
|
|
923
1070
|
if (params.optimizePrompt) {
|
|
924
1071
|
const optimized = optimizePromptText(effectivePrompt);
|
|
@@ -950,19 +1097,127 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
950
1097
|
return createApprovalDeniedResponse(params.operation, approvalDecision);
|
|
951
1098
|
}
|
|
952
1099
|
}
|
|
953
|
-
|
|
1100
|
+
// Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
|
|
1101
|
+
// `promptParts` whose stable prefix exceeds the per-model minimum,
|
|
1102
|
+
// the caller has NOT explicitly set `cacheControl`, the gateway
|
|
1103
|
+
// config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
|
|
1104
|
+
// and outputFormat is stream-json. Auto-emit marks the LAST non-empty
|
|
1105
|
+
// stable block (context → tools → system priority — the rightmost
|
|
1106
|
+
// stable block covers the widest prefix). Skipped when optimizePrompt
|
|
1107
|
+
// is on (same rec #5 desync risk).
|
|
1108
|
+
//
|
|
1109
|
+
// The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
|
|
1110
|
+
// breakpoints from caller content are rejected by Anthropic once
|
|
1111
|
+
// Claude Code's own 1h-marked session-wrap blocks land ahead of them.
|
|
1112
|
+
let autoEmittedCacheControlBlock = null;
|
|
1113
|
+
if (!cacheControlRequestedEarly &&
|
|
1114
|
+
runtime.cacheAwareness.emitAnthropicCacheControl &&
|
|
1115
|
+
!params.optimizePrompt &&
|
|
1116
|
+
params.outputFormat === "stream-json" &&
|
|
1117
|
+
params.promptParts &&
|
|
1118
|
+
stablePrefixTokens !== null) {
|
|
1119
|
+
const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
|
|
1120
|
+
if (stablePrefixTokens >= threshold) {
|
|
1121
|
+
const pp = params.promptParts;
|
|
1122
|
+
// Rightmost non-empty stable block — its cache_control breakpoint
|
|
1123
|
+
// covers everything above it in the message (the API matches
|
|
1124
|
+
// breakpoints in order).
|
|
1125
|
+
if (pp.context && pp.context.length > 0)
|
|
1126
|
+
autoEmittedCacheControlBlock = "context";
|
|
1127
|
+
else if (pp.tools && pp.tools.length > 0)
|
|
1128
|
+
autoEmittedCacheControlBlock = "tools";
|
|
1129
|
+
else if (pp.system && pp.system.length > 0)
|
|
1130
|
+
autoEmittedCacheControlBlock = "system";
|
|
1131
|
+
if (autoEmittedCacheControlBlock !== null) {
|
|
1132
|
+
runtime.logger.info(`[${corrId}] auto-emitting cache_control on '${autoEmittedCacheControlBlock}' (stablePrefixTokens=${stablePrefixTokens} >= ${threshold} for model='${resolvedModel ?? "default"}')`);
|
|
1133
|
+
if (runtime.cacheAwareness.anthropicTtlSeconds !== 3600) {
|
|
1134
|
+
runtime.logger.warn(`[${corrId}] [cache_awareness].anthropic_ttl_seconds=${runtime.cacheAwareness.anthropicTtlSeconds} ignored for Claude CLI path — Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content; using ttl='1h'.`);
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
// Rec #4: warn when promptParts has a cacheable stable prefix but no
|
|
1140
|
+
// cache_control breakpoint is being emitted (neither explicit nor
|
|
1141
|
+
// auto). Either the caller forgot to set `cacheControl` or
|
|
1142
|
+
// `[cache_awareness].emit_anthropic_cache_control` is off — both
|
|
1143
|
+
// leave the stable prefix bytes unreused across calls, defeating the
|
|
1144
|
+
// point of using `promptParts`.
|
|
1145
|
+
const warnings = [];
|
|
1146
|
+
if (!cacheControlRequestedEarly &&
|
|
1147
|
+
autoEmittedCacheControlBlock === null &&
|
|
1148
|
+
params.promptParts &&
|
|
1149
|
+
stablePrefixTokens !== null) {
|
|
1150
|
+
const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
|
|
1151
|
+
if (stablePrefixTokens >= threshold) {
|
|
1152
|
+
const reason = params.outputFormat !== "stream-json"
|
|
1153
|
+
? "outputFormat is not 'stream-json'"
|
|
1154
|
+
: !runtime.cacheAwareness.emitAnthropicCacheControl
|
|
1155
|
+
? "[cache_awareness].emit_anthropic_cache_control is false"
|
|
1156
|
+
: "no eligible non-empty stable block";
|
|
1157
|
+
warnings.push({
|
|
1158
|
+
code: "cacheable_prefix_uncached",
|
|
1159
|
+
message: `Stable prefix is cacheable (${stablePrefixTokens} tokens >= ${threshold} for model='${resolvedModel ?? "default"}') but no cache_control breakpoint will be emitted (${reason}). Set promptParts.cacheControl explicitly, switch outputFormat to 'stream-json', or enable [cache_awareness].emit_anthropic_cache_control.`,
|
|
1160
|
+
stablePrefixTokens,
|
|
1161
|
+
threshold,
|
|
1162
|
+
reason,
|
|
1163
|
+
});
|
|
1164
|
+
}
|
|
1165
|
+
}
|
|
1166
|
+
// Slice κ: switch from the legacy positional `-p <prompt>` emission
|
|
1167
|
+
// to `claude -p --input-format stream-json` and feed a JSON
|
|
1168
|
+
// content-blocks payload via stdin. Non-κ callers (no cacheControl,
|
|
1169
|
+
// or cacheControl with all flags false) take the existing positional
|
|
1170
|
+
// path bit-for-bit. The κ path activates on EITHER an explicit caller
|
|
1171
|
+
// opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
|
|
1172
|
+
// (`autoEmittedCacheControlBlock`).
|
|
1173
|
+
const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
|
|
1174
|
+
let stdinPayload;
|
|
1175
|
+
let cacheControlBlocks;
|
|
1176
|
+
if (cacheControlRequested) {
|
|
1177
|
+
if (params.outputFormat !== "stream-json") {
|
|
1178
|
+
return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
|
|
1179
|
+
}
|
|
1180
|
+
// promptParts is non-null whenever cacheControlRequested is true
|
|
1181
|
+
// (explicit opt-in lives in PromptParts; auto-emit guard requires
|
|
1182
|
+
// promptParts to be defined).
|
|
1183
|
+
const effectiveParts = autoEmittedCacheControlBlock !== null
|
|
1184
|
+
? {
|
|
1185
|
+
...params.promptParts,
|
|
1186
|
+
cacheControl: {
|
|
1187
|
+
...(params.promptParts.cacheControl ?? {}),
|
|
1188
|
+
[autoEmittedCacheControlBlock]: true,
|
|
1189
|
+
},
|
|
1190
|
+
}
|
|
1191
|
+
: params.promptParts;
|
|
1192
|
+
const built = assembleClaudeCacheBlocks(effectiveParts);
|
|
1193
|
+
stdinPayload = `${JSON.stringify(built.payload)}\n`;
|
|
1194
|
+
cacheControlBlocks = built.markedBlockCount;
|
|
1195
|
+
}
|
|
1196
|
+
const args = cacheControlRequested
|
|
1197
|
+
? [
|
|
1198
|
+
"-p",
|
|
1199
|
+
"--input-format",
|
|
1200
|
+
"stream-json",
|
|
1201
|
+
"--output-format",
|
|
1202
|
+
"stream-json",
|
|
1203
|
+
"--include-partial-messages",
|
|
1204
|
+
"--verbose",
|
|
1205
|
+
]
|
|
1206
|
+
: ["-p", effectivePrompt];
|
|
954
1207
|
if (resolvedModel)
|
|
955
1208
|
args.push("--model", resolvedModel);
|
|
956
|
-
if (
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
1209
|
+
if (!cacheControlRequested) {
|
|
1210
|
+
if (params.outputFormat === "json") {
|
|
1211
|
+
args.push("--output-format", "json");
|
|
1212
|
+
}
|
|
1213
|
+
else if (params.outputFormat === "stream-json") {
|
|
1214
|
+
// Claude CLI 2.x rejects `--print --output-format stream-json` without
|
|
1215
|
+
// `--verbose`: "When using --print, --output-format=stream-json requires
|
|
1216
|
+
// --verbose". --verbose only affects what claude logs to stderr; the
|
|
1217
|
+
// stream-json stdout payload is unchanged, so the gateway's NDJSON
|
|
1218
|
+
// parser is unaffected.
|
|
1219
|
+
args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
|
|
1220
|
+
}
|
|
966
1221
|
}
|
|
967
1222
|
if (params.allowedTools && params.allowedTools.length > 0) {
|
|
968
1223
|
sanitizeCliArgValues(params.allowedTools, "allowedTools");
|
|
@@ -1025,6 +1280,9 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1025
1280
|
args,
|
|
1026
1281
|
stablePrefixHash,
|
|
1027
1282
|
stablePrefixTokens,
|
|
1283
|
+
stdinPayload,
|
|
1284
|
+
cacheControlBlocks,
|
|
1285
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
1028
1286
|
};
|
|
1029
1287
|
}
|
|
1030
1288
|
export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
@@ -1722,8 +1980,15 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1722
1980
|
args.push(...sessionPlan.args);
|
|
1723
1981
|
const userProvidedSession = sessionPlan.resumed;
|
|
1724
1982
|
const effectiveSessionIdHint = sessionPlan.resumed ? params.sessionId : undefined;
|
|
1983
|
+
let worktreeResolution = {};
|
|
1984
|
+
try {
|
|
1985
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionIdHint, runtime);
|
|
1986
|
+
}
|
|
1987
|
+
catch (err) {
|
|
1988
|
+
return createErrorResponse("gemini_request", 1, "", corrId, err);
|
|
1989
|
+
}
|
|
1725
1990
|
const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
|
|
1726
|
-
const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage);
|
|
1991
|
+
// awaitJobOrDefer's trailing parameters are (..., extractUsage, stdin, cwd).
// Gemini has no stdin payload, so `undefined` must occupy the stdin slot;
// otherwise the worktree path would be piped to the child as stdin and the
// process would not be spawned inside the worktree.
const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
1727
1992
|
// Deferred — job still running, return async reference
|
|
1728
1993
|
if (isDeferredResponse(result)) {
|
|
1729
1994
|
return buildDeferredToolResponse(result, effectiveSessionIdHint);
|
|
@@ -1765,6 +2030,12 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1765
2030
|
}
|
|
1766
2031
|
deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
|
|
1767
2032
|
const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
|
|
2033
|
+
if (worktreeResolution.worktreePath) {
|
|
2034
|
+
const first = response.content[0];
|
|
2035
|
+
if (first && first.type === "text") {
|
|
2036
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
2037
|
+
}
|
|
2038
|
+
}
|
|
1768
2039
|
const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
|
|
1769
2040
|
safeFlightComplete(corrId, {
|
|
1770
2041
|
response: stdout,
|
|
@@ -1852,6 +2123,13 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1852
2123
|
}
|
|
1853
2124
|
await deps.sessionManager.updateSessionUsage(effectiveSessionId);
|
|
1854
2125
|
}
|
|
2126
|
+
let worktreeResolution = {};
|
|
2127
|
+
try {
|
|
2128
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
2129
|
+
}
|
|
2130
|
+
catch (err) {
|
|
2131
|
+
return createErrorResponse("gemini_request_async", 1, "", corrId, err);
|
|
2132
|
+
}
|
|
1855
2133
|
// Start job only after all session I/O succeeds. U23: forward outputFormat
|
|
1856
2134
|
// so AsyncJobManager records it in the durable store (the manager also
|
|
1857
2135
|
// surfaces it in the snapshot).
|
|
@@ -1860,7 +2138,7 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1860
2138
|
// Slice 1.5: pure async path — no upstream safeFlightStart, so the
|
|
1861
2139
|
// manager owns both logStart and logComplete for this corrId.
|
|
1862
2140
|
const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
|
|
1863
|
-
const job = deps.asyncJobManager.startJob("gemini", args, corrId,
|
|
2141
|
+
const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
|
|
1864
2142
|
deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
|
|
1865
2143
|
const asyncResponse = {
|
|
1866
2144
|
success: true,
|
|
@@ -1873,6 +2151,9 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1873
2151
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
1874
2152
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
1875
2153
|
}
|
|
2154
|
+
if (worktreeResolution.worktreePath) {
|
|
2155
|
+
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
2156
|
+
}
|
|
1876
2157
|
return {
|
|
1877
2158
|
content: [
|
|
1878
2159
|
{
|
|
@@ -1937,8 +2218,15 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1937
2218
|
createNewSession: params.createNewSession,
|
|
1938
2219
|
});
|
|
1939
2220
|
args.push(...sessionResult.resumeArgs);
|
|
2221
|
+
let worktreeResolution = {};
|
|
2222
|
+
try {
|
|
2223
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
|
|
2224
|
+
}
|
|
2225
|
+
catch (err) {
|
|
2226
|
+
return createErrorResponse("grok_request", 1, "", corrId, err);
|
|
2227
|
+
}
|
|
1940
2228
|
const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
|
|
1941
|
-
const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage);
|
|
2229
|
+
const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
1942
2230
|
// Deferred — job still running, return async reference
|
|
1943
2231
|
if (isDeferredResponse(result)) {
|
|
1944
2232
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
@@ -1982,6 +2270,12 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1982
2270
|
}
|
|
1983
2271
|
deps.logger.info(`[${corrId}] grok_request completed successfully in ${durationMs}ms`);
|
|
1984
2272
|
const response = buildCliResponse("grok", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
|
|
2273
|
+
if (worktreeResolution.worktreePath) {
|
|
2274
|
+
const first = response.content[0];
|
|
2275
|
+
if (first && first.type === "text") {
|
|
2276
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
2277
|
+
}
|
|
2278
|
+
}
|
|
1985
2279
|
safeFlightComplete(corrId, {
|
|
1986
2280
|
response: stdout,
|
|
1987
2281
|
durationMs,
|
|
@@ -2072,11 +2366,18 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2072
2366
|
const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
|
|
2073
2367
|
effectiveSessionId = newSession.id;
|
|
2074
2368
|
}
|
|
2369
|
+
let worktreeResolution = {};
|
|
2370
|
+
try {
|
|
2371
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
2372
|
+
}
|
|
2373
|
+
catch (err) {
|
|
2374
|
+
return createErrorResponse("grok_request_async", 1, "", corrId, err);
|
|
2375
|
+
}
|
|
2075
2376
|
// Start job only after all session I/O succeeds
|
|
2076
2377
|
assertUpstreamCliArgs("grok", args);
|
|
2077
2378
|
assertUpstreamCliEnv("grok", undefined);
|
|
2078
2379
|
const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
|
|
2079
|
-
const job = deps.asyncJobManager.startJob("grok", args, corrId,
|
|
2380
|
+
const job = deps.asyncJobManager.startJob("grok", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
|
|
2080
2381
|
deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
|
|
2081
2382
|
const asyncResponse = {
|
|
2082
2383
|
success: true,
|
|
@@ -2089,6 +2390,9 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
2089
2390
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
2090
2391
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
2091
2392
|
}
|
|
2393
|
+
if (worktreeResolution.worktreePath) {
|
|
2394
|
+
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
2395
|
+
}
|
|
2092
2396
|
return {
|
|
2093
2397
|
content: [
|
|
2094
2398
|
{
|
|
@@ -2149,8 +2453,15 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2149
2453
|
createNewSession: params.createNewSession,
|
|
2150
2454
|
});
|
|
2151
2455
|
args.push(...sessionResult.resumeArgs);
|
|
2456
|
+
let worktreeResolution = {};
|
|
2457
|
+
try {
|
|
2458
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
|
|
2459
|
+
}
|
|
2460
|
+
catch (err) {
|
|
2461
|
+
return createErrorResponse("mistral_request", 1, "", corrId, err);
|
|
2462
|
+
}
|
|
2152
2463
|
const mistralFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, params.sessionId, params.outputFormat);
|
|
2153
|
-
let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
|
|
2464
|
+
let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2154
2465
|
if (isDeferredResponse(result)) {
|
|
2155
2466
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
2156
2467
|
}
|
|
@@ -2162,7 +2473,7 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2162
2473
|
const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
|
|
2163
2474
|
// Reuse the FR handoff built above — the retry preserves corrId,
|
|
2164
2475
|
// so the manager's logComplete still updates the original row.
|
|
2165
|
-
result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
|
|
2476
|
+
result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2166
2477
|
if (isDeferredResponse(result)) {
|
|
2167
2478
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
2168
2479
|
}
|
|
@@ -2208,6 +2519,12 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2208
2519
|
}
|
|
2209
2520
|
deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
|
|
2210
2521
|
const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
|
|
2522
|
+
if (worktreeResolution.worktreePath) {
|
|
2523
|
+
const first = response.content[0];
|
|
2524
|
+
if (first && first.type === "text") {
|
|
2525
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
2526
|
+
}
|
|
2527
|
+
}
|
|
2211
2528
|
safeFlightComplete(corrId, {
|
|
2212
2529
|
response: stdout,
|
|
2213
2530
|
durationMs,
|
|
@@ -2293,10 +2610,17 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2293
2610
|
const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
|
|
2294
2611
|
effectiveSessionId = newSession.id;
|
|
2295
2612
|
}
|
|
2613
|
+
let worktreeResolution = {};
|
|
2614
|
+
try {
|
|
2615
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
2616
|
+
}
|
|
2617
|
+
catch (err) {
|
|
2618
|
+
return createErrorResponse("mistral_request_async", 1, "", corrId, err);
|
|
2619
|
+
}
|
|
2296
2620
|
assertUpstreamCliArgs("mistral", args);
|
|
2297
2621
|
assertUpstreamCliEnv("mistral", mistralEnv);
|
|
2298
2622
|
const mistralAsyncFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, effectiveSessionId, params.outputFormat);
|
|
2299
|
-
const job = deps.asyncJobManager.startJob("mistral", args, corrId,
|
|
2623
|
+
const job = deps.asyncJobManager.startJob("mistral", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
|
|
2300
2624
|
deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
|
|
2301
2625
|
const asyncResponse = {
|
|
2302
2626
|
success: true,
|
|
@@ -2309,6 +2633,9 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2309
2633
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
2310
2634
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
2311
2635
|
}
|
|
2636
|
+
if (worktreeResolution.worktreePath) {
|
|
2637
|
+
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
2638
|
+
}
|
|
2312
2639
|
return {
|
|
2313
2640
|
content: [
|
|
2314
2641
|
{
|
|
@@ -2395,6 +2722,17 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2395
2722
|
const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
|
|
2396
2723
|
effectiveSessionId = newSession.id;
|
|
2397
2724
|
}
|
|
2725
|
+
// Slice λ: resolve worktree directive after session I/O so resume reuse
|
|
2726
|
+
// can read metadata.worktreePath. A pre-startJob failure here means
|
|
2727
|
+
// prepCleanup is still owned locally; run it before returning.
|
|
2728
|
+
let worktreeResolution = {};
|
|
2729
|
+
try {
|
|
2730
|
+
worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
|
|
2731
|
+
}
|
|
2732
|
+
catch (err) {
|
|
2733
|
+
runPrepCleanupLocally();
|
|
2734
|
+
return createErrorResponse("codex_request_async", 1, "", corrId, err);
|
|
2735
|
+
}
|
|
2398
2736
|
// Start job only after all session I/O succeeds. If startJob throws before
|
|
2399
2737
|
// registering the record, ownership stays here and we run it in the catch.
|
|
2400
2738
|
assertUpstreamCliArgs("codex", args);
|
|
@@ -2402,7 +2740,7 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2402
2740
|
const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
|
|
2403
2741
|
let job;
|
|
2404
2742
|
try {
|
|
2405
|
-
job = deps.asyncJobManager.startJob("codex", args, corrId,
|
|
2743
|
+
job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
|
|
2406
2744
|
// Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
|
|
2407
2745
|
// status. Release our local ownership claim so the catch path doesn't
|
|
2408
2746
|
// double-fire.
|
|
@@ -2424,6 +2762,9 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2424
2762
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
2425
2763
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
2426
2764
|
}
|
|
2765
|
+
if (worktreeResolution.worktreePath) {
|
|
2766
|
+
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
2767
|
+
}
|
|
2427
2768
|
return {
|
|
2428
2769
|
content: [
|
|
2429
2770
|
{
|
|
@@ -2481,15 +2822,15 @@ export function createGatewayServer(deps = {}) {
|
|
|
2481
2822
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2482
2823
|
.optional()
|
|
2483
2824
|
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
2484
|
-
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt
|
|
2825
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Use for repeated calls that share a stable prefix — `system`/`tools`/`context` are the stable head; `task` is the volatile tail (never marked). Set `cacheControl: { system?: boolean, tools?: boolean, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ). Requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'` (Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content). Mutually exclusive with `prompt`. The stable prefix hash is logged to the flight recorder for cache_state aggregates."),
|
|
2485
2826
|
model: z
|
|
2486
2827
|
.string()
|
|
2487
2828
|
.optional()
|
|
2488
2829
|
.describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
|
|
2489
2830
|
outputFormat: z
|
|
2490
2831
|
.enum(["text", "json", "stream-json"])
|
|
2491
|
-
.default("
|
|
2492
|
-
.describe("Output format (text|json|stream-json). stream-json
|
|
2832
|
+
.default("stream-json")
|
|
2833
|
+
.describe("Output format (text|json|stream-json). DEFAULT: stream-json — the gateway parses NDJSON usage events to extract input/output/cache_read/cache_creation tokens + cost + model, persists them to the flight recorder for cache_state aggregates, and still returns the assistant text. Override to 'text' only when you truly want unparsed stdout (loses observability)."),
|
|
2493
2834
|
sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
|
|
2494
2835
|
continueSession: z.boolean().default(false).describe("Continue active session"),
|
|
2495
2836
|
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
@@ -2561,6 +2902,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2561
2902
|
.array(z.string())
|
|
2562
2903
|
.optional()
|
|
2563
2904
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
2905
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
2564
2906
|
approvalStrategy: z
|
|
2565
2907
|
.enum(["legacy", "mcp_managed"])
|
|
2566
2908
|
.default("legacy")
|
|
@@ -2591,7 +2933,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2591
2933
|
.boolean()
|
|
2592
2934
|
.default(false)
|
|
2593
2935
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2594
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2936
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2595
2937
|
const startTime = Date.now();
|
|
2596
2938
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2597
2939
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
@@ -2665,7 +3007,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
2665
3007
|
sessionId: effectiveSessionId,
|
|
2666
3008
|
cli: "claude",
|
|
2667
3009
|
});
|
|
2668
|
-
|
|
3010
|
+
// Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
|
|
3011
|
+
const warnings = [
|
|
3012
|
+
...(ttlWarning ? [ttlWarning] : []),
|
|
3013
|
+
...(prep.warnings ?? []),
|
|
3014
|
+
];
|
|
2669
3015
|
safeFlightStart({
|
|
2670
3016
|
correlationId: corrId,
|
|
2671
3017
|
cli: "claude",
|
|
@@ -2674,8 +3020,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2674
3020
|
sessionId: effectiveSessionId,
|
|
2675
3021
|
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
2676
3022
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
3023
|
+
cacheControlBlocks: prep.cacheControlBlocks,
|
|
2677
3024
|
}, runtime);
|
|
2678
|
-
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}`);
|
|
3025
|
+
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
|
|
2679
3026
|
try {
|
|
2680
3027
|
if (useContinue) {
|
|
2681
3028
|
args.push("--continue");
|
|
@@ -2684,10 +3031,19 @@ export function createGatewayServer(deps = {}) {
|
|
|
2684
3031
|
args.push("--session-id", effectiveSessionId);
|
|
2685
3032
|
await sessionManager.updateSessionUsage(effectiveSessionId);
|
|
2686
3033
|
}
|
|
3034
|
+
// Slice λ: resolve worktree directive into spawn cwd. Done after
|
|
3035
|
+
// session resolution so resume reuse can read metadata.worktreePath.
|
|
3036
|
+
let worktreeResolution = {};
|
|
3037
|
+
try {
|
|
3038
|
+
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
3039
|
+
}
|
|
3040
|
+
catch (err) {
|
|
3041
|
+
return createErrorResponse("claude_request", 1, "", corrId, err);
|
|
3042
|
+
}
|
|
2687
3043
|
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
2688
3044
|
const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
|
|
2689
3045
|
const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
2690
|
-
const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage);
|
|
3046
|
+
const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
|
|
2691
3047
|
// Deferred — job still running, return async reference
|
|
2692
3048
|
if (isDeferredResponse(result)) {
|
|
2693
3049
|
return buildDeferredToolResponse(result, effectiveSessionId);
|
|
@@ -2744,7 +3100,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2744
3100
|
exitCode: 0,
|
|
2745
3101
|
status: "completed",
|
|
2746
3102
|
}, runtime);
|
|
2747
|
-
|
|
3103
|
+
const streamResponse = buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
3104
|
+
if (worktreeResolution.worktreePath) {
|
|
3105
|
+
const first = streamResponse.content[0];
|
|
3106
|
+
if (first && first.type === "text") {
|
|
3107
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
3108
|
+
}
|
|
3109
|
+
}
|
|
3110
|
+
return streamResponse;
|
|
2748
3111
|
}
|
|
2749
3112
|
safeFlightComplete(corrId, {
|
|
2750
3113
|
response: stdout,
|
|
@@ -2755,7 +3118,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2755
3118
|
exitCode: 0,
|
|
2756
3119
|
status: "completed",
|
|
2757
3120
|
}, runtime);
|
|
2758
|
-
|
|
3121
|
+
const nonStreamResponse = buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
3122
|
+
if (worktreeResolution.worktreePath) {
|
|
3123
|
+
const first = nonStreamResponse.content[0];
|
|
3124
|
+
if (first && first.type === "text") {
|
|
3125
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
3126
|
+
}
|
|
3127
|
+
}
|
|
3128
|
+
return nonStreamResponse;
|
|
2759
3129
|
}
|
|
2760
3130
|
catch (error) {
|
|
2761
3131
|
const elapsedMs = Math.max(0, Date.now() - startTime);
|
|
@@ -2888,7 +3258,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
2888
3258
|
.array(z.string())
|
|
2889
3259
|
.optional()
|
|
2890
3260
|
.describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
|
|
2891
|
-
|
|
3261
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
3262
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
|
|
2892
3263
|
const startTime = Date.now();
|
|
2893
3264
|
const prep = prepareCodexRequest({
|
|
2894
3265
|
prompt,
|
|
@@ -2940,9 +3311,20 @@ export function createGatewayServer(deps = {}) {
|
|
|
2940
3311
|
// execution, on terminal status for the job-backed path (sync
|
|
2941
3312
|
// completion or deferred). The outer finally MUST NOT clean again.
|
|
2942
3313
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
3314
|
+
// Slice λ: resolve worktree directive into spawn cwd. Codex has no
|
|
3315
|
+
// in-handler session resolution prior to spawn (session lookup is
|
|
3316
|
+
// lazy via `codex exec resume`), so the user-supplied sessionId is
|
|
3317
|
+
// the only reuse key.
|
|
3318
|
+
let worktreeResolution = {};
|
|
3319
|
+
try {
|
|
3320
|
+
worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
|
|
3321
|
+
}
|
|
3322
|
+
catch (err) {
|
|
3323
|
+
return createErrorResponse("codex_request", 1, "", corrId, err);
|
|
3324
|
+
}
|
|
2943
3325
|
try {
|
|
2944
3326
|
const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
|
|
2945
|
-
const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage);
|
|
3327
|
+
const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
|
|
2946
3328
|
// Deferred — job still running, return async reference. Cleanup
|
|
2947
3329
|
// ownership belongs to AsyncJobManager via onComplete.
|
|
2948
3330
|
if (isDeferredResponse(result)) {
|
|
@@ -3000,7 +3382,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
3000
3382
|
cacheCreationTokens: codexUsage.cacheCreationTokens,
|
|
3001
3383
|
costUsd: codexUsage.costUsd,
|
|
3002
3384
|
}, runtime);
|
|
3003
|
-
|
|
3385
|
+
const codexResponse = buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
|
|
3386
|
+
if (worktreeResolution.worktreePath) {
|
|
3387
|
+
const first = codexResponse.content[0];
|
|
3388
|
+
if (first && first.type === "text") {
|
|
3389
|
+
first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
|
|
3390
|
+
}
|
|
3391
|
+
}
|
|
3392
|
+
return codexResponse;
|
|
3004
3393
|
}
|
|
3005
3394
|
catch (error) {
|
|
3006
3395
|
const elapsedMs = Math.max(0, Date.now() - startTime);
|
|
@@ -3190,7 +3579,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3190
3579
|
.boolean()
|
|
3191
3580
|
.default(false)
|
|
3192
3581
|
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3193
|
-
|
|
3582
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
3583
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
|
|
3194
3584
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
3195
3585
|
prompt,
|
|
3196
3586
|
promptParts,
|
|
@@ -3215,6 +3605,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3215
3605
|
adminPolicyFiles,
|
|
3216
3606
|
attachments,
|
|
3217
3607
|
skipTrust,
|
|
3608
|
+
worktree,
|
|
3218
3609
|
});
|
|
3219
3610
|
});
|
|
3220
3611
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3320,7 +3711,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3320
3711
|
.array(z.string())
|
|
3321
3712
|
.optional()
|
|
3322
3713
|
.describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
|
|
3323
|
-
|
|
3714
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
3715
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
|
|
3324
3716
|
return handleGrokRequest({ sessionManager, logger, runtime }, {
|
|
3325
3717
|
prompt,
|
|
3326
3718
|
promptParts,
|
|
@@ -3350,6 +3742,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3350
3742
|
systemPromptOverride,
|
|
3351
3743
|
allow,
|
|
3352
3744
|
deny,
|
|
3745
|
+
worktree,
|
|
3353
3746
|
});
|
|
3354
3747
|
});
|
|
3355
3748
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3439,7 +3832,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3439
3832
|
.array(z.string())
|
|
3440
3833
|
.optional()
|
|
3441
3834
|
.describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
|
|
3442
|
-
|
|
3835
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
3836
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
|
|
3443
3837
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
3444
3838
|
prompt,
|
|
3445
3839
|
promptParts,
|
|
@@ -3466,6 +3860,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3466
3860
|
maxPrice,
|
|
3467
3861
|
workingDir,
|
|
3468
3862
|
addDir,
|
|
3863
|
+
worktree,
|
|
3469
3864
|
});
|
|
3470
3865
|
});
|
|
3471
3866
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3486,15 +3881,15 @@ export function createGatewayServer(deps = {}) {
|
|
|
3486
3881
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3487
3882
|
.optional()
|
|
3488
3883
|
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
3489
|
-
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt
|
|
3884
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Same semantics as claude_request: stable head (system/tools/context) + volatile tail (task). Set `cacheControl: { system?, tools?, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ); requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'`. Mutually exclusive with `prompt`. Stable prefix hash logged to flight recorder."),
|
|
3490
3885
|
model: z
|
|
3491
3886
|
.string()
|
|
3492
3887
|
.optional()
|
|
3493
3888
|
.describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
|
|
3494
3889
|
outputFormat: z
|
|
3495
3890
|
.enum(["text", "json", "stream-json"])
|
|
3496
|
-
.default("
|
|
3497
|
-
.describe("Output format (text|json|stream-json). stream-json:
|
|
3891
|
+
.default("stream-json")
|
|
3892
|
+
.describe("Output format (text|json|stream-json). DEFAULT: stream-json — same rationale as claude_request: keeps usage/cache/cost observable for cache_state aggregates. Override to 'text' only when raw stdout is required (loses observability)."),
|
|
3498
3893
|
sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
|
|
3499
3894
|
continueSession: z.boolean().default(false).describe("Continue active session"),
|
|
3500
3895
|
createNewSession: z.boolean().default(false).describe("Force new session"),
|
|
@@ -3566,6 +3961,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3566
3961
|
.array(z.string())
|
|
3567
3962
|
.optional()
|
|
3568
3963
|
.describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
|
|
3964
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
3569
3965
|
approvalStrategy: z
|
|
3570
3966
|
.enum(["legacy", "mcp_managed"])
|
|
3571
3967
|
.default("legacy")
|
|
@@ -3595,7 +3991,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3595
3991
|
.boolean()
|
|
3596
3992
|
.default(false)
|
|
3597
3993
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3598
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3994
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3599
3995
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
3600
3996
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
3601
3997
|
}
|
|
@@ -3662,6 +4058,15 @@ export function createGatewayServer(deps = {}) {
|
|
|
3662
4058
|
sessionId: effectiveSessionId,
|
|
3663
4059
|
cli: "claude",
|
|
3664
4060
|
});
|
|
4061
|
+
// Slice λ: resolve worktree directive after session metadata is
|
|
4062
|
+
// settled so resume reuse can read metadata.worktreePath.
|
|
4063
|
+
let worktreeResolution = {};
|
|
4064
|
+
try {
|
|
4065
|
+
worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
|
|
4066
|
+
}
|
|
4067
|
+
catch (err) {
|
|
4068
|
+
return createErrorResponse("claude_request_async", 1, "", corrId, err);
|
|
4069
|
+
}
|
|
3665
4070
|
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
3666
4071
|
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
3667
4072
|
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
@@ -3669,7 +4074,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3669
4074
|
assertUpstreamCliArgs("claude", args);
|
|
3670
4075
|
assertUpstreamCliEnv("claude", undefined);
|
|
3671
4076
|
const claudeAsyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
3672
|
-
const job = asyncJobManager.startJob("claude", args, corrId,
|
|
4077
|
+
const job = asyncJobManager.startJob("claude", args, corrId, worktreeResolution.cwd, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true, prep.stdinPayload);
|
|
3673
4078
|
logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
|
|
3674
4079
|
const asyncResponse = {
|
|
3675
4080
|
success: true,
|
|
@@ -3685,8 +4090,17 @@ export function createGatewayServer(deps = {}) {
|
|
|
3685
4090
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
3686
4091
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
3687
4092
|
}
|
|
3688
|
-
if (
|
|
3689
|
-
asyncResponse.
|
|
4093
|
+
if (worktreeResolution.worktreePath) {
|
|
4094
|
+
asyncResponse.worktreePath = worktreeResolution.worktreePath;
|
|
4095
|
+
}
|
|
4096
|
+
// Rec #4: include any prep-time warnings (e.g.
|
|
4097
|
+
// cacheable_prefix_uncached) alongside ttlWarning.
|
|
4098
|
+
const mergedWarnings = [
|
|
4099
|
+
...(ttlWarning ? [ttlWarning] : []),
|
|
4100
|
+
...(prep.warnings ?? []),
|
|
4101
|
+
];
|
|
4102
|
+
if (mergedWarnings.length > 0) {
|
|
4103
|
+
asyncResponse.warnings = mergedWarnings;
|
|
3690
4104
|
}
|
|
3691
4105
|
return {
|
|
3692
4106
|
content: [
|
|
@@ -3791,7 +4205,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3791
4205
|
.array(z.string())
|
|
3792
4206
|
.optional()
|
|
3793
4207
|
.describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
|
|
3794
|
-
|
|
4208
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
4209
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
|
|
3795
4210
|
return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3796
4211
|
prompt,
|
|
3797
4212
|
promptParts,
|
|
@@ -3822,6 +4237,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3822
4237
|
ignoreRules,
|
|
3823
4238
|
workingDir,
|
|
3824
4239
|
addDir,
|
|
4240
|
+
worktree,
|
|
3825
4241
|
});
|
|
3826
4242
|
});
|
|
3827
4243
|
server.tool("gemini_request_async", {
|
|
@@ -3893,7 +4309,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3893
4309
|
.boolean()
|
|
3894
4310
|
.default(false)
|
|
3895
4311
|
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3896
|
-
|
|
4312
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
4313
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
|
|
3897
4314
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3898
4315
|
prompt,
|
|
3899
4316
|
promptParts,
|
|
@@ -3917,6 +4334,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3917
4334
|
adminPolicyFiles,
|
|
3918
4335
|
attachments,
|
|
3919
4336
|
skipTrust,
|
|
4337
|
+
worktree,
|
|
3920
4338
|
});
|
|
3921
4339
|
});
|
|
3922
4340
|
server.tool("grok_request_async", {
|
|
@@ -4018,7 +4436,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
4018
4436
|
.array(z.string())
|
|
4019
4437
|
.optional()
|
|
4020
4438
|
.describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
|
|
4021
|
-
|
|
4439
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
4440
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
|
|
4022
4441
|
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
4023
4442
|
prompt,
|
|
4024
4443
|
promptParts,
|
|
@@ -4047,6 +4466,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4047
4466
|
systemPromptOverride,
|
|
4048
4467
|
allow,
|
|
4049
4468
|
deny,
|
|
4469
|
+
worktree,
|
|
4050
4470
|
});
|
|
4051
4471
|
});
|
|
4052
4472
|
server.tool("mistral_request_async", {
|
|
@@ -4132,7 +4552,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
4132
4552
|
.array(z.string())
|
|
4133
4553
|
.optional()
|
|
4134
4554
|
.describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
|
|
4135
|
-
|
|
4555
|
+
worktree: WORKTREE_SCHEMA.optional(),
|
|
4556
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
|
|
4136
4557
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
4137
4558
|
prompt,
|
|
4138
4559
|
promptParts,
|
|
@@ -4158,6 +4579,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
4158
4579
|
maxPrice,
|
|
4159
4580
|
workingDir,
|
|
4160
4581
|
addDir,
|
|
4582
|
+
worktree,
|
|
4161
4583
|
});
|
|
4162
4584
|
});
|
|
4163
4585
|
server.tool("llm_job_status", {
|
|
@@ -4673,6 +5095,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
4673
5095
|
//──────────────────────────────────────────────────────────────────────────────
|
|
4674
5096
|
async function initializeSessionManager() {
|
|
4675
5097
|
const config = loadConfig();
|
|
5098
|
+
// Slice λ: file-backed sessions get a cleanup hook that tears down any
|
|
5099
|
+
// git worktrees recorded on session.metadata.worktreePath. PG-backed
|
|
5100
|
+
// sessions skip the hook (multi-tenant deployments don't necessarily
|
|
5101
|
+
// own a single filesystem); revisit if/when worktree support extends
|
|
5102
|
+
// there.
|
|
5103
|
+
const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
|
|
4676
5104
|
if (config.database && config.redis) {
|
|
4677
5105
|
logger.info("Initializing PostgreSQL + Redis session manager");
|
|
4678
5106
|
const { createDatabaseConnection } = await import("./db.js");
|
|
@@ -4682,7 +5110,9 @@ async function initializeSessionManager() {
|
|
|
4682
5110
|
}
|
|
4683
5111
|
else {
|
|
4684
5112
|
logger.info("Initializing file-based session manager");
|
|
4685
|
-
sessionManager = await createSessionManager(config, undefined, logger
|
|
5113
|
+
sessionManager = await createSessionManager(config, undefined, logger, {
|
|
5114
|
+
cleanupHook: worktreeCleanupHook,
|
|
5115
|
+
});
|
|
4686
5116
|
logger.info("File-based session manager initialized");
|
|
4687
5117
|
}
|
|
4688
5118
|
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
|