llm-cli-gateway 1.13.2 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13,10 +13,11 @@ import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js"
13
13
  import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
14
14
  import { homedir } from "os";
15
15
  import { createSessionManager } from "./session-manager.js";
16
+ import { createWorktree, createWorktreeSessionCleanupHook, } from "./worktree-manager.js";
16
17
  import { ResourceProvider } from "./resources.js";
17
18
  import { PerformanceMetrics } from "./metrics.js";
18
19
  import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
19
- import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
20
+ import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, minStableTokensForModel, } from "./config.js";
20
21
  import { checkHealth } from "./health.js";
21
22
  import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
22
23
  import { AsyncJobManager, } from "./async-job-manager.js";
@@ -26,7 +27,7 @@ import { checkReviewIntegrity } from "./review-integrity.js";
26
27
  import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
27
28
  import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
28
29
  import { createFlightRecorder } from "./flight-recorder.js";
29
- import { resolvePromptInput, PromptPartsSchema } from "./prompt-parts.js";
30
+ import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
30
31
  import { computeSessionCacheStats, computeTtlRemaining } from "./cache-stats.js";
31
32
  import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
32
33
  import { startHttpGateway } from "./http-transport.js";
@@ -246,6 +247,50 @@ export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
246
247
  // upstream CLIs would reject. 1µUSD per request is fine-grained enough
247
248
  // for any plausible budget-cap use.
248
249
  export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
250
+ /**
251
+ * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
252
+ * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
253
+ * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
254
+ * name and/or git ref (default ref: HEAD). Validation performed here: the value is either a boolean (`false` is accepted too) or a strict object whose optional `name` has length 1–64 and whose optional `ref` has length 1–255. NOTE(review): this schema only bounds lengths; any sanitizing of `name`/`ref` has to happen downstream — confirm in worktree-manager.
255
+ *
256
+ * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
257
+ * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
258
+ * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
259
+ * the request carries a sessionId and the session already has a worktree,
260
+ * that worktree is reused. On session_delete or TTL eviction the gateway
261
+ * runs `git worktree remove --force`.
262
+ *
263
+ * Tool response: when a worktree was used, the successful response stdout
264
+ * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
265
+ * parse/use the path without a schema change (slice λ §1.d).
266
+ *
267
+ * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
268
+ * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
269
+ */
270
+ export const WORKTREE_SCHEMA = z
271
+ .union([
272
+ z.boolean(),
273
+ z
274
+ .object({
275
+ name: z.string().min(1).max(64).optional(),
276
+ ref: z.string().min(1).max(255).optional(),
277
+ })
278
+ .strict(), // strict object: keys other than `name` / `ref` fail validation
279
+ ])
280
+ .describe("Slice λ: run this request inside a dedicated git worktree owned by " +
281
+ "the gateway. `true` creates a fresh worktree at " +
282
+ "`<repoRoot>/.worktrees/<uuid>` branched from HEAD. " +
283
+ "`{ name?, ref? }` lets the caller supply a sanitized name and/or a " +
284
+ "git ref (default: HEAD). When the request carries a sessionId and " +
285
+ "the session already has a worktree, that worktree is reused. The " +
286
+ "gateway spawns the child CLI with `cwd: <worktree-path>` — no " +
287
+ "`-w`/`--worktree` flag is ever emitted to the underlying CLI. On " +
288
+ "session_delete or TTL eviction the gateway runs `git worktree " +
289
+ "remove --force`. Successful responses are prefixed with " +
290
+ "`[gateway] worktree=<absolute-path>\\n` so callers can use the " +
291
+ "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
292
+ "directory in their repo (the gateway does NOT auto-gitignore — " +
293
+ "see slice λ spec Q4).");
249
294
  // U22: Session-provider enum extended to five providers. The storage layer's
250
295
  // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
251
296
  // session_create / session_list / session_clear_all accept the fifth provider.
@@ -253,7 +298,7 @@ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mi
253
298
  export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
254
299
  let activeServer = null;
255
300
  let activeHttpGateway = null;
256
- function resolveGatewayServerRuntime(deps = {}, options = {}) {
301
+ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
257
302
  const runtimeLogger = deps.logger ?? logger;
258
303
  const runtimeSessionManager = deps.sessionManager ?? sessionManager;
259
304
  const runtimePerformanceMetrics = deps.performanceMetrics ??
@@ -316,7 +361,24 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
316
361
  * `writeFlightStart` is NEVER true on this path: the sync handler is
317
362
  * always the upstream logStart writer.
318
363
  */
319
- flightRecorderEntry, extractUsage) {
364
+ flightRecorderEntry, extractUsage,
365
+ /**
366
+ * Slice κ: optional stdin payload piped to the child CLI. Currently
367
+ * only Claude's `--input-format stream-json` path sets this. Threaded
368
+ * through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
369
+ * the AsyncJobManager spawn path, and participates in the dedup key.
370
+ */
371
+ stdin,
372
+ /**
373
+ * Slice λ: optional working directory for the spawned child process,
374
+ * derived from a gateway-owned git worktree. Threaded to both the
375
+ * direct-execute fallback (`executeCli({ cwd })`) and the
376
+ * AsyncJobManager dedup-aware spawn path
377
+ * (`startJobWithDedup({ cwd })`). `cwd` also participates in the
378
+ * dedup key (see async-job-manager.buildRequestKey) so two requests
379
+ * with identical argv in different worktrees do not collide.
380
+ */
381
+ cwd) {
320
382
  // U26 fix: ownership of onComplete is a contract. Once this function returns
321
383
  // OR throws, the caller MUST consider onComplete consumed — i.e. it has
322
384
  // either been run, or the AsyncJobManager has taken ownership of it. The
@@ -350,6 +412,8 @@ flightRecorderEntry, extractUsage) {
350
412
  idleTimeout: idleTimeoutMs,
351
413
  logger: runtime.logger,
352
414
  env: env ? { ...process.env, ...env } : undefined,
415
+ stdin,
416
+ cwd,
353
417
  });
354
418
  }
355
419
  finally {
@@ -361,10 +425,12 @@ flightRecorderEntry, extractUsage) {
361
425
  let outcome;
362
426
  try {
363
427
  outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
428
+ cwd,
364
429
  idleTimeoutMs,
365
430
  outputFormat,
366
431
  forceRefresh,
367
432
  env,
433
+ stdin,
368
434
  onComplete,
369
435
  // Sync-deferred path: the upstream sync handler already wrote
370
436
  // logStart for this corrId, so writeFlightStart stays false. The
@@ -446,6 +512,73 @@ function buildDeferredToolResponse(deferred, sessionId) {
446
512
  ],
447
513
  };
448
514
  }
515
+ /**
516
+ * Slice λ: resolve a request's worktree directive into a spawn cwd.
517
+ *
518
+ * - `worktreeOpt` is the Zod-validated input value (boolean |
519
+ * `{ name?, ref? }` | undefined).
520
+ * - When the request has a session AND the session already has a
521
+ * `metadata.worktreePath`, that path is reused (resume semantics).
522
+ * The reused path is returned without touching git; if the directory
523
+ * was externally removed between requests, the next CLI invocation
524
+ * will surface the error naturally. On reuse, any `name` / `ref` carried by `worktreeOpt` is ignored (the function returns before reading them).
525
+ * - When no reusable worktree exists, `createWorktree` runs; on success
526
+ * the new path is written to `session.metadata` (only when a session
527
+ * exists — request-scoped worktrees do NOT persist). Concretely, the metadata write happens whenever a `sessionId` was supplied, and it records both `worktreePath` and `worktreeName`.
528
+ * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
529
+ * pre-λ behaviour at non-worktree call sites). Otherwise returns `{ cwd, worktreePath }`, both set to the same path.
530
+ * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
531
+ * in a `createErrorResponse` envelope. Do NOT swallow.
532
+ *
533
+ * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
534
+ * verify" §5. NOTE(review): nothing in this function removes a worktree created without a session; cleanup, if any, lives elsewhere — confirm.
535
+ */
536
+ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
537
+ if (!worktreeOpt)
538
+ return {};
539
+ const sessionManager = runtime.sessionManager;
540
+ if (sessionId) {
541
+ const session = await Promise.resolve(sessionManager.getSession(sessionId)); // Promise.resolve: works whether getSession returns a value or a promise
542
+ const existingPath = session?.metadata?.worktreePath;
543
+ if (typeof existingPath === "string" && existingPath.length > 0) {
544
+ return { cwd: existingPath, worktreePath: existingPath };
545
+ }
546
+ }
547
+ const name = worktreeOpt === true ? undefined : worktreeOpt.name;
548
+ const ref = worktreeOpt === true ? undefined : worktreeOpt.ref;
549
+ const repoRoot = process.cwd(); // the gateway process's working directory is handed to createWorktree as the repo root
550
+ const handle = await createWorktree({
551
+ repoRoot,
552
+ name,
553
+ ref,
554
+ logger: runtime.logger,
555
+ });
556
+ if (sessionId) {
557
+ await Promise.resolve(sessionManager.updateSessionMetadata(sessionId, {
558
+ worktreePath: handle.path,
559
+ worktreeName: handle.name,
560
+ }));
561
+ }
562
+ return { cwd: handle.path, worktreePath: handle.path };
563
+ }
564
+ /**
565
+ * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
566
+ *
567
+ * We surface the worktree path inline as a stdout prefix
568
+ * (`[gateway] worktree=<absolute-path>\n`) rather than as a
569
+ * structuredContent field or JSON wrapper. Rationale:
570
+ * - zero schema change across all 10 tools and their downstream parsers
571
+ * - matches how other slice features (session warnings, cache_state
572
+ * aggregates) surface side-channel metadata today
573
+ * - callers that want the path can split on the first newline; callers
574
+ * that don't care see a single ignorable header line
575
+ *
576
+ * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
577
+ * the moment a successful response is constructed. Returns the empty string when `worktreePath` is falsy, so the result can be concatenated unconditionally.
578
+ */
579
+ export function formatWorktreePrefix(worktreePath) {
580
+ return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
581
+ }
449
582
  // Helper function for standardized error responses
450
583
  function createErrorResponse(cli, code, stderr, correlationId, error) {
451
584
  let errorMessage = `Error executing ${cli} CLI`;
@@ -575,6 +708,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
575
708
  sessionId,
576
709
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
577
710
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
711
+ cacheControlBlocks: prep.cacheControlBlocks,
578
712
  },
579
713
  extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
580
714
  };
@@ -919,6 +1053,19 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
919
1053
  score: reviewIntegrity.totalScore,
920
1054
  });
921
1055
  }
1056
+ // Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
1057
+ // before running optimization. Optimization rewrites the assembled
1058
+ // prompt text the flight-recorder logs, but the κ stdin payload is
1059
+ // built from raw `promptParts` content blocks — letting both run
1060
+ // produces a FR row whose `prompt` no longer matches what Claude
1061
+ // actually received, AND any optimisation-driven text change would
1062
+ // silently break Anthropic prefix-cache reuse on the next call.
1063
+ const ccEarly = params.promptParts?.cacheControl;
1064
+ const cacheControlRequestedEarly = !!(ccEarly &&
1065
+ (ccEarly.system || ccEarly.tools || ccEarly.context));
1066
+ if (params.optimizePrompt && cacheControlRequestedEarly) {
1067
+ return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
1068
+ }
922
1069
  let effectivePrompt = assembledPrompt;
923
1070
  if (params.optimizePrompt) {
924
1071
  const optimized = optimizePromptText(effectivePrompt);
@@ -950,19 +1097,127 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
950
1097
  return createApprovalDeniedResponse(params.operation, approvalDecision);
951
1098
  }
952
1099
  }
953
- const args = ["-p", effectivePrompt];
1100
+ // Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
1101
+ // `promptParts` whose stable prefix exceeds the per-model minimum,
1102
+ // the caller has NOT explicitly set `cacheControl`, the gateway
1103
+ // config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
1104
+ // and outputFormat is stream-json. Auto-emit marks the LAST non-empty
1105
+ // stable block (context → tools → system priority — the rightmost
1106
+ // stable block covers the widest prefix). Skipped when optimizePrompt
1107
+ // is on (same rec #5 desync risk).
1108
+ //
1109
+ // The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
1110
+ // breakpoints from caller content are rejected by Anthropic once
1111
+ // Claude Code's own 1h-marked session-wrap blocks land ahead of them.
1112
+ let autoEmittedCacheControlBlock = null;
1113
+ if (!cacheControlRequestedEarly &&
1114
+ runtime.cacheAwareness.emitAnthropicCacheControl &&
1115
+ !params.optimizePrompt &&
1116
+ params.outputFormat === "stream-json" &&
1117
+ params.promptParts &&
1118
+ stablePrefixTokens !== null) {
1119
+ const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
1120
+ if (stablePrefixTokens >= threshold) {
1121
+ const pp = params.promptParts;
1122
+ // Rightmost non-empty stable block — its cache_control breakpoint
1123
+ // covers everything above it in the message (the API matches
1124
+ // breakpoints in order).
1125
+ if (pp.context && pp.context.length > 0)
1126
+ autoEmittedCacheControlBlock = "context";
1127
+ else if (pp.tools && pp.tools.length > 0)
1128
+ autoEmittedCacheControlBlock = "tools";
1129
+ else if (pp.system && pp.system.length > 0)
1130
+ autoEmittedCacheControlBlock = "system";
1131
+ if (autoEmittedCacheControlBlock !== null) {
1132
+ runtime.logger.info(`[${corrId}] auto-emitting cache_control on '${autoEmittedCacheControlBlock}' (stablePrefixTokens=${stablePrefixTokens} >= ${threshold} for model='${resolvedModel ?? "default"}')`);
1133
+ if (runtime.cacheAwareness.anthropicTtlSeconds !== 3600) {
1134
+ runtime.logger.warn(`[${corrId}] [cache_awareness].anthropic_ttl_seconds=${runtime.cacheAwareness.anthropicTtlSeconds} ignored for Claude CLI path — Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content; using ttl='1h'.`);
1135
+ }
1136
+ }
1137
+ }
1138
+ }
1139
+ // Rec #4: warn when promptParts has a cacheable stable prefix but no
1140
+ // cache_control breakpoint is being emitted (neither explicit nor
1141
+ // auto). Either the caller forgot to set `cacheControl` or
1142
+ // `[cache_awareness].emit_anthropic_cache_control` is off — both
1143
+ // leave the stable prefix bytes unreused across calls, defeating the
1144
+ // point of using `promptParts`.
1145
+ const warnings = [];
1146
+ if (!cacheControlRequestedEarly &&
1147
+ autoEmittedCacheControlBlock === null &&
1148
+ params.promptParts &&
1149
+ stablePrefixTokens !== null) {
1150
+ const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
1151
+ if (stablePrefixTokens >= threshold) {
1152
+ const reason = params.outputFormat !== "stream-json"
1153
+ ? "outputFormat is not 'stream-json'"
1154
+ : !runtime.cacheAwareness.emitAnthropicCacheControl
1155
+ ? "[cache_awareness].emit_anthropic_cache_control is false"
1156
+ : "no eligible non-empty stable block";
1157
+ warnings.push({
1158
+ code: "cacheable_prefix_uncached",
1159
+ message: `Stable prefix is cacheable (${stablePrefixTokens} tokens >= ${threshold} for model='${resolvedModel ?? "default"}') but no cache_control breakpoint will be emitted (${reason}). Set promptParts.cacheControl explicitly, switch outputFormat to 'stream-json', or enable [cache_awareness].emit_anthropic_cache_control.`,
1160
+ stablePrefixTokens,
1161
+ threshold,
1162
+ reason,
1163
+ });
1164
+ }
1165
+ }
1166
+ // Slice κ: switch from the legacy positional `-p <prompt>` emission
1167
+ // to `claude -p --input-format stream-json` and feed a JSON
1168
+ // content-blocks payload via stdin. Non-κ callers (no cacheControl,
1169
+ // or cacheControl with all flags false) take the existing positional
1170
+ // path bit-for-bit. The κ path activates on EITHER an explicit caller
1171
+ // opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
1172
+ // (`autoEmittedCacheControlBlock`).
1173
+ const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
1174
+ let stdinPayload;
1175
+ let cacheControlBlocks;
1176
+ if (cacheControlRequested) {
1177
+ if (params.outputFormat !== "stream-json") {
1178
+ return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
1179
+ }
1180
+ // promptParts is non-null whenever cacheControlRequested is true
1181
+ // (explicit opt-in lives in PromptParts; auto-emit guard requires
1182
+ // promptParts to be defined).
1183
+ const effectiveParts = autoEmittedCacheControlBlock !== null
1184
+ ? {
1185
+ ...params.promptParts,
1186
+ cacheControl: {
1187
+ ...(params.promptParts.cacheControl ?? {}),
1188
+ [autoEmittedCacheControlBlock]: true,
1189
+ },
1190
+ }
1191
+ : params.promptParts;
1192
+ const built = assembleClaudeCacheBlocks(effectiveParts);
1193
+ stdinPayload = `${JSON.stringify(built.payload)}\n`;
1194
+ cacheControlBlocks = built.markedBlockCount;
1195
+ }
1196
+ const args = cacheControlRequested
1197
+ ? [
1198
+ "-p",
1199
+ "--input-format",
1200
+ "stream-json",
1201
+ "--output-format",
1202
+ "stream-json",
1203
+ "--include-partial-messages",
1204
+ "--verbose",
1205
+ ]
1206
+ : ["-p", effectivePrompt];
954
1207
  if (resolvedModel)
955
1208
  args.push("--model", resolvedModel);
956
- if (params.outputFormat === "json") {
957
- args.push("--output-format", "json");
958
- }
959
- else if (params.outputFormat === "stream-json") {
960
- // Claude CLI 2.x rejects `--print --output-format stream-json` without
961
- // `--verbose`: "When using --print, --output-format=stream-json requires
962
- // --verbose". --verbose only affects what claude logs to stderr; the
963
- // stream-json stdout payload is unchanged, so the gateway's NDJSON
964
- // parser is unaffected.
965
- args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
1209
+ if (!cacheControlRequested) {
1210
+ if (params.outputFormat === "json") {
1211
+ args.push("--output-format", "json");
1212
+ }
1213
+ else if (params.outputFormat === "stream-json") {
1214
+ // Claude CLI 2.x rejects `--print --output-format stream-json` without
1215
+ // `--verbose`: "When using --print, --output-format=stream-json requires
1216
+ // --verbose". --verbose only affects what claude logs to stderr; the
1217
+ // stream-json stdout payload is unchanged, so the gateway's NDJSON
1218
+ // parser is unaffected.
1219
+ args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
1220
+ }
966
1221
  }
967
1222
  if (params.allowedTools && params.allowedTools.length > 0) {
968
1223
  sanitizeCliArgValues(params.allowedTools, "allowedTools");
@@ -1025,6 +1280,9 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1025
1280
  args,
1026
1281
  stablePrefixHash,
1027
1282
  stablePrefixTokens,
1283
+ stdinPayload,
1284
+ cacheControlBlocks,
1285
+ warnings: warnings.length > 0 ? warnings : undefined,
1028
1286
  };
1029
1287
  }
1030
1288
  export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
@@ -1722,8 +1980,15 @@ export async function handleGeminiRequest(deps, params) {
1722
1980
  args.push(...sessionPlan.args);
1723
1981
  const userProvidedSession = sessionPlan.resumed;
1724
1982
  const effectiveSessionIdHint = sessionPlan.resumed ? params.sessionId : undefined;
1983
+ let worktreeResolution = {};
1984
+ try {
1985
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionIdHint, runtime);
1986
+ }
1987
+ catch (err) {
1988
+ return createErrorResponse("gemini_request", 1, "", corrId, err);
1989
+ }
1725
1990
  const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
1726
- const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage);
1991
+ const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
1727
1992
  // Deferred — job still running, return async reference
1728
1993
  if (isDeferredResponse(result)) {
1729
1994
  return buildDeferredToolResponse(result, effectiveSessionIdHint);
@@ -1765,6 +2030,12 @@ export async function handleGeminiRequest(deps, params) {
1765
2030
  }
1766
2031
  deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
1767
2032
  const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
2033
+ if (worktreeResolution.worktreePath) {
2034
+ const first = response.content[0];
2035
+ if (first && first.type === "text") {
2036
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
2037
+ }
2038
+ }
1768
2039
  const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
1769
2040
  safeFlightComplete(corrId, {
1770
2041
  response: stdout,
@@ -1852,6 +2123,13 @@ export async function handleGeminiRequestAsync(deps, params) {
1852
2123
  }
1853
2124
  await deps.sessionManager.updateSessionUsage(effectiveSessionId);
1854
2125
  }
2126
+ let worktreeResolution = {};
2127
+ try {
2128
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
2129
+ }
2130
+ catch (err) {
2131
+ return createErrorResponse("gemini_request_async", 1, "", corrId, err);
2132
+ }
1855
2133
  // Start job only after all session I/O succeeds. U23: forward outputFormat
1856
2134
  // so AsyncJobManager records it in the durable store (the manager also
1857
2135
  // surfaces it in the snapshot).
@@ -1860,7 +2138,7 @@ export async function handleGeminiRequestAsync(deps, params) {
1860
2138
  // Slice 1.5: pure async path — no upstream safeFlightStart, so the
1861
2139
  // manager owns both logStart and logComplete for this corrId.
1862
2140
  const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
1863
- const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
2141
+ const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
1864
2142
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
1865
2143
  const asyncResponse = {
1866
2144
  success: true,
@@ -1873,6 +2151,9 @@ export async function handleGeminiRequestAsync(deps, params) {
1873
2151
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
1874
2152
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
1875
2153
  }
2154
+ if (worktreeResolution.worktreePath) {
2155
+ asyncResponse.worktreePath = worktreeResolution.worktreePath;
2156
+ }
1876
2157
  return {
1877
2158
  content: [
1878
2159
  {
@@ -1937,8 +2218,15 @@ export async function handleGrokRequest(deps, params) {
1937
2218
  createNewSession: params.createNewSession,
1938
2219
  });
1939
2220
  args.push(...sessionResult.resumeArgs);
2221
+ let worktreeResolution = {};
2222
+ try {
2223
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
2224
+ }
2225
+ catch (err) {
2226
+ return createErrorResponse("grok_request", 1, "", corrId, err);
2227
+ }
1940
2228
  const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
1941
- const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage);
2229
+ const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
1942
2230
  // Deferred — job still running, return async reference
1943
2231
  if (isDeferredResponse(result)) {
1944
2232
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -1982,6 +2270,12 @@ export async function handleGrokRequest(deps, params) {
1982
2270
  }
1983
2271
  deps.logger.info(`[${corrId}] grok_request completed successfully in ${durationMs}ms`);
1984
2272
  const response = buildCliResponse("grok", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
2273
+ if (worktreeResolution.worktreePath) {
2274
+ const first = response.content[0];
2275
+ if (first && first.type === "text") {
2276
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
2277
+ }
2278
+ }
1985
2279
  safeFlightComplete(corrId, {
1986
2280
  response: stdout,
1987
2281
  durationMs,
@@ -2072,11 +2366,18 @@ export async function handleGrokRequestAsync(deps, params) {
2072
2366
  const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2073
2367
  effectiveSessionId = newSession.id;
2074
2368
  }
2369
+ let worktreeResolution = {};
2370
+ try {
2371
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
2372
+ }
2373
+ catch (err) {
2374
+ return createErrorResponse("grok_request_async", 1, "", corrId, err);
2375
+ }
2075
2376
  // Start job only after all session I/O succeeds
2076
2377
  assertUpstreamCliArgs("grok", args);
2077
2378
  assertUpstreamCliEnv("grok", undefined);
2078
2379
  const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
2079
- const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
2380
+ const job = deps.asyncJobManager.startJob("grok", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
2080
2381
  deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
2081
2382
  const asyncResponse = {
2082
2383
  success: true,
@@ -2089,6 +2390,9 @@ export async function handleGrokRequestAsync(deps, params) {
2089
2390
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
2090
2391
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
2091
2392
  }
2393
+ if (worktreeResolution.worktreePath) {
2394
+ asyncResponse.worktreePath = worktreeResolution.worktreePath;
2395
+ }
2092
2396
  return {
2093
2397
  content: [
2094
2398
  {
@@ -2149,8 +2453,15 @@ export async function handleMistralRequest(deps, params) {
2149
2453
  createNewSession: params.createNewSession,
2150
2454
  });
2151
2455
  args.push(...sessionResult.resumeArgs);
2456
+ let worktreeResolution = {};
2457
+ try {
2458
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
2459
+ }
2460
+ catch (err) {
2461
+ return createErrorResponse("mistral_request", 1, "", corrId, err);
2462
+ }
2152
2463
  const mistralFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, params.sessionId, params.outputFormat);
2153
- let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
2464
+ let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2154
2465
  if (isDeferredResponse(result)) {
2155
2466
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
2156
2467
  }
@@ -2162,7 +2473,7 @@ export async function handleMistralRequest(deps, params) {
2162
2473
  const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
2163
2474
  // Reuse the FR handoff built above — the retry preserves corrId,
2164
2475
  // so the manager's logComplete still updates the original row.
2165
- result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
2476
+ result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2166
2477
  if (isDeferredResponse(result)) {
2167
2478
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
2168
2479
  }
@@ -2208,6 +2519,12 @@ export async function handleMistralRequest(deps, params) {
2208
2519
  }
2209
2520
  deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
2210
2521
  const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
2522
+ if (worktreeResolution.worktreePath) {
2523
+ const first = response.content[0];
2524
+ if (first && first.type === "text") {
2525
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
2526
+ }
2527
+ }
2211
2528
  safeFlightComplete(corrId, {
2212
2529
  response: stdout,
2213
2530
  durationMs,
@@ -2293,10 +2610,17 @@ export async function handleMistralRequestAsync(deps, params) {
2293
2610
  const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
2294
2611
  effectiveSessionId = newSession.id;
2295
2612
  }
2613
+ let worktreeResolution = {};
2614
+ try {
2615
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
2616
+ }
2617
+ catch (err) {
2618
+ return createErrorResponse("mistral_request_async", 1, "", corrId, err);
2619
+ }
2296
2620
  assertUpstreamCliArgs("mistral", args);
2297
2621
  assertUpstreamCliEnv("mistral", mistralEnv);
2298
2622
  const mistralAsyncFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, effectiveSessionId, params.outputFormat);
2299
- const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
2623
+ const job = deps.asyncJobManager.startJob("mistral", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
2300
2624
  deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
2301
2625
  const asyncResponse = {
2302
2626
  success: true,
@@ -2309,6 +2633,9 @@ export async function handleMistralRequestAsync(deps, params) {
2309
2633
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
2310
2634
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
2311
2635
  }
2636
+ if (worktreeResolution.worktreePath) {
2637
+ asyncResponse.worktreePath = worktreeResolution.worktreePath;
2638
+ }
2312
2639
  return {
2313
2640
  content: [
2314
2641
  {
@@ -2395,6 +2722,17 @@ export async function handleCodexRequestAsync(deps, params) {
2395
2722
  const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2396
2723
  effectiveSessionId = newSession.id;
2397
2724
  }
2725
+ // Slice λ: resolve worktree directive after session I/O so resume reuse
2726
+ // can read metadata.worktreePath. A pre-startJob failure here means
2727
+ // prepCleanup is still owned locally; run it before returning.
2728
+ let worktreeResolution = {};
2729
+ try {
2730
+ worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
2731
+ }
2732
+ catch (err) {
2733
+ runPrepCleanupLocally();
2734
+ return createErrorResponse("codex_request_async", 1, "", corrId, err);
2735
+ }
2398
2736
  // Start job only after all session I/O succeeds. If startJob throws before
2399
2737
  // registering the record, ownership stays here and we run it in the catch.
2400
2738
  assertUpstreamCliArgs("codex", args);
@@ -2402,7 +2740,7 @@ export async function handleCodexRequestAsync(deps, params) {
2402
2740
  const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
2403
2741
  let job;
2404
2742
  try {
2405
- job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
2743
+ job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
2406
2744
  // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
2407
2745
  // status. Release our local ownership claim so the catch path doesn't
2408
2746
  // double-fire.
@@ -2424,6 +2762,9 @@ export async function handleCodexRequestAsync(deps, params) {
2424
2762
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
2425
2763
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
2426
2764
  }
2765
+ if (worktreeResolution.worktreePath) {
2766
+ asyncResponse.worktreePath = worktreeResolution.worktreePath;
2767
+ }
2427
2768
  return {
2428
2769
  content: [
2429
2770
  {
@@ -2481,15 +2822,15 @@ export function createGatewayServer(deps = {}) {
2481
2822
  .max(100000, "Prompt too long (max 100k chars)")
2482
2823
  .optional()
2483
2824
  .describe("Prompt text for Claude (mutually exclusive with promptParts)"),
2484
- promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
2825
+ promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Use for repeated calls that share a stable prefix — `system`/`tools`/`context` are the stable head; `task` is the volatile tail (never marked). Set `cacheControl: { system?: boolean, tools?: boolean, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ). Requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'` (Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content). Mutually exclusive with `prompt`. The stable prefix hash is logged to the flight recorder for cache_state aggregates."),
2485
2826
  model: z
2486
2827
  .string()
2487
2828
  .optional()
2488
2829
  .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
2489
2830
  outputFormat: z
2490
2831
  .enum(["text", "json", "stream-json"])
2491
- .default("text")
2492
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
2832
+ .default("stream-json")
2833
+ .describe("Output format (text|json|stream-json). DEFAULT: stream-json — the gateway parses NDJSON usage events to extract input/output/cache_read/cache_creation tokens + cost + model, persists them to the flight recorder for cache_state aggregates, and still returns the assistant text. Override to 'text' only when you truly want unparsed stdout (loses observability)."),
2493
2834
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
2494
2835
  continueSession: z.boolean().default(false).describe("Continue active session"),
2495
2836
  createNewSession: z.boolean().default(false).describe("Force new session"),
@@ -2561,6 +2902,7 @@ export function createGatewayServer(deps = {}) {
2561
2902
  .array(z.string())
2562
2903
  .optional()
2563
2904
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
2905
+ worktree: WORKTREE_SCHEMA.optional(),
2564
2906
  approvalStrategy: z
2565
2907
  .enum(["legacy", "mcp_managed"])
2566
2908
  .default("legacy")
@@ -2591,7 +2933,7 @@ export function createGatewayServer(deps = {}) {
2591
2933
  .boolean()
2592
2934
  .default(false)
2593
2935
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2594
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2936
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2595
2937
  const startTime = Date.now();
2596
2938
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2597
2939
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2665,7 +3007,11 @@ export function createGatewayServer(deps = {}) {
2665
3007
  sessionId: effectiveSessionId,
2666
3008
  cli: "claude",
2667
3009
  });
2668
- const warnings = ttlWarning ? [ttlWarning] : [];
3010
+ // Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
3011
+ const warnings = [
3012
+ ...(ttlWarning ? [ttlWarning] : []),
3013
+ ...(prep.warnings ?? []),
3014
+ ];
2669
3015
  safeFlightStart({
2670
3016
  correlationId: corrId,
2671
3017
  cli: "claude",
@@ -2674,8 +3020,9 @@ export function createGatewayServer(deps = {}) {
2674
3020
  sessionId: effectiveSessionId,
2675
3021
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
2676
3022
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
3023
+ cacheControlBlocks: prep.cacheControlBlocks,
2677
3024
  }, runtime);
2678
- logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}`);
3025
+ logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
2679
3026
  try {
2680
3027
  if (useContinue) {
2681
3028
  args.push("--continue");
@@ -2684,10 +3031,19 @@ export function createGatewayServer(deps = {}) {
2684
3031
  args.push("--session-id", effectiveSessionId);
2685
3032
  await sessionManager.updateSessionUsage(effectiveSessionId);
2686
3033
  }
3034
+ // Slice λ: resolve worktree directive into spawn cwd. Done after
3035
+ // session resolution so resume reuse can read metadata.worktreePath.
3036
+ let worktreeResolution = {};
3037
+ try {
3038
+ worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
3039
+ }
3040
+ catch (err) {
3041
+ return createErrorResponse("claude_request", 1, "", corrId, err);
3042
+ }
2687
3043
  // Idle timeout only for stream-json (text/json produce no output until done)
2688
3044
  const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
2689
3045
  const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
2690
- const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage);
3046
+ const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
2691
3047
  // Deferred — job still running, return async reference
2692
3048
  if (isDeferredResponse(result)) {
2693
3049
  return buildDeferredToolResponse(result, effectiveSessionId);
@@ -2744,7 +3100,14 @@ export function createGatewayServer(deps = {}) {
2744
3100
  exitCode: 0,
2745
3101
  status: "completed",
2746
3102
  }, runtime);
2747
- return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
3103
+ const streamResponse = buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
3104
+ if (worktreeResolution.worktreePath) {
3105
+ const first = streamResponse.content[0];
3106
+ if (first && first.type === "text") {
3107
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
3108
+ }
3109
+ }
3110
+ return streamResponse;
2748
3111
  }
2749
3112
  safeFlightComplete(corrId, {
2750
3113
  response: stdout,
@@ -2755,7 +3118,14 @@ export function createGatewayServer(deps = {}) {
2755
3118
  exitCode: 0,
2756
3119
  status: "completed",
2757
3120
  }, runtime);
2758
- return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
3121
+ const nonStreamResponse = buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
3122
+ if (worktreeResolution.worktreePath) {
3123
+ const first = nonStreamResponse.content[0];
3124
+ if (first && first.type === "text") {
3125
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
3126
+ }
3127
+ }
3128
+ return nonStreamResponse;
2759
3129
  }
2760
3130
  catch (error) {
2761
3131
  const elapsedMs = Math.max(0, Date.now() - startTime);
@@ -2888,7 +3258,8 @@ export function createGatewayServer(deps = {}) {
2888
3258
  .array(z.string())
2889
3259
  .optional()
2890
3260
  .describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
2891
- }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
3261
+ worktree: WORKTREE_SCHEMA.optional(),
3262
+ }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
2892
3263
  const startTime = Date.now();
2893
3264
  const prep = prepareCodexRequest({
2894
3265
  prompt,
@@ -2940,9 +3311,20 @@ export function createGatewayServer(deps = {}) {
2940
3311
  // execution, on terminal status for the job-backed path (sync
2941
3312
  // completion or deferred). The outer finally MUST NOT clean again.
2942
3313
  const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
3314
+ // Slice λ: resolve worktree directive into spawn cwd. Codex has no
3315
+ // in-handler session resolution prior to spawn (session lookup is
3316
+ // lazy via `codex exec resume`), so the user-supplied sessionId is
3317
+ // the only reuse key.
3318
+ let worktreeResolution = {};
3319
+ try {
3320
+ worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
3321
+ }
3322
+ catch (err) {
3323
+ return createErrorResponse("codex_request", 1, "", corrId, err);
3324
+ }
2943
3325
  try {
2944
3326
  const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
2945
- const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage);
3327
+ const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2946
3328
  // Deferred — job still running, return async reference. Cleanup
2947
3329
  // ownership belongs to AsyncJobManager via onComplete.
2948
3330
  if (isDeferredResponse(result)) {
@@ -3000,7 +3382,14 @@ export function createGatewayServer(deps = {}) {
3000
3382
  cacheCreationTokens: codexUsage.cacheCreationTokens,
3001
3383
  costUsd: codexUsage.costUsd,
3002
3384
  }, runtime);
3003
- return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
3385
+ const codexResponse = buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
3386
+ if (worktreeResolution.worktreePath) {
3387
+ const first = codexResponse.content[0];
3388
+ if (first && first.type === "text") {
3389
+ first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
3390
+ }
3391
+ }
3392
+ return codexResponse;
3004
3393
  }
3005
3394
  catch (error) {
3006
3395
  const elapsedMs = Math.max(0, Date.now() - startTime);
@@ -3190,7 +3579,8 @@ export function createGatewayServer(deps = {}) {
3190
3579
  .boolean()
3191
3580
  .default(false)
3192
3581
  .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
3193
- }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
3582
+ worktree: WORKTREE_SCHEMA.optional(),
3583
+ }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
3194
3584
  return handleGeminiRequest({ sessionManager, logger, runtime }, {
3195
3585
  prompt,
3196
3586
  promptParts,
@@ -3215,6 +3605,7 @@ export function createGatewayServer(deps = {}) {
3215
3605
  adminPolicyFiles,
3216
3606
  attachments,
3217
3607
  skipTrust,
3608
+ worktree,
3218
3609
  });
3219
3610
  });
3220
3611
  //──────────────────────────────────────────────────────────────────────────────
@@ -3320,7 +3711,8 @@ export function createGatewayServer(deps = {}) {
3320
3711
  .array(z.string())
3321
3712
  .optional()
3322
3713
  .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
3323
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, }) => {
3714
+ worktree: WORKTREE_SCHEMA.optional(),
3715
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
3324
3716
  return handleGrokRequest({ sessionManager, logger, runtime }, {
3325
3717
  prompt,
3326
3718
  promptParts,
@@ -3350,6 +3742,7 @@ export function createGatewayServer(deps = {}) {
3350
3742
  systemPromptOverride,
3351
3743
  allow,
3352
3744
  deny,
3745
+ worktree,
3353
3746
  });
3354
3747
  });
3355
3748
  //──────────────────────────────────────────────────────────────────────────────
@@ -3439,7 +3832,8 @@ export function createGatewayServer(deps = {}) {
3439
3832
  .array(z.string())
3440
3833
  .optional()
3441
3834
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
3442
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
3835
+ worktree: WORKTREE_SCHEMA.optional(),
3836
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
3443
3837
  return handleMistralRequest({ sessionManager, logger, runtime }, {
3444
3838
  prompt,
3445
3839
  promptParts,
@@ -3466,6 +3860,7 @@ export function createGatewayServer(deps = {}) {
3466
3860
  maxPrice,
3467
3861
  workingDir,
3468
3862
  addDir,
3863
+ worktree,
3469
3864
  });
3470
3865
  });
3471
3866
  //──────────────────────────────────────────────────────────────────────────────
@@ -3486,15 +3881,15 @@ export function createGatewayServer(deps = {}) {
3486
3881
  .max(100000, "Prompt too long (max 100k chars)")
3487
3882
  .optional()
3488
3883
  .describe("Prompt text for Claude (mutually exclusive with promptParts)"),
3489
- promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
3884
+ promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Same semantics as claude_request: stable head (system/tools/context) + volatile tail (task). Set `cacheControl: { system?, tools?, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ); requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'`. Mutually exclusive with `prompt`. Stable prefix hash logged to flight recorder."),
3490
3885
  model: z
3491
3886
  .string()
3492
3887
  .optional()
3493
3888
  .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
3494
3889
  outputFormat: z
3495
3890
  .enum(["text", "json", "stream-json"])
3496
- .default("text")
3497
- .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
3891
+ .default("stream-json")
3892
+ .describe("Output format (text|json|stream-json). DEFAULT: stream-json — same rationale as claude_request: keeps usage/cache/cost observable for cache_state aggregates. Override to 'text' only when raw stdout is required (loses observability)."),
3498
3893
  sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
3499
3894
  continueSession: z.boolean().default(false).describe("Continue active session"),
3500
3895
  createNewSession: z.boolean().default(false).describe("Force new session"),
@@ -3566,6 +3961,7 @@ export function createGatewayServer(deps = {}) {
3566
3961
  .array(z.string())
3567
3962
  .optional()
3568
3963
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
3964
+ worktree: WORKTREE_SCHEMA.optional(),
3569
3965
  approvalStrategy: z
3570
3966
  .enum(["legacy", "mcp_managed"])
3571
3967
  .default("legacy")
@@ -3595,7 +3991,7 @@ export function createGatewayServer(deps = {}) {
3595
3991
  .boolean()
3596
3992
  .default(false)
3597
3993
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3598
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3994
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3599
3995
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
3600
3996
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
3601
3997
  }
@@ -3662,6 +4058,15 @@ export function createGatewayServer(deps = {}) {
3662
4058
  sessionId: effectiveSessionId,
3663
4059
  cli: "claude",
3664
4060
  });
4061
+ // Slice λ: resolve worktree directive after session metadata is
4062
+ // settled so resume reuse can read metadata.worktreePath.
4063
+ let worktreeResolution = {};
4064
+ try {
4065
+ worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
4066
+ }
4067
+ catch (err) {
4068
+ return createErrorResponse("claude_request_async", 1, "", corrId, err);
4069
+ }
3665
4070
  // Idle timeout only for stream-json (text/json produce no output until done)
3666
4071
  const effectiveIdleTimeout = outputFormat === "stream-json"
3667
4072
  ? resolveIdleTimeout("claude", idleTimeoutMs)
@@ -3669,7 +4074,7 @@ export function createGatewayServer(deps = {}) {
3669
4074
  assertUpstreamCliArgs("claude", args);
3670
4075
  assertUpstreamCliEnv("claude", undefined);
3671
4076
  const claudeAsyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
3672
- const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true);
4077
+ const job = asyncJobManager.startJob("claude", args, corrId, worktreeResolution.cwd, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true, prep.stdinPayload);
3673
4078
  logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
3674
4079
  const asyncResponse = {
3675
4080
  success: true,
@@ -3685,8 +4090,17 @@ export function createGatewayServer(deps = {}) {
3685
4090
  if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
3686
4091
  asyncResponse.reviewIntegrity = prep.reviewIntegrity;
3687
4092
  }
3688
- if (ttlWarning) {
3689
- asyncResponse.warnings = [ttlWarning];
4093
+ if (worktreeResolution.worktreePath) {
4094
+ asyncResponse.worktreePath = worktreeResolution.worktreePath;
4095
+ }
4096
+ // Rec #4: include any prep-time warnings (e.g.
4097
+ // cacheable_prefix_uncached) alongside ttlWarning.
4098
+ const mergedWarnings = [
4099
+ ...(ttlWarning ? [ttlWarning] : []),
4100
+ ...(prep.warnings ?? []),
4101
+ ];
4102
+ if (mergedWarnings.length > 0) {
4103
+ asyncResponse.warnings = mergedWarnings;
3690
4104
  }
3691
4105
  return {
3692
4106
  content: [
@@ -3791,7 +4205,8 @@ export function createGatewayServer(deps = {}) {
3791
4205
  .array(z.string())
3792
4206
  .optional()
3793
4207
  .describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
3794
- }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
4208
+ worktree: WORKTREE_SCHEMA.optional(),
4209
+ }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
3795
4210
  return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3796
4211
  prompt,
3797
4212
  promptParts,
@@ -3822,6 +4237,7 @@ export function createGatewayServer(deps = {}) {
3822
4237
  ignoreRules,
3823
4238
  workingDir,
3824
4239
  addDir,
4240
+ worktree,
3825
4241
  });
3826
4242
  });
3827
4243
  server.tool("gemini_request_async", {
@@ -3893,7 +4309,8 @@ export function createGatewayServer(deps = {}) {
3893
4309
  .boolean()
3894
4310
  .default(false)
3895
4311
  .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
3896
- }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
4312
+ worktree: WORKTREE_SCHEMA.optional(),
4313
+ }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
3897
4314
  return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3898
4315
  prompt,
3899
4316
  promptParts,
@@ -3917,6 +4334,7 @@ export function createGatewayServer(deps = {}) {
3917
4334
  adminPolicyFiles,
3918
4335
  attachments,
3919
4336
  skipTrust,
4337
+ worktree,
3920
4338
  });
3921
4339
  });
3922
4340
  server.tool("grok_request_async", {
@@ -4018,7 +4436,8 @@ export function createGatewayServer(deps = {}) {
4018
4436
  .array(z.string())
4019
4437
  .optional()
4020
4438
  .describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
4021
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, }) => {
4439
+ worktree: WORKTREE_SCHEMA.optional(),
4440
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
4022
4441
  return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4023
4442
  prompt,
4024
4443
  promptParts,
@@ -4047,6 +4466,7 @@ export function createGatewayServer(deps = {}) {
4047
4466
  systemPromptOverride,
4048
4467
  allow,
4049
4468
  deny,
4469
+ worktree,
4050
4470
  });
4051
4471
  });
4052
4472
  server.tool("mistral_request_async", {
@@ -4132,7 +4552,8 @@ export function createGatewayServer(deps = {}) {
4132
4552
  .array(z.string())
4133
4553
  .optional()
4134
4554
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
4135
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
4555
+ worktree: WORKTREE_SCHEMA.optional(),
4556
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
4136
4557
  return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4137
4558
  prompt,
4138
4559
  promptParts,
@@ -4158,6 +4579,7 @@ export function createGatewayServer(deps = {}) {
4158
4579
  maxPrice,
4159
4580
  workingDir,
4160
4581
  addDir,
4582
+ worktree,
4161
4583
  });
4162
4584
  });
4163
4585
  server.tool("llm_job_status", {
@@ -4673,6 +5095,12 @@ export function createGatewayServer(deps = {}) {
4673
5095
  //──────────────────────────────────────────────────────────────────────────────
4674
5096
  async function initializeSessionManager() {
4675
5097
  const config = loadConfig();
5098
+ // Slice λ: file-backed sessions get a cleanup hook that tears down any
5099
+ // git worktrees recorded on session.metadata.worktreePath. PG-backed
5100
+ // sessions skip the hook (multi-tenant deployments don't necessarily
5101
+ // own a single filesystem); revisit if/when worktree support extends
5102
+ // there.
5103
+ const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
4676
5104
  if (config.database && config.redis) {
4677
5105
  logger.info("Initializing PostgreSQL + Redis session manager");
4678
5106
  const { createDatabaseConnection } = await import("./db.js");
@@ -4682,7 +5110,9 @@ async function initializeSessionManager() {
4682
5110
  }
4683
5111
  else {
4684
5112
  logger.info("Initializing file-based session manager");
4685
- sessionManager = await createSessionManager(config, undefined, logger);
5113
+ sessionManager = await createSessionManager(config, undefined, logger, {
5114
+ cleanupHook: worktreeCleanupHook,
5115
+ });
4686
5116
  logger.info("File-based session manager initialized");
4687
5117
  }
4688
5118
  resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));