npm - llm-cli-gateway - Versions diffs - 1.13.2 → 1.15.0 - Mend

llm-cli-gateway 1.13.2 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CHANGELOG.md +371 -44
package/dist/async-job-manager.d.ts +15 -1
package/dist/async-job-manager.js +31 -6
package/dist/cache-stats.d.ts +26 -0
package/dist/cache-stats.js +45 -2
package/dist/executor.d.ts +8 -0
package/dist/executor.js +7 -2
package/dist/flight-recorder.d.ts +7 -0
package/dist/flight-recorder.js +27 -2
package/dist/index.d.ts +126 -1
package/dist/index.js +480 -50
package/dist/prompt-parts.d.ts +74 -0
package/dist/prompt-parts.js +47 -0
package/dist/session-manager.d.ts +20 -2
package/dist/session-manager.js +28 -3
package/dist/upstream-contracts.d.ts +8 -1
package/dist/upstream-contracts.js +37 -1
package/dist/worktree-manager.d.ts +41 -0
package/dist/worktree-manager.js +214 -0
package/package.json +2 -1

package/dist/index.js CHANGED Viewed

@@ -13,10 +13,11 @@ import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js"
 import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
 import { homedir } from "os";
 import { createSessionManager } from "./session-manager.js";
+import { createWorktree, createWorktreeSessionCleanupHook, } from "./worktree-manager.js";
 import { ResourceProvider } from "./resources.js";
 import { PerformanceMetrics } from "./metrics.js";
 import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
-import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
+import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, minStableTokensForModel, } from "./config.js";
 import { checkHealth } from "./health.js";
 import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
 import { AsyncJobManager, } from "./async-job-manager.js";
@@ -26,7 +27,7 @@ import { checkReviewIntegrity } from "./review-integrity.js";
 import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
 import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
 import { createFlightRecorder } from "./flight-recorder.js";
-import { resolvePromptInput, PromptPartsSchema } from "./prompt-parts.js";
+import { resolvePromptInput, PromptPartsSchema, assembleClaudeCacheBlocks, } from "./prompt-parts.js";
 import { computeSessionCacheStats, computeTtlRemaining } from "./cache-stats.js";
 import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
 import { startHttpGateway } from "./http-transport.js";
@@ -246,6 +247,50 @@ export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
 // upstream CLIs would reject. 1µUSD per request is fine-grained enough
 // for any plausible budget-cap use.
 export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
+/**
+ * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
+ * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
+ * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
+ * name and/or git ref (default ref: HEAD).
+ *
+ * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
+ * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
+ * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
+ * the request carries a sessionId and the session already has a worktree,
+ * that worktree is reused. On session_delete or TTL eviction the gateway
+ * runs `git worktree remove --force`.
+ *
+ * Tool response: when a worktree was used, the successful response stdout
+ * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
+ * parse/use the path without a schema change (slice λ §1.d).
+ *
+ * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
+ * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
+ */
+export const WORKTREE_SCHEMA = z
+    .union([
+    z.boolean(),
+    z
+        .object({
+        name: z.string().min(1).max(64).optional(),
+        ref: z.string().min(1).max(255).optional(),
+    })
+        .strict(),
+])
+    .describe("Slice λ: run this request inside a dedicated git worktree owned by " +
+    "the gateway. `true` creates a fresh worktree at " +
+    "`<repoRoot>/.worktrees/<uuid>` branched from HEAD. " +
+    "`{ name?, ref? }` lets the caller supply a sanitized name and/or a " +
+    "git ref (default: HEAD). When the request carries a sessionId and " +
+    "the session already has a worktree, that worktree is reused. The " +
+    "gateway spawns the child CLI with `cwd: <worktree-path>` — no " +
+    "`-w`/`--worktree` flag is ever emitted to the underlying CLI. On " +
+    "session_delete or TTL eviction the gateway runs `git worktree " +
+    "remove --force`. Successful responses are prefixed with " +
+    "`[gateway] worktree=<absolute-path>\\n` so callers can use the " +
+    "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
+    "directory in their repo (the gateway does NOT auto-gitignore — " +
+    "see slice λ spec Q4).");
 // U22: Session-provider enum extended to five providers. The storage layer's
 // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
 // session_create / session_list / session_clear_all accept the fifth provider.
@@ -253,7 +298,7 @@ export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mi
 export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
 let activeServer = null;
 let activeHttpGateway = null;
-function resolveGatewayServerRuntime(deps = {}, options = {}) {
+export function resolveGatewayServerRuntime(deps = {}, options = {}) {
     const runtimeLogger = deps.logger ?? logger;
     const runtimeSessionManager = deps.sessionManager ?? sessionManager;
     const runtimePerformanceMetrics = deps.performanceMetrics ??
@@ -316,7 +361,24 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
  * `writeFlightStart` is NEVER true on this path: the sync handler is
  * always the upstream logStart writer.
  */
-flightRecorderEntry, extractUsage) {
+flightRecorderEntry, extractUsage,
+/**
+ * Slice κ: optional stdin payload piped to the child CLI. Currently
+ * only Claude's `--input-format stream-json` path sets this. Threaded
+ * through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
+ * the AsyncJobManager spawn path, and participates in the dedup key.
+ */
+stdin,
+/**
+ * Slice λ: optional working directory for the spawned child process,
+ * derived from a gateway-owned git worktree. Threaded to both the
+ * direct-execute fallback (`executeCli({ cwd })`) and the
+ * AsyncJobManager dedup-aware spawn path
+ * (`startJobWithDedup({ cwd })`). `cwd` also participates in the
+ * dedup key (see async-job-manager.buildRequestKey) so two requests
+ * with identical argv in different worktrees do not collide.
+ */
+cwd) {
     // U26 fix: ownership of onComplete is a contract. Once this function returns
     // OR throws, the caller MUST consider onComplete consumed — i.e. it has
     // either been run, or the AsyncJobManager has taken ownership of it. The
@@ -350,6 +412,8 @@ flightRecorderEntry, extractUsage) {
                 idleTimeout: idleTimeoutMs,
                 logger: runtime.logger,
                 env: env ? { ...process.env, ...env } : undefined,
+                stdin,
+                cwd,
             });
         }
         finally {
@@ -361,10 +425,12 @@ flightRecorderEntry, extractUsage) {
     let outcome;
     try {
         outcome = runtime.asyncJobManager.startJobWithDedup(cli, args, corrId, {
+            cwd,
             idleTimeoutMs,
             outputFormat,
             forceRefresh,
             env,
+            stdin,
             onComplete,
             // Sync-deferred path: the upstream sync handler already wrote
             // logStart for this corrId, so writeFlightStart stays false. The
@@ -446,6 +512,73 @@ function buildDeferredToolResponse(deferred, sessionId) {
         ],
     };
 }
+/**
+ * Slice λ: resolve a request's worktree directive into a spawn cwd.
+ *
+ * - `worktreeOpt` is the Zod-validated input value (boolean |
+ *   `{ name?, ref? }` | undefined).
+ * - When the request has a session AND the session already has a
+ *   `metadata.worktreePath`, that path is reused (resume semantics).
+ *   The reused path is returned without touching git; if the directory
+ *   was externally removed between requests, the next CLI invocation
+ *   will surface the error naturally.
+ * - When no reusable worktree exists, `createWorktree` runs; on success
+ *   the new path is written to `session.metadata` (only when a session
+ *   exists — request-scoped worktrees do NOT persist).
+ * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
+ *   pre-λ behaviour at non-worktree call sites).
+ * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
+ *   in a `createErrorResponse` envelope. Do NOT swallow.
+ *
+ * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
+ * verify" §5.
+ */
+export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
+    if (!worktreeOpt)
+        return {};
+    const sessionManager = runtime.sessionManager;
+    if (sessionId) {
+        const session = await Promise.resolve(sessionManager.getSession(sessionId));
+        const existingPath = session?.metadata?.worktreePath;
+        if (typeof existingPath === "string" && existingPath.length > 0) {
+            return { cwd: existingPath, worktreePath: existingPath };
+        }
+    }
+    const name = worktreeOpt === true ? undefined : worktreeOpt.name;
+    const ref = worktreeOpt === true ? undefined : worktreeOpt.ref;
+    const repoRoot = process.cwd();
+    const handle = await createWorktree({
+        repoRoot,
+        name,
+        ref,
+        logger: runtime.logger,
+    });
+    if (sessionId) {
+        await Promise.resolve(sessionManager.updateSessionMetadata(sessionId, {
+            worktreePath: handle.path,
+            worktreeName: handle.name,
+        }));
+    }
+    return { cwd: handle.path, worktreePath: handle.path };
+}
+/**
+ * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
+ *
+ * We surface the worktree path inline as a stdout prefix
+ * (`[gateway] worktree=<absolute-path>\n`) rather than as a
+ * structuredContent field or JSON wrapper. Rationale:
+ *   - zero schema change across all 10 tools and their downstream parsers
+ *   - matches how other slice features (session warnings, cache_state
+ *     aggregates) surface side-channel metadata today
+ *   - callers that want the path can split on the first newline; callers
+ *     that don't care see a single ignorable header line
+ *
+ * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
+ * the moment a successful response is constructed.
+ */
+export function formatWorktreePrefix(worktreePath) {
+    return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
+}
 // Helper function for standardized error responses
 function createErrorResponse(cli, code, stderr, correlationId, error) {
     let errorMessage = `Error executing ${cli} CLI`;
@@ -575,6 +708,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
             sessionId,
             stablePrefixHash: prep.stablePrefixHash ?? undefined,
             stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
+            cacheControlBlocks: prep.cacheControlBlocks,
         },
         extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
     };
@@ -919,6 +1053,19 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             score: reviewIntegrity.totalScore,
         });
     }
+    // Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
+    // before running optimization. Optimization rewrites the assembled
+    // prompt text the flight-recorder logs, but the κ stdin payload is
+    // built from raw `promptParts` content blocks — letting both run
+    // produces a FR row whose `prompt` no longer matches what Claude
+    // actually received, AND any optimisation-driven text change would
+    // silently break Anthropic prefix-cache reuse on the next call.
+    const ccEarly = params.promptParts?.cacheControl;
+    const cacheControlRequestedEarly = !!(ccEarly &&
+        (ccEarly.system || ccEarly.tools || ccEarly.context));
+    if (params.optimizePrompt && cacheControlRequestedEarly) {
+        return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
+    }
     let effectivePrompt = assembledPrompt;
     if (params.optimizePrompt) {
         const optimized = optimizePromptText(effectivePrompt);
@@ -950,19 +1097,127 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             return createApprovalDeniedResponse(params.operation, approvalDecision);
         }
     }
-    const args = ["-p", effectivePrompt];
+    // Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
+    // `promptParts` whose stable prefix exceeds the per-model minimum,
+    // the caller has NOT explicitly set `cacheControl`, the gateway
+    // config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
+    // and outputFormat is stream-json. Auto-emit marks the LAST non-empty
+    // stable block (context → tools → system priority — the rightmost
+    // stable block covers the widest prefix). Skipped when optimizePrompt
+    // is on (same rec #5 desync risk).
+    //
+    // The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
+    // breakpoints from caller content are rejected by Anthropic once
+    // Claude Code's own 1h-marked session-wrap blocks land ahead of them.
+    let autoEmittedCacheControlBlock = null;
+    if (!cacheControlRequestedEarly &&
+        runtime.cacheAwareness.emitAnthropicCacheControl &&
+        !params.optimizePrompt &&
+        params.outputFormat === "stream-json" &&
+        params.promptParts &&
+        stablePrefixTokens !== null) {
+        const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
+        if (stablePrefixTokens >= threshold) {
+            const pp = params.promptParts;
+            // Rightmost non-empty stable block — its cache_control breakpoint
+            // covers everything above it in the message (the API matches
+            // breakpoints in order).
+            if (pp.context && pp.context.length > 0)
+                autoEmittedCacheControlBlock = "context";
+            else if (pp.tools && pp.tools.length > 0)
+                autoEmittedCacheControlBlock = "tools";
+            else if (pp.system && pp.system.length > 0)
+                autoEmittedCacheControlBlock = "system";
+            if (autoEmittedCacheControlBlock !== null) {
+                runtime.logger.info(`[${corrId}] auto-emitting cache_control on '${autoEmittedCacheControlBlock}' (stablePrefixTokens=${stablePrefixTokens} >= ${threshold} for model='${resolvedModel ?? "default"}')`);
+                if (runtime.cacheAwareness.anthropicTtlSeconds !== 3600) {
+                    runtime.logger.warn(`[${corrId}] [cache_awareness].anthropic_ttl_seconds=${runtime.cacheAwareness.anthropicTtlSeconds} ignored for Claude CLI path — Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content; using ttl='1h'.`);
+                }
+            }
+        }
+    }
+    // Rec #4: warn when promptParts has a cacheable stable prefix but no
+    // cache_control breakpoint is being emitted (neither explicit nor
+    // auto). Either the caller forgot to set `cacheControl` or
+    // `[cache_awareness].emit_anthropic_cache_control` is off — both
+    // leave the stable prefix bytes unreused across calls, defeating the
+    // point of using `promptParts`.
+    const warnings = [];
+    if (!cacheControlRequestedEarly &&
+        autoEmittedCacheControlBlock === null &&
+        params.promptParts &&
+        stablePrefixTokens !== null) {
+        const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
+        if (stablePrefixTokens >= threshold) {
+            const reason = params.outputFormat !== "stream-json"
+                ? "outputFormat is not 'stream-json'"
+                : !runtime.cacheAwareness.emitAnthropicCacheControl
+                    ? "[cache_awareness].emit_anthropic_cache_control is false"
+                    : "no eligible non-empty stable block";
+            warnings.push({
+                code: "cacheable_prefix_uncached",
+                message: `Stable prefix is cacheable (${stablePrefixTokens} tokens >= ${threshold} for model='${resolvedModel ?? "default"}') but no cache_control breakpoint will be emitted (${reason}). Set promptParts.cacheControl explicitly, switch outputFormat to 'stream-json', or enable [cache_awareness].emit_anthropic_cache_control.`,
+                stablePrefixTokens,
+                threshold,
+                reason,
+            });
+        }
+    }
+    // Slice κ: switch from the legacy positional `-p <prompt>` emission
+    // to `claude -p --input-format stream-json` and feed a JSON
+    // content-blocks payload via stdin. Non-κ callers (no cacheControl,
+    // or cacheControl with all flags false) take the existing positional
+    // path bit-for-bit. The κ path activates on EITHER an explicit caller
+    // opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
+    // (`autoEmittedCacheControlBlock`).
+    const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
+    let stdinPayload;
+    let cacheControlBlocks;
+    if (cacheControlRequested) {
+        if (params.outputFormat !== "stream-json") {
+            return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
+        }
+        // promptParts is non-null whenever cacheControlRequested is true
+        // (explicit opt-in lives in PromptParts; auto-emit guard requires
+        // promptParts to be defined).
+        const effectiveParts = autoEmittedCacheControlBlock !== null
+            ? {
+                ...params.promptParts,
+                cacheControl: {
+                    ...(params.promptParts.cacheControl ?? {}),
+                    [autoEmittedCacheControlBlock]: true,
+                },
+            }
+            : params.promptParts;
+        const built = assembleClaudeCacheBlocks(effectiveParts);
+        stdinPayload = `${JSON.stringify(built.payload)}\n`;
+        cacheControlBlocks = built.markedBlockCount;
+    }
+    const args = cacheControlRequested
+        ? [
+            "-p",
+            "--input-format",
+            "stream-json",
+            "--output-format",
+            "stream-json",
+            "--include-partial-messages",
+            "--verbose",
+        ]
+        : ["-p", effectivePrompt];
     if (resolvedModel)
         args.push("--model", resolvedModel);
-    if (params.outputFormat === "json") {
-        args.push("--output-format", "json");
-    }
-    else if (params.outputFormat === "stream-json") {
-        // Claude CLI 2.x rejects `--print --output-format stream-json` without
-        // `--verbose`: "When using --print, --output-format=stream-json requires
-        // --verbose". --verbose only affects what claude logs to stderr; the
-        // stream-json stdout payload is unchanged, so the gateway's NDJSON
-        // parser is unaffected.
-        args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
+    if (!cacheControlRequested) {
+        if (params.outputFormat === "json") {
+            args.push("--output-format", "json");
+        }
+        else if (params.outputFormat === "stream-json") {
+            // Claude CLI 2.x rejects `--print --output-format stream-json` without
+            // `--verbose`: "When using --print, --output-format=stream-json requires
+            // --verbose". --verbose only affects what claude logs to stderr; the
+            // stream-json stdout payload is unchanged, so the gateway's NDJSON
+            // parser is unaffected.
+            args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
+        }
     }
     if (params.allowedTools && params.allowedTools.length > 0) {
         sanitizeCliArgValues(params.allowedTools, "allowedTools");
@@ -1025,6 +1280,9 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         args,
         stablePrefixHash,
         stablePrefixTokens,
+        stdinPayload,
+        cacheControlBlocks,
+        warnings: warnings.length > 0 ? warnings : undefined,
     };
 }
 export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
@@ -1722,8 +1980,15 @@ export async function handleGeminiRequest(deps, params) {
         args.push(...sessionPlan.args);
         const userProvidedSession = sessionPlan.resumed;
         const effectiveSessionIdHint = sessionPlan.resumed ? params.sessionId : undefined;
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionIdHint, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("gemini_request", 1, "", corrId, err);
+        }
         const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
-        const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage);
+        const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
         // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, effectiveSessionIdHint);
@@ -1765,6 +2030,12 @@ export async function handleGeminiRequest(deps, params) {
         }
         deps.logger.info(`[${corrId}] gemini_request completed successfully in ${durationMs}ms`);
         const response = buildCliResponse("gemini", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, userProvidedSession, params.outputFormat);
+        if (worktreeResolution.worktreePath) {
+            const first = response.content[0];
+            if (first && first.type === "text") {
+                first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+            }
+        }
         const geminiUsage = extractUsageAndCost("gemini", stdout, params.outputFormat);
         safeFlightComplete(corrId, {
             response: stdout,
@@ -1852,6 +2123,13 @@ export async function handleGeminiRequestAsync(deps, params) {
             }
             await deps.sessionManager.updateSessionUsage(effectiveSessionId);
         }
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("gemini_request_async", 1, "", corrId, err);
+        }
         // Start job only after all session I/O succeeds. U23: forward outputFormat
         // so AsyncJobManager records it in the durable store (the manager also
         // surfaces it in the snapshot).
@@ -1860,7 +2138,7 @@ export async function handleGeminiRequestAsync(deps, params) {
         // Slice 1.5: pure async path — no upstream safeFlightStart, so the
         // manager owns both logStart and logComplete for this corrId.
         const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
-        const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
+        const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
         deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
         const asyncResponse = {
             success: true,
@@ -1873,6 +2151,9 @@ export async function handleGeminiRequestAsync(deps, params) {
         if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
             asyncResponse.reviewIntegrity = prep.reviewIntegrity;
         }
+        if (worktreeResolution.worktreePath) {
+            asyncResponse.worktreePath = worktreeResolution.worktreePath;
+        }
         return {
             content: [
                 {
@@ -1937,8 +2218,15 @@ export async function handleGrokRequest(deps, params) {
             createNewSession: params.createNewSession,
         });
         args.push(...sessionResult.resumeArgs);
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("grok_request", 1, "", corrId, err);
+        }
         const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
-        const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage);
+        const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
         // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -1982,6 +2270,12 @@ export async function handleGrokRequest(deps, params) {
         }
         deps.logger.info(`[${corrId}] grok_request completed successfully in ${durationMs}ms`);
         const response = buildCliResponse("grok", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
+        if (worktreeResolution.worktreePath) {
+            const first = response.content[0];
+            if (first && first.type === "text") {
+                first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+            }
+        }
         safeFlightComplete(corrId, {
             response: stdout,
             durationMs,
@@ -2072,11 +2366,18 @@ export async function handleGrokRequestAsync(deps, params) {
             const newSession = await deps.sessionManager.createSession("grok", "Grok Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
             effectiveSessionId = newSession.id;
         }
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("grok_request_async", 1, "", corrId, err);
+        }
         // Start job only after all session I/O succeeds
         assertUpstreamCliArgs("grok", args);
         assertUpstreamCliEnv("grok", undefined);
         const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
-        const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
+        const job = deps.asyncJobManager.startJob("grok", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
         deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
         const asyncResponse = {
             success: true,
@@ -2089,6 +2390,9 @@ export async function handleGrokRequestAsync(deps, params) {
         if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
             asyncResponse.reviewIntegrity = prep.reviewIntegrity;
         }
+        if (worktreeResolution.worktreePath) {
+            asyncResponse.worktreePath = worktreeResolution.worktreePath;
+        }
         return {
             content: [
                 {
@@ -2149,8 +2453,15 @@ export async function handleMistralRequest(deps, params) {
             createNewSession: params.createNewSession,
         });
         args.push(...sessionResult.resumeArgs);
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, sessionResult.effectiveSessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("mistral_request", 1, "", corrId, err);
+        }
         const mistralFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, params.sessionId, params.outputFormat);
-        let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
+        let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
         }
@@ -2162,7 +2473,7 @@ export async function handleMistralRequest(deps, params) {
                 const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
                 // Reuse the FR handoff built above — the retry preserves corrId,
                 // so the manager's logComplete still updates the original row.
-                result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
+                result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
                 if (isDeferredResponse(result)) {
                     return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
                 }
@@ -2208,6 +2519,12 @@ export async function handleMistralRequest(deps, params) {
         }
         deps.logger.info(`[${corrId}] mistral_request completed successfully in ${durationMs}ms`);
         const response = buildCliResponse("mistral", stdout, params.optimizeResponse ?? false, corrId, effectiveSessionId, prep, durationMs, sessionResult.userProvidedSession, params.outputFormat);
+        if (worktreeResolution.worktreePath) {
+            const first = response.content[0];
+            if (first && first.type === "text") {
+                first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+            }
+        }
         safeFlightComplete(corrId, {
             response: stdout,
             durationMs,
@@ -2293,10 +2610,17 @@ export async function handleMistralRequestAsync(deps, params) {
             const newSession = await deps.sessionManager.createSession("mistral", "Mistral Session", `${GATEWAY_SESSION_PREFIX}${randomUUID()}`);
             effectiveSessionId = newSession.id;
         }
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("mistral_request_async", 1, "", corrId, err);
+        }
         assertUpstreamCliArgs("mistral", args);
         assertUpstreamCliEnv("mistral", mistralEnv);
         const mistralAsyncFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, effectiveSessionId, params.outputFormat);
-        const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
+        const job = deps.asyncJobManager.startJob("mistral", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
         deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
         const asyncResponse = {
             success: true,
@@ -2309,6 +2633,9 @@ export async function handleMistralRequestAsync(deps, params) {
         if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
             asyncResponse.reviewIntegrity = prep.reviewIntegrity;
         }
+        if (worktreeResolution.worktreePath) {
+            asyncResponse.worktreePath = worktreeResolution.worktreePath;
+        }
         return {
             content: [
                 {
@@ -2395,6 +2722,17 @@ export async function handleCodexRequestAsync(deps, params) {
             const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
             effectiveSessionId = newSession.id;
         }
+        // Slice λ: resolve worktree directive after session I/O so resume reuse
+        // can read metadata.worktreePath. A pre-startJob failure here means
+        // prepCleanup is still owned locally; run it before returning.
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
+        }
+        catch (err) {
+            runPrepCleanupLocally();
+            return createErrorResponse("codex_request_async", 1, "", corrId, err);
+        }
         // Start job only after all session I/O succeeds. If startJob throws before
         // registering the record, ownership stays here and we run it in the catch.
         assertUpstreamCliArgs("codex", args);
@@ -2402,7 +2740,7 @@ export async function handleCodexRequestAsync(deps, params) {
         const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
         let job;
         try {
-            job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
+            job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
             // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
             // status. Release our local ownership claim so the catch path doesn't
             // double-fire.
@@ -2424,6 +2762,9 @@ export async function handleCodexRequestAsync(deps, params) {
         if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
             asyncResponse.reviewIntegrity = prep.reviewIntegrity;
         }
+        if (worktreeResolution.worktreePath) {
+            asyncResponse.worktreePath = worktreeResolution.worktreePath;
+        }
         return {
             content: [
                 {
@@ -2481,15 +2822,15 @@ export function createGatewayServer(deps = {}) {
             .max(100000, "Prompt too long (max 100k chars)")
             .optional()
             .describe("Prompt text for Claude (mutually exclusive with promptParts)"),
-        promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
+        promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Use for repeated calls that share a stable prefix — `system`/`tools`/`context` are the stable head; `task` is the volatile tail (never marked). Set `cacheControl: { system?: boolean, tools?: boolean, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ). Requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'` (Anthropic rejects 5m blocks after Claude Code's 1h-marked session-wrap content). Mutually exclusive with `prompt`. The stable prefix hash is logged to the flight recorder for cache_state aggregates."),
         model: z
             .string()
             .optional()
             .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
         outputFormat: z
             .enum(["text", "json", "stream-json"])
-            .default("text")
-            .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
+            .default("stream-json")
+            .describe("Output format (text|json|stream-json). DEFAULT: stream-json — the gateway parses NDJSON usage events to extract input/output/cache_read/cache_creation tokens + cost + model, persists them to the flight recorder for cache_state aggregates, and still returns the assistant text. Override to 'text' only when you truly want unparsed stdout (loses observability)."),
         sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
         continueSession: z.boolean().default(false).describe("Continue active session"),
         createNewSession: z.boolean().default(false).describe("Force new session"),
@@ -2561,6 +2902,7 @@ export function createGatewayServer(deps = {}) {
             .array(z.string())
             .optional()
             .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
+        worktree: WORKTREE_SCHEMA.optional(),
         approvalStrategy: z
             .enum(["legacy", "mcp_managed"])
             .default("legacy")
@@ -2591,7 +2933,7 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
         const startTime = Date.now();
         if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
             return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2665,7 +3007,11 @@ export function createGatewayServer(deps = {}) {
             sessionId: effectiveSessionId,
             cli: "claude",
         });
-        const warnings = ttlWarning ? [ttlWarning] : [];
+        // Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
+        const warnings = [
+            ...(ttlWarning ? [ttlWarning] : []),
+            ...(prep.warnings ?? []),
+        ];
         safeFlightStart({
             correlationId: corrId,
             cli: "claude",
@@ -2674,8 +3020,9 @@ export function createGatewayServer(deps = {}) {
             sessionId: effectiveSessionId,
             stablePrefixHash: prep.stablePrefixHash ?? undefined,
             stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
+            cacheControlBlocks: prep.cacheControlBlocks,
         }, runtime);
-        logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}`);
+        logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
         try {
             if (useContinue) {
                 args.push("--continue");
@@ -2684,10 +3031,19 @@ export function createGatewayServer(deps = {}) {
                 args.push("--session-id", effectiveSessionId);
                 await sessionManager.updateSessionUsage(effectiveSessionId);
             }
+            // Slice λ: resolve worktree directive into spawn cwd. Done after
+            // session resolution so resume reuse can read metadata.worktreePath.
+            let worktreeResolution = {};
+            try {
+                worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
+            }
+            catch (err) {
+                return createErrorResponse("claude_request", 1, "", corrId, err);
+            }
             // Idle timeout only for stream-json (text/json produce no output until done)
             const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
             const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
-            const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage);
+            const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
             // Deferred — job still running, return async reference
             if (isDeferredResponse(result)) {
                 return buildDeferredToolResponse(result, effectiveSessionId);
@@ -2744,7 +3100,14 @@ export function createGatewayServer(deps = {}) {
                     exitCode: 0,
                     status: "completed",
                 }, runtime);
-                return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
+                const streamResponse = buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
+                if (worktreeResolution.worktreePath) {
+                    const first = streamResponse.content[0];
+                    if (first && first.type === "text") {
+                        first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+                    }
+                }
+                return streamResponse;
             }
             safeFlightComplete(corrId, {
                 response: stdout,
@@ -2755,7 +3118,14 @@ export function createGatewayServer(deps = {}) {
                 exitCode: 0,
                 status: "completed",
             }, runtime);
-            return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
+            const nonStreamResponse = buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
+            if (worktreeResolution.worktreePath) {
+                const first = nonStreamResponse.content[0];
+                if (first && first.type === "text") {
+                    first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+                }
+            }
+            return nonStreamResponse;
         }
         catch (error) {
             const elapsedMs = Math.max(0, Date.now() - startTime);
@@ -2888,7 +3258,8 @@ export function createGatewayServer(deps = {}) {
             .array(z.string())
             .optional()
             .describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
-    }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
+        worktree: WORKTREE_SCHEMA.optional(),
+    }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
         const startTime = Date.now();
         const prep = prepareCodexRequest({
             prompt,
@@ -2940,9 +3311,20 @@ export function createGatewayServer(deps = {}) {
         // execution, on terminal status for the job-backed path (sync
         // completion or deferred). The outer finally MUST NOT clean again.
         const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
+        // Slice λ: resolve worktree directive into spawn cwd. Codex has no
+        // in-handler session resolution prior to spawn (session lookup is
+        // lazy via `codex exec resume`), so the user-supplied sessionId is
+        // the only reuse key.
+        let worktreeResolution = {};
+        try {
+            worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
+        }
+        catch (err) {
+            return createErrorResponse("codex_request", 1, "", corrId, err);
+        }
         try {
             const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
-            const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage);
+            const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
             // Deferred — job still running, return async reference. Cleanup
             // ownership belongs to AsyncJobManager via onComplete.
             if (isDeferredResponse(result)) {
@@ -3000,7 +3382,14 @@ export function createGatewayServer(deps = {}) {
                 cacheCreationTokens: codexUsage.cacheCreationTokens,
                 costUsd: codexUsage.costUsd,
             }, runtime);
-            return buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+            const codexResponse = buildCliResponse("codex", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
+            if (worktreeResolution.worktreePath) {
+                const first = codexResponse.content[0];
+                if (first && first.type === "text") {
+                    first.text = formatWorktreePrefix(worktreeResolution.worktreePath) + first.text;
+                }
+            }
+            return codexResponse;
         }
         catch (error) {
             const elapsedMs = Math.max(0, Date.now() - startTime);
@@ -3190,7 +3579,8 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
-    }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
+        worktree: WORKTREE_SCHEMA.optional(),
+    }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
         return handleGeminiRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3215,6 +3605,7 @@ export function createGatewayServer(deps = {}) {
             adminPolicyFiles,
             attachments,
             skipTrust,
+            worktree,
         });
     });
     //──────────────────────────────────────────────────────────────────────────────
@@ -3320,7 +3711,8 @@ export function createGatewayServer(deps = {}) {
             .array(z.string())
             .optional()
             .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, }) => {
+        worktree: WORKTREE_SCHEMA.optional(),
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
         return handleGrokRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3350,6 +3742,7 @@ export function createGatewayServer(deps = {}) {
             systemPromptOverride,
             allow,
             deny,
+            worktree,
         });
     });
     //──────────────────────────────────────────────────────────────────────────────
@@ -3439,7 +3832,8 @@ export function createGatewayServer(deps = {}) {
             .array(z.string())
             .optional()
             .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
+        worktree: WORKTREE_SCHEMA.optional(),
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
         return handleMistralRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3466,6 +3860,7 @@ export function createGatewayServer(deps = {}) {
             maxPrice,
             workingDir,
             addDir,
+            worktree,
         });
     });
     //──────────────────────────────────────────────────────────────────────────────
@@ -3486,15 +3881,15 @@ export function createGatewayServer(deps = {}) {
                 .max(100000, "Prompt too long (max 100k chars)")
                 .optional()
                 .describe("Prompt text for Claude (mutually exclusive with promptParts)"),
-            promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
+            promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task, cacheControl? }. Same semantics as claude_request: stable head (system/tools/context) + volatile tail (task). Set `cacheControl: { system?, tools?, context?: boolean }` to opt into explicit Anthropic prefix caching via `--input-format stream-json` (slice κ); requires `outputFormat: 'stream-json'` and hard-codes `ttl='1h'`. Mutually exclusive with `prompt`. Stable prefix hash logged to flight recorder."),
             model: z
                 .string()
                 .optional()
                 .describe("Model name or alias (e.g. sonnet, claude-sonnet-4-5-20250929, latest)"),
             outputFormat: z
                 .enum(["text", "json", "stream-json"])
-                .default("text")
-                .describe("Output format (text|json|stream-json). stream-json: NDJSON with idle timeout."),
+                .default("stream-json")
+                .describe("Output format (text|json|stream-json). DEFAULT: stream-json — same rationale as claude_request: keeps usage/cache/cost observable for cache_state aggregates. Override to 'text' only when raw stdout is required (loses observability)."),
             sessionId: z.string().optional().describe("Session ID (uses active if omitted)"),
             continueSession: z.boolean().default(false).describe("Continue active session"),
             createNewSession: z.boolean().default(false).describe("Force new session"),
@@ -3566,6 +3961,7 @@ export function createGatewayServer(deps = {}) {
                 .array(z.string())
                 .optional()
                 .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
+            worktree: WORKTREE_SCHEMA.optional(),
             approvalStrategy: z
                 .enum(["legacy", "mcp_managed"])
                 .default("legacy")
@@ -3595,7 +3991,7 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
             if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
                 return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
             }
@@ -3662,6 +4058,15 @@ export function createGatewayServer(deps = {}) {
                     sessionId: effectiveSessionId,
                     cli: "claude",
                 });
+                // Slice λ: resolve worktree directive after session metadata is
+                // settled so resume reuse can read metadata.worktreePath.
+                let worktreeResolution = {};
+                try {
+                    worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
+                }
+                catch (err) {
+                    return createErrorResponse("claude_request_async", 1, "", corrId, err);
+                }
                 // Idle timeout only for stream-json (text/json produce no output until done)
                 const effectiveIdleTimeout = outputFormat === "stream-json"
                     ? resolveIdleTimeout("claude", idleTimeoutMs)
@@ -3669,7 +4074,7 @@ export function createGatewayServer(deps = {}) {
                 assertUpstreamCliArgs("claude", args);
                 assertUpstreamCliEnv("claude", undefined);
                 const claudeAsyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
-                const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true);
+                const job = asyncJobManager.startJob("claude", args, corrId, worktreeResolution.cwd, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true, prep.stdinPayload);
                 logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
                 const asyncResponse = {
                     success: true,
@@ -3685,8 +4090,17 @@ export function createGatewayServer(deps = {}) {
                 if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
                     asyncResponse.reviewIntegrity = prep.reviewIntegrity;
                 }
-                if (ttlWarning) {
-                    asyncResponse.warnings = [ttlWarning];
+                if (worktreeResolution.worktreePath) {
+                    asyncResponse.worktreePath = worktreeResolution.worktreePath;
+                }
+                // Rec #4: include any prep-time warnings (e.g.
+                // cacheable_prefix_uncached) alongside ttlWarning.
+                const mergedWarnings = [
+                    ...(ttlWarning ? [ttlWarning] : []),
+                    ...(prep.warnings ?? []),
+                ];
+                if (mergedWarnings.length > 0) {
+                    asyncResponse.warnings = mergedWarnings;
                 }
                 return {
                     content: [
@@ -3791,7 +4205,8 @@ export function createGatewayServer(deps = {}) {
                 .array(z.string())
                 .optional()
                 .describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
-        }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
+            worktree: WORKTREE_SCHEMA.optional(),
+        }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, worktree, }) => {
             return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -3822,6 +4237,7 @@ export function createGatewayServer(deps = {}) {
                 ignoreRules,
                 workingDir,
                 addDir,
+                worktree,
             });
         });
         server.tool("gemini_request_async", {
@@ -3893,7 +4309,8 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
-        }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
+            worktree: WORKTREE_SCHEMA.optional(),
+        }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
             return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -3917,6 +4334,7 @@ export function createGatewayServer(deps = {}) {
                 adminPolicyFiles,
                 attachments,
                 skipTrust,
+                worktree,
             });
         });
         server.tool("grok_request_async", {
@@ -4018,7 +4436,8 @@ export function createGatewayServer(deps = {}) {
                 .array(z.string())
                 .optional()
                 .describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, }) => {
+            worktree: WORKTREE_SCHEMA.optional(),
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
             return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -4047,6 +4466,7 @@ export function createGatewayServer(deps = {}) {
                 systemPromptOverride,
                 allow,
                 deny,
+                worktree,
             });
         });
         server.tool("mistral_request_async", {
@@ -4132,7 +4552,8 @@ export function createGatewayServer(deps = {}) {
                 .array(z.string())
                 .optional()
                 .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
+            worktree: WORKTREE_SCHEMA.optional(),
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, worktree, }) => {
             return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -4158,6 +4579,7 @@ export function createGatewayServer(deps = {}) {
                 maxPrice,
                 workingDir,
                 addDir,
+                worktree,
             });
         });
         server.tool("llm_job_status", {
@@ -4673,6 +5095,12 @@ export function createGatewayServer(deps = {}) {
 //──────────────────────────────────────────────────────────────────────────────
 async function initializeSessionManager() {
     const config = loadConfig();
+    // Slice λ: file-backed sessions get a cleanup hook that tears down any
+    // git worktrees recorded on session.metadata.worktreePath. PG-backed
+    // sessions skip the hook (multi-tenant deployments don't necessarily
+    // own a single filesystem); revisit if/when worktree support extends
+    // there.
+    const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
     if (config.database && config.redis) {
         logger.info("Initializing PostgreSQL + Redis session manager");
         const { createDatabaseConnection } = await import("./db.js");
@@ -4682,7 +5110,9 @@ async function initializeSessionManager() {
     }
     else {
         logger.info("Initializing file-based session manager");
-        sessionManager = await createSessionManager(config, undefined, logger);
+        sessionManager = await createSessionManager(config, undefined, logger, {
+            cleanupHook: worktreeCleanupHook,
+        });
         logger.info("File-based session manager initialized");
     }
     resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));