llm-cli-gateway 1.10.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,181 @@
2
2
 
3
3
  All notable changes to the llm-cli-gateway project.
4
4
 
5
+ ## [1.12.0] - 2026-05-27 — Phase 4 slice ζ (working-dir + add-dir cross-provider)
6
+
7
+ Ships the seventh Phase 4 slice: working-directory and additional-directory
8
+ flags are now reachable across four CLIs in a single bundled PR. Three
9
+ commits land together (feature wiring, contract registration, test-veracity
10
+ regressions) plus this release commit.
11
+
12
+ ### Added — working-dir + add-dir parity for four CLIs
13
+
14
+ - **Claude** — `claude_request` and `claude_request_async` accept a new
15
+ `addDir: string[]` field. Threaded through `prepareClaudeRequest` →
16
+ `prepareClaudeHighImpactFlags` (`src/request-helpers.ts:687`). Each
17
+ entry emits its own `--add-dir` instance per `claude --help` ("Additional
18
+ directories to allow tool access to"). Claude has no working-dir flag
19
+ (uses the process cwd).
20
+ - **Codex** — `codex_request` and `codex_request_async` accept new
21
+ `workingDir: string` (min 1) and `addDir: string[]` fields. Both flags
22
+ are already in `CODEX_RESUME_FILTERED_FLAGS` (the original session's cwd
23
+ and writable-dir policy are inherited on resume), so `prepareCodexRequest`
24
+ gates emission on `sessionPlan.mode === "new"` — resume argv stays clean
25
+ rather than emitting then stripping. Emits `-C <DIR>` (one) and
26
+ `--add-dir <DIR>` (one instance per entry).
27
+ - **Grok** — `grok_request` and `grok_request_async` accept a new
28
+ `workingDir: string` (min 1) field. `prepareGrokRequest` emits
29
+ `--cwd <DIR>`. Grok has no `--add-dir` analogue.
30
+ - **Vibe (Mistral)** — `mistral_request` and `mistral_request_async`
31
+ accept new `workingDir: string` (min 1) and `addDir: string[]` fields.
32
+ `prepareMistralRequest` (the `request-helpers.ts` helper) emits
33
+ `--workdir <DIR>` (one) and `--add-dir <DIR>` (one per entry; Vibe's
34
+ `--help` states the flag "Can be specified multiple times").
35
+ `buildMistralRetryPrep` threads both fields through to the stale-model
36
+ recovery argv per the slice-δ retry-path invariant.
37
+ - **Gemini** is not re-wired: `--include-directories` was wired in master
38
+ before this slice. A regression-guard test in REGRESSIONS Zε asserts
39
+ the existing wiring stays intact while adjacent contract entries
40
+ changed.
41
+
42
+ ### Out of scope — worktree flags
43
+
44
+ Worktree flags (`-w/--worktree` on Claude, Gemini, Grok) create new git
45
+ worktree directories on disk with lifecycle implications and are
46
+ deferred to a later slice that defines explicit cleanup semantics.
47
+
48
+ ### Contract surface
49
+
50
+ `UPSTREAM_CLI_CONTRACTS` updates:
51
+
52
+ - `claude.flags["--add-dir"]` (arity:"one"; repeated instances accepted)
53
+ - `codex.flags["-C"]` (the gateway only emits the short form; codex
54
+ 0.134.0 accepts `--cd` as an alias but the contract registers exactly
55
+ what we emit — a future code path that emitted `--cd` would correctly
56
+ fail the contract check).
57
+ - `codex.flags["--add-dir"]`
58
+ - `grok.flags["--cwd"]`
59
+ - `mistral.flags["--workdir"]`
60
+ - `mistral.flags["--add-dir"]`
61
+ - `mcpParameters` arrays updated for all four CLIs.
62
+ - Six new passing conformance fixtures (`claude-add-dir`,
63
+ `codex-working-dir`, `codex-add-dir`, `grok-working-dir`,
64
+ `mistral-working-dir`, `mistral-add-dir`); each is mechanically
65
+ validated against `validateUpstreamCliArgs` in the REGRESSIONS Zε
66
+ suite, closing the gap class identified in slice ε round 1.
67
+
68
+ ### Test-veracity audit
69
+
70
+ Per the standing protocol (`feedback_test_veracity_audit_protocol`),
71
+ this slice's tests were audited by all five LLM reviewers (Codex,
72
+ Gemini, Grok, Mistral, Claude) in async parallel with mandatory
73
+ mutation-probe execution against `docs/plans/test-veracity-audit-slice-zeta.spec.md`.
74
+
75
+ **Round 1 outcomes:**
76
+
77
+ - Codex: UNCONDITIONAL APPROVE — all 13 probes [as predicted], all 37
78
+ tests VERIFIED. Baseline (`npx vitest run` on the slice file: 37/37;
79
+ `npm test`: 54 files / 853 tests; build + format:check clean).
80
+ - Grok: UNCONDITIONAL APPROVE — all 13 probes [as predicted].
81
+ - Mistral: UNCONDITIONAL APPROVE — all 13 probes [as predicted].
82
+ - Claude: UNCONDITIONAL APPROVE — all 13 probes red as predicted; ran
83
+ in an isolated `/tmp/zeta-audit-claude` worktree because the four other
84
+ parallel reviewers were concurrently mutating the live tree.
85
+ - Gemini: UNCONDITIONAL APPROVE — all 13 probes [as predicted].
86
+
87
+ First unanimous round-1 pass on a multi-CLI slice. The 37 new tests
88
+ (816 → 853 total) cover every new field/flag/fixture across REGRESSIONS
89
+ Zα/β/ε:
90
+
91
+ - **Zα** — Registered tool inputSchema for every new field on every
92
+ tool (sync + async), including `.min(1)` empty-string rejection on
93
+ `workingDir`.
94
+ - **Zβ** — `prepare*Request` end-to-end argv emission per CLI. The
95
+ Codex resume branch asserts NEITHER `-C` NOR `--add-dir` appears
96
+ in resume argv. `buildMistralRetryPrep` regression catches the
97
+ slice-δ retry-path bug class. Prepare → contract end-to-end
98
+ consistency covers all four CLIs.
99
+ - **Zε** — `UPSTREAM_CLI_CONTRACTS` introspection + mechanical
100
+ fixture validation in the same `it()` block (slice-ε round-1 gap
101
+ class). Includes a regression guard for the pre-existing Gemini
102
+ `--include-directories` wiring.
103
+
104
+ ### Mechanical anchors (verify with `rg` before relying)
105
+
106
+ - `src/request-helpers.ts` — `ClaudeHighImpactFlagsInput.addDir`
107
+ (`:610`), `prepareClaudeHighImpactFlags` emission (`:686-690`).
108
+ `PrepareMistralRequestInput.workingDir`/`.addDir` (`:248-264`),
109
+ `prepareMistralRequest` emission (`:300-307`).
110
+ - `src/index.ts` — `prepareClaudeRequest` (`:1338`),
111
+ `prepareCodexRequest` new-session gate (`:1687-1700`),
112
+ `prepareGrokRequest` `--cwd` emission (`:2065-2067`),
113
+ `prepareMistralRequest` wrapper (`:2153-2168`),
114
+ `buildMistralRetryPrep` (`:2249-2289`).
115
+ - `src/upstream-contracts.ts` — flag registrations and conformance
116
+ fixtures for the four CLIs (`:146-149`, `:281-292`, `:438-441`,
117
+ `:524-533`, plus `mcpParameters` entries).
118
+
119
+ ## [1.11.0] - 2026-05-27 — Phase 4 slice η (Claude `--fallback-model` + `--json-schema`)
120
+
121
+ Ships the sixth Phase 4 slice: Claude's reliability fallback and
122
+ structured-output JSON-Schema constraint flags are now reachable from
123
+ `claude_request` and `claude_request_async`. Three commits land together
124
+ (feature wiring, contract registration, test-veracity regressions) plus
125
+ this release commit.
126
+
127
+ ### Added — `--fallback-model` and `--json-schema` for Claude
128
+
129
+ - `claude_request` and `claude_request_async` accept a new `fallbackModel`
130
+ field (non-empty string, validated via `z.string().min(1)`). Threaded
131
+ through `prepareClaudeRequest` → `prepareClaudeHighImpactFlags`
132
+ (`src/request-helpers.ts:651`) → `--fallback-model <model>` argv pair.
133
+ Effective only with Claude `--print`; the gateway always passes `-p`,
134
+ so no extra gating required.
135
+ - Both tools accept a new `jsonSchema` field
136
+ (`string | Record<string, unknown>`). Per `claude --help`, the CLI
137
+ argument is the JSON Schema *literal* (not a path; contrast with Codex
138
+ `--output-schema`). Object values are `JSON.stringify`-d; string values
139
+ pass verbatim. Use with `outputFormat: "json"` for structured output
140
+ validation. Achieves Codex parity for structured-output validation
141
+ in a single slice.
142
+ - `UPSTREAM_CLI_CONTRACTS.claude.flags` registers `--fallback-model` and
143
+ `--json-schema` with `arity: "one"`. `mcpParameters` includes both new
144
+ field names. Two new passing conformance fixtures
145
+ (`claude-fallback-model`, `claude-json-schema`) pin the contract; both
146
+ are mechanically validated against `validateUpstreamCliArgs` in the
147
+ REGRESSIONS Hε suite.
148
+
149
+ ### Test-veracity audit
150
+
151
+ Per the standing protocol (`feedback_test_veracity_audit_protocol`),
152
+ this slice's tests were audited by Codex + Gemini + Grok + Mistral in
153
+ async parallel with mandatory mutation-probe execution. Spec at
154
+ `docs/plans/test-veracity-audit-slice-eta.spec.md`. Round 1 outcomes:
155
+ Grok + Mistral unanimous UNCONDITIONAL APPROVE; Gemini stalled at 682B
156
+ stderr for 15+ minutes (cancelled, documented quota/stall-class
157
+ blocker); Codex initially REJECTED on P-Hβ-4 with an invalid claim
158
+ ("removing sync `jsonSchema` left the test green") — pre-verification
159
+ on a clean tree confirmed the mutation does turn `Hα-4` + `Hα-6` RED as
160
+ the spec predicts. Round-2 pushback with the verbatim vitest output:
161
+ Codex self-corrected, reproduced the mutation in a worktree, observed
162
+ the predicted red, restored, and issued UNCONDITIONAL APPROVE.
163
+
164
+ Three substantive reviewer approvals (Grok, Mistral, Codex) from
165
+ independent vendor families satisfy the multi-LLM gate; Gemini stall
166
+ documented.
167
+
168
+ Test count: 816 → 837 (21 new across one file:
169
+ `src/__tests__/test-veracity-regressions-slice-eta.test.ts`).
170
+
171
+ ### Known caveats
172
+
173
+ - `npm run check` still excludes `format:check` (gap first flagged in
174
+ v1.8.0). Run both locally before pushing.
175
+ - Claude `--fallback-model` and `--json-schema` are CLI-side gated to
176
+ `--print` mode by Claude itself; both gateway tools always pass `-p`,
177
+ so this is invisible to callers but worth noting if the upstream CLI
178
+ flag semantics change.
179
+
5
180
  ## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
6
181
 
7
182
  Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format
package/dist/index.d.ts CHANGED
@@ -155,6 +155,9 @@ export declare function prepareClaudeRequest(params: {
155
155
  maxTurns?: number;
156
156
  effort?: ClaudeEffortLevel;
157
157
  excludeDynamicSystemPromptSections?: boolean;
158
+ fallbackModel?: string;
159
+ jsonSchema?: string | Record<string, unknown>;
160
+ addDir?: string[];
158
161
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
159
162
  export interface CodexRequestPrep extends CliRequestPrep {
160
163
  /**
@@ -197,6 +200,8 @@ export declare function prepareCodexRequest(params: {
197
200
  images?: string[];
198
201
  ignoreUserConfig?: boolean;
199
202
  ignoreRules?: boolean;
203
+ workingDir?: string;
204
+ addDir?: string[];
200
205
  }, runtime?: GatewayServerRuntime): CodexRequestPrep | ExtendedToolResponse;
201
206
  export declare function prepareGeminiRequest(params: {
202
207
  prompt?: string;
@@ -252,6 +257,11 @@ export declare function prepareGrokRequest(params: {
252
257
  * iterations for cost / latency control. Mirrors Claude's wiring.
253
258
  */
254
259
  maxTurns?: number;
260
+ /**
261
+ * Phase 4 slice ζ: emit `--cwd <DIR>` so headless callers can set Grok's
262
+ * working directory without depending on the gateway process's cwd.
263
+ */
264
+ workingDir?: string;
255
265
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
256
266
  export declare function prepareMistralRequest(params: {
257
267
  prompt?: string;
@@ -278,6 +288,10 @@ export declare function prepareMistralRequest(params: {
278
288
  maxTurns?: number;
279
289
  /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
280
290
  maxPrice?: number;
291
+ /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
292
+ workingDir?: string;
293
+ /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
294
+ addDir?: string[];
281
295
  }, runtime?: GatewayServerRuntime): (CliRequestPrep & {
282
296
  mistralEnv: Record<string, string>;
283
297
  }) | ExtendedToolResponse;
@@ -290,7 +304,7 @@ export declare function prepareMistralRequest(params: {
290
304
  * through here, or a fresh-workspace / budgeted run can degrade on
291
305
  * the second attempt.
292
306
  */
293
- export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "effort" | "reasoningEffort" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice"> & {
307
+ export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "effort" | "reasoningEffort" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice" | "workingDir" | "addDir"> & {
294
308
  effectivePrompt: string;
295
309
  }, recoveryModel: string): {
296
310
  args: string[];
@@ -366,6 +380,8 @@ export interface GrokRequestParams {
366
380
  forceRefresh?: boolean;
367
381
  /** Phase 4 slice δ: cap agent-loop iterations via `--max-turns N`. */
368
382
  maxTurns?: number;
383
+ /** Phase 4 slice ζ: emit `--cwd <DIR>` so the CLI uses the specified working directory. */
384
+ workingDir?: string;
369
385
  }
370
386
  export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
371
387
  export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
@@ -396,6 +412,10 @@ export interface MistralRequestParams {
396
412
  maxTurns?: number;
397
413
  /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
398
414
  maxPrice?: number;
415
+ /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
416
+ workingDir?: string;
417
+ /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
418
+ addDir?: string[];
399
419
  }
400
420
  export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
401
421
  export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
@@ -428,6 +448,8 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
428
448
  images?: string[];
429
449
  ignoreUserConfig?: boolean;
430
450
  ignoreRules?: boolean;
451
+ workingDir?: string;
452
+ addDir?: string[];
431
453
  }): Promise<ExtendedToolResponse>;
432
454
  export declare function createGatewayServer(deps?: GatewayServerDeps): McpServer;
433
455
  export {};
package/dist/index.js CHANGED
@@ -1005,6 +1005,9 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1005
1005
  maxTurns: params.maxTurns,
1006
1006
  effort: params.effort,
1007
1007
  excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
1008
+ fallbackModel: params.fallbackModel,
1009
+ jsonSchema: params.jsonSchema,
1010
+ addDir: params.addDir,
1008
1011
  }));
1009
1012
  return {
1010
1013
  corrId,
@@ -1124,6 +1127,19 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1124
1127
  // and are emitted in both branches.
1125
1128
  let highImpactCleanup;
1126
1129
  if (sessionPlan.mode === "new") {
1130
+ // Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
1131
+ // Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
1132
+ // the original session's cwd and writable-dir policy, so emitting them
1133
+ // on resume would be silently stripped (wasteful + misleading on argv
1134
+ // logs). Gating here mirrors `--search` / `--sandbox` / `--full-auto`.
1135
+ if (params.workingDir) {
1136
+ args.push("-C", params.workingDir);
1137
+ }
1138
+ if (params.addDir && params.addDir.length > 0) {
1139
+ for (const dir of params.addDir) {
1140
+ args.push("--add-dir", dir);
1141
+ }
1142
+ }
1127
1143
  const high = prepareCodexHighImpactFlags({
1128
1144
  outputSchema: params.outputSchema,
1129
1145
  search: params.search,
@@ -1379,6 +1395,9 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1379
1395
  if (params.maxTurns !== undefined) {
1380
1396
  args.push("--max-turns", String(params.maxTurns));
1381
1397
  }
1398
+ if (params.workingDir) {
1399
+ args.push("--cwd", params.workingDir);
1400
+ }
1382
1401
  return {
1383
1402
  corrId,
1384
1403
  effectivePrompt,
@@ -1465,6 +1484,8 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
1465
1484
  trust: params.trust,
1466
1485
  maxTurns: params.maxTurns,
1467
1486
  maxPrice: params.maxPrice,
1487
+ workingDir: params.workingDir,
1488
+ addDir: params.addDir,
1468
1489
  });
1469
1490
  if (prep.ignoredDisallowedTools) {
1470
1491
  runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
@@ -1519,6 +1540,8 @@ export function buildMistralRetryPrep(params, recoveryModel) {
1519
1540
  trust: params.trust,
1520
1541
  maxTurns: params.maxTurns,
1521
1542
  maxPrice: params.maxPrice,
1543
+ workingDir: params.workingDir,
1544
+ addDir: params.addDir,
1522
1545
  });
1523
1546
  }
1524
1547
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
@@ -1860,6 +1883,7 @@ export async function handleGrokRequest(deps, params) {
1860
1883
  optimizePrompt: params.optimizePrompt,
1861
1884
  operation: "grok_request",
1862
1885
  maxTurns: params.maxTurns,
1886
+ workingDir: params.workingDir,
1863
1887
  }, runtime);
1864
1888
  if (!("args" in prep))
1865
1889
  return prep;
@@ -1981,6 +2005,7 @@ export async function handleGrokRequestAsync(deps, params) {
1981
2005
  optimizePrompt: params.optimizePrompt,
1982
2006
  operation: "grok_request_async",
1983
2007
  maxTurns: params.maxTurns,
2008
+ workingDir: params.workingDir,
1984
2009
  }, runtime);
1985
2010
  if (!("args" in prep))
1986
2011
  return prep;
@@ -2065,6 +2090,8 @@ export async function handleMistralRequest(deps, params) {
2065
2090
  trust: params.trust,
2066
2091
  maxTurns: params.maxTurns,
2067
2092
  maxPrice: params.maxPrice,
2093
+ workingDir: params.workingDir,
2094
+ addDir: params.addDir,
2068
2095
  }, runtime);
2069
2096
  if (!("args" in prep))
2070
2097
  return prep;
@@ -2200,6 +2227,8 @@ export async function handleMistralRequestAsync(deps, params) {
2200
2227
  trust: params.trust,
2201
2228
  maxTurns: params.maxTurns,
2202
2229
  maxPrice: params.maxPrice,
2230
+ workingDir: params.workingDir,
2231
+ addDir: params.addDir,
2203
2232
  }, runtime);
2204
2233
  if (!("args" in prep))
2205
2234
  return prep;
@@ -2288,6 +2317,8 @@ export async function handleCodexRequestAsync(deps, params) {
2288
2317
  images: params.images,
2289
2318
  ignoreUserConfig: params.ignoreUserConfig,
2290
2319
  ignoreRules: params.ignoreRules,
2320
+ workingDir: params.workingDir,
2321
+ addDir: params.addDir,
2291
2322
  }, runtime);
2292
2323
  if (!("args" in prep))
2293
2324
  return prep;
@@ -2481,6 +2512,21 @@ export function createGatewayServer(deps = {}) {
2481
2512
  .boolean()
2482
2513
  .optional()
2483
2514
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2515
+ // Phase 4 slice η — Claude reliability + structured-output parity
2516
+ fallbackModel: z
2517
+ .string()
2518
+ .min(1)
2519
+ .optional()
2520
+ .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
2521
+ jsonSchema: z
2522
+ .union([z.string(), z.record(z.unknown())])
2523
+ .optional()
2524
+ .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
2525
+ // Phase 4 slice ζ — Claude additional-workspace-dirs parity
2526
+ addDir: z
2527
+ .array(z.string())
2528
+ .optional()
2529
+ .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
2484
2530
  approvalStrategy: z
2485
2531
  .enum(["legacy", "mcp_managed"])
2486
2532
  .default("legacy")
@@ -2511,7 +2557,7 @@ export function createGatewayServer(deps = {}) {
2511
2557
  .boolean()
2512
2558
  .default(false)
2513
2559
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2514
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2560
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2515
2561
  const startTime = Date.now();
2516
2562
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2517
2563
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2541,6 +2587,9 @@ export function createGatewayServer(deps = {}) {
2541
2587
  maxTurns,
2542
2588
  effort,
2543
2589
  excludeDynamicSystemPromptSections,
2590
+ fallbackModel,
2591
+ jsonSchema,
2592
+ addDir,
2544
2593
  }, runtime);
2545
2594
  if (!("args" in prep))
2546
2595
  return prep;
@@ -2795,7 +2844,17 @@ export function createGatewayServer(deps = {}) {
2795
2844
  .boolean()
2796
2845
  .optional()
2797
2846
  .describe("Codex --ignore-rules: skip project rule files for this run."),
2798
- }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
2847
+ // Phase 4 slice ζ Codex working-dir + add-dir parity (new sessions only).
2848
+ workingDir: z
2849
+ .string()
2850
+ .min(1)
2851
+ .optional()
2852
+ .describe("Codex -C/--cd <DIR>: working root for this session. Emitted on new sessions only; resume inherits the original session's cwd via CODEX_RESUME_FILTERED_FLAGS."),
2853
+ addDir: z
2854
+ .array(z.string())
2855
+ .optional()
2856
+ .describe("Codex --add-dir <DIR>: additional writable workspace directories. Emitted once per entry on new sessions only; resume inherits the original session's writable-dir policy."),
2857
+ }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
2799
2858
  const startTime = Date.now();
2800
2859
  const prep = prepareCodexRequest({
2801
2860
  prompt,
@@ -2824,6 +2883,8 @@ export function createGatewayServer(deps = {}) {
2824
2883
  images,
2825
2884
  ignoreUserConfig,
2826
2885
  ignoreRules,
2886
+ workingDir,
2887
+ addDir,
2827
2888
  }, runtime);
2828
2889
  if (!("args" in prep))
2829
2890
  return prep;
@@ -3195,7 +3256,13 @@ export function createGatewayServer(deps = {}) {
3195
3256
  .default(false)
3196
3257
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3197
3258
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3198
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, }) => {
3259
+ // Phase 4 slice ζ Grok working-directory parity.
3260
+ workingDir: z
3261
+ .string()
3262
+ .min(1)
3263
+ .optional()
3264
+ .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
3265
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, }) => {
3199
3266
  return handleGrokRequest({ sessionManager, logger, runtime }, {
3200
3267
  prompt,
3201
3268
  promptParts,
@@ -3219,6 +3286,7 @@ export function createGatewayServer(deps = {}) {
3219
3286
  idleTimeoutMs,
3220
3287
  forceRefresh,
3221
3288
  maxTurns,
3289
+ workingDir,
3222
3290
  });
3223
3291
  });
3224
3292
  //──────────────────────────────────────────────────────────────────────────────
@@ -3298,7 +3366,17 @@ export function createGatewayServer(deps = {}) {
3298
3366
  .describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
3299
3367
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3300
3368
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
3301
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, }) => {
3369
+ // Phase 4 slice ζ Vibe working-directory + additional-dirs parity.
3370
+ workingDir: z
3371
+ .string()
3372
+ .min(1)
3373
+ .optional()
3374
+ .describe("Vibe --workdir <DIR>: change to this directory before running. Single value (Vibe accepts one --workdir per invocation)."),
3375
+ addDir: z
3376
+ .array(z.string())
3377
+ .optional()
3378
+ .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
3379
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
3302
3380
  return handleMistralRequest({ sessionManager, logger, runtime }, {
3303
3381
  prompt,
3304
3382
  promptParts,
@@ -3323,6 +3401,8 @@ export function createGatewayServer(deps = {}) {
3323
3401
  trust,
3324
3402
  maxTurns,
3325
3403
  maxPrice,
3404
+ workingDir,
3405
+ addDir,
3326
3406
  });
3327
3407
  });
3328
3408
  //──────────────────────────────────────────────────────────────────────────────
@@ -3408,6 +3488,21 @@ export function createGatewayServer(deps = {}) {
3408
3488
  .boolean()
3409
3489
  .optional()
3410
3490
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
3491
+ // Phase 4 slice η — Claude reliability + structured-output parity
3492
+ fallbackModel: z
3493
+ .string()
3494
+ .min(1)
3495
+ .optional()
3496
+ .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
3497
+ jsonSchema: z
3498
+ .union([z.string(), z.record(z.unknown())])
3499
+ .optional()
3500
+ .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
3501
+ // Phase 4 slice ζ — Claude additional-workspace-dirs parity
3502
+ addDir: z
3503
+ .array(z.string())
3504
+ .optional()
3505
+ .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
3411
3506
  approvalStrategy: z
3412
3507
  .enum(["legacy", "mcp_managed"])
3413
3508
  .default("legacy")
@@ -3437,7 +3532,7 @@ export function createGatewayServer(deps = {}) {
3437
3532
  .boolean()
3438
3533
  .default(false)
3439
3534
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3440
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3535
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3441
3536
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
3442
3537
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
3443
3538
  }
@@ -3466,6 +3561,9 @@ export function createGatewayServer(deps = {}) {
3466
3561
  maxTurns,
3467
3562
  effort,
3468
3563
  excludeDynamicSystemPromptSections,
3564
+ fallbackModel,
3565
+ jsonSchema,
3566
+ addDir,
3469
3567
  }, runtime);
3470
3568
  if (!("args" in prep))
3471
3569
  return prep;
@@ -3620,7 +3718,17 @@ export function createGatewayServer(deps = {}) {
3620
3718
  images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
3621
3719
  ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
3622
3720
  ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
3623
- }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
3721
+ // Phase 4 slice ζ Codex working-dir + add-dir parity (new sessions only).
3722
+ workingDir: z
3723
+ .string()
3724
+ .min(1)
3725
+ .optional()
3726
+ .describe("Codex -C/--cd <DIR>: working root for this session. New sessions only; resume inherits the original session's cwd."),
3727
+ addDir: z
3728
+ .array(z.string())
3729
+ .optional()
3730
+ .describe("Codex --add-dir <DIR>: additional writable workspace directories (repeat per entry). New sessions only."),
3731
+ }, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, workingDir, addDir, }) => {
3624
3732
  return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3625
3733
  prompt,
3626
3734
  promptParts,
@@ -3649,6 +3757,8 @@ export function createGatewayServer(deps = {}) {
3649
3757
  images,
3650
3758
  ignoreUserConfig,
3651
3759
  ignoreRules,
3760
+ workingDir,
3761
+ addDir,
3652
3762
  });
3653
3763
  });
3654
3764
  server.tool("gemini_request_async", {
@@ -3815,7 +3925,13 @@ export function createGatewayServer(deps = {}) {
3815
3925
  .default(false)
3816
3926
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3817
3927
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3818
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, }) => {
3928
+ // Phase 4 slice ζ Grok working-directory parity.
3929
+ workingDir: z
3930
+ .string()
3931
+ .min(1)
3932
+ .optional()
3933
+ .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
3934
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, }) => {
3819
3935
  return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3820
3936
  prompt,
3821
3937
  promptParts,
@@ -3838,6 +3954,7 @@ export function createGatewayServer(deps = {}) {
3838
3954
  idleTimeoutMs,
3839
3955
  forceRefresh,
3840
3956
  maxTurns,
3957
+ workingDir,
3841
3958
  });
3842
3959
  });
3843
3960
  server.tool("mistral_request_async", {
@@ -3913,7 +4030,17 @@ export function createGatewayServer(deps = {}) {
3913
4030
  .describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
3914
4031
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3915
4032
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
3916
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, }) => {
4033
+ // Phase 4 slice ζ Vibe working-directory + additional-dirs parity.
4034
+ workingDir: z
4035
+ .string()
4036
+ .min(1)
4037
+ .optional()
4038
+ .describe("Vibe --workdir <DIR>: change to this directory before running. Single value per invocation."),
4039
+ addDir: z
4040
+ .array(z.string())
4041
+ .optional()
4042
+ .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
4043
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, workingDir, addDir, }) => {
3917
4044
  return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
3918
4045
  prompt,
3919
4046
  promptParts,
@@ -3937,6 +4064,8 @@ export function createGatewayServer(deps = {}) {
3937
4064
  trust,
3938
4065
  maxTurns,
3939
4066
  maxPrice,
4067
+ workingDir,
4068
+ addDir,
3940
4069
  });
3941
4070
  });
3942
4071
  server.tool("llm_job_status", {
@@ -125,6 +125,17 @@ export interface PrepareMistralRequestInput {
125
125
  * only).
126
126
  */
127
127
  maxPrice?: number;
128
+ /**
129
+ * Phase 4 slice ζ: emit `--workdir <DIR>` so Vibe changes into the named
130
+ * directory before running. Single value (Vibe accepts one --workdir).
131
+ */
132
+ workingDir?: string;
133
+ /**
134
+ * Phase 4 slice ζ: emit `--add-dir <DIR>` per directory. Vibe's `--help`
135
+ * states the flag "Can be specified multiple times" — each entry is its
136
+ * own argv pair.
137
+ */
138
+ addDir?: string[];
128
139
  }
129
140
  export interface PrepareMistralRequestResult {
130
141
  args: string[];
@@ -350,6 +361,29 @@ export interface ClaudeHighImpactFlagsInput {
350
361
  maxTurns?: number;
351
362
  effort?: ClaudeEffortLevel;
352
363
  excludeDynamicSystemPromptSections?: boolean;
364
+ /**
365
+ * Phase 4 slice η — Claude `--fallback-model <model>`. Routes overloaded-model
366
+ * requests to the named fallback. Only effective with `--print` (we always pass
367
+ * `-p`, so no extra gating required here).
368
+ */
369
+ fallbackModel?: string;
370
+ /**
371
+ * Phase 4 slice η — Claude `--json-schema <schema>`. Per `claude --help`, the
372
+ * argument is the JSON Schema *literal*, not a path. Object values are
373
+ * `JSON.stringify`-d; string values are passed verbatim (caller already wrote
374
+ * a JSON literal). No temp file lifecycle needed (contrast with Codex
375
+ * `--output-schema`, which takes a path).
376
+ */
377
+ jsonSchema?: string | Record<string, unknown>;
378
+ /**
379
+ * Phase 4 slice ζ — Claude `--add-dir <dirs...>`. Additional directories the
380
+ * Claude CLI is allowed to read/write beyond the process cwd. The CLI accepts
381
+ * a single variadic flag (space-separated values) per `claude --help`; we
382
+ * emit one `--add-dir` instance per directory so each path is its own argv
383
+ * token (survives any future tightening of the variadic parser without
384
+ * changing the call site).
385
+ */
386
+ addDir?: string[];
353
387
  }
354
388
  /**
355
389
  * Emit Claude high-impact feature flags (U25) as a flat argv segment.
@@ -185,6 +185,14 @@ export function prepareMistralRequest(input) {
185
185
  if (input.maxPrice !== undefined) {
186
186
  args.push("--max-price", String(input.maxPrice));
187
187
  }
188
+ if (input.workingDir) {
189
+ args.push("--workdir", input.workingDir);
190
+ }
191
+ if (input.addDir && input.addDir.length > 0) {
192
+ for (const dir of input.addDir) {
193
+ args.push("--add-dir", dir);
194
+ }
195
+ }
188
196
  const ignoredDisallowedTools = Boolean(input.disallowedTools && input.disallowedTools.length > 0);
189
197
  return { args, env, ignoredDisallowedTools };
190
198
  }
@@ -438,6 +446,18 @@ export function prepareClaudeHighImpactFlags(input) {
438
446
  if (input.excludeDynamicSystemPromptSections) {
439
447
  args.push("--exclude-dynamic-system-prompt-sections");
440
448
  }
449
+ if (input.fallbackModel !== undefined) {
450
+ args.push("--fallback-model", input.fallbackModel);
451
+ }
452
+ if (input.jsonSchema !== undefined) {
453
+ const schemaArg = typeof input.jsonSchema === "string" ? input.jsonSchema : JSON.stringify(input.jsonSchema);
454
+ args.push("--json-schema", schemaArg);
455
+ }
456
+ if (input.addDir && input.addDir.length > 0) {
457
+ for (const dir of input.addDir) {
458
+ args.push("--add-dir", dir);
459
+ }
460
+ }
441
461
  return args;
442
462
  }
443
463
  //──────────────────────────────────────────────────────────────────────────────
@@ -37,6 +37,10 @@ export const UPSTREAM_CLI_CONTRACTS = {
37
37
  "maxTurns",
38
38
  "effort",
39
39
  "excludeDynamicSystemPromptSections",
40
+ "fallbackModel",
41
+ "jsonSchema",
42
+ // Phase 4 slice ζ
43
+ "addDir",
40
44
  "approvalStrategy",
41
45
  "mcpServers",
42
46
  "strictMcpConfig",
@@ -78,6 +82,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
78
82
  arity: "none",
79
83
  description: "Trim dynamic system prompt sections",
80
84
  },
85
+ "--fallback-model": {
86
+ arity: "one",
87
+ description: "Auto-fallback model when default is overloaded (Claude --print only)",
88
+ },
89
+ "--json-schema": {
90
+ arity: "one",
91
+ description: "JSON Schema literal constraining structured output",
92
+ },
93
+ "--add-dir": {
94
+ arity: "one",
95
+ description: "Additional workspace directory (Phase 4 slice ζ; repeat once per directory)",
96
+ },
81
97
  "--continue": { arity: "none", description: "Continue active session" },
82
98
  "--session-id": { arity: "one", description: "Session id" },
83
99
  },
@@ -95,6 +111,37 @@ export const UPSTREAM_CLI_CONTRACTS = {
95
111
  args: ["-p", "hello", "--not-a-claude-flag"],
96
112
  expect: "fail",
97
113
  },
114
+ {
115
+ // Phase 4 slice η: --fallback-model wired through prepareClaudeRequest.
116
+ id: "claude-fallback-model",
117
+ description: "Phase 4 slice η: --fallback-model accepted",
118
+ args: ["-p", "hello", "--fallback-model", "claude-haiku-4-5-20251001"],
119
+ expect: "pass",
120
+ },
121
+ {
122
+ // Phase 4 slice η: --json-schema accepts an inline JSON Schema literal
123
+ // (per `claude --help` example), not a path. Codex parity for
124
+ // structured-output validation in one slice.
125
+ id: "claude-json-schema",
126
+ description: "Phase 4 slice η: --json-schema accepts inline JSON literal",
127
+ args: [
128
+ "-p",
129
+ "hello",
130
+ "--output-format",
131
+ "json",
132
+ "--json-schema",
133
+ '{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}',
134
+ ],
135
+ expect: "pass",
136
+ },
137
+ {
138
+ // Phase 4 slice ζ: --add-dir wired through prepareClaudeHighImpactFlags.
139
+ // Repeated once per directory; each instance has arity:"one".
140
+ id: "claude-add-dir",
141
+ description: "Phase 4 slice ζ: repeated --add-dir is accepted",
142
+ args: ["-p", "hello", "--add-dir", "/tmp/a", "--add-dir", "/tmp/b"],
143
+ expect: "pass",
144
+ },
98
145
  ],
99
146
  },
100
147
  codex: {
@@ -131,6 +178,9 @@ export const UPSTREAM_CLI_CONTRACTS = {
131
178
  "images",
132
179
  "ignoreUserConfig",
133
180
  "ignoreRules",
181
+ // Phase 4 slice ζ
182
+ "workingDir",
183
+ "addDir",
134
184
  ],
135
185
  resumeOnlyFlags: ["--last"],
136
186
  // Phase 4 slice α (v1.8.0) verified that `codex exec resume` accepts
@@ -170,6 +220,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
170
220
  "-i": { arity: "one", description: "Image path" },
171
221
  "--ignore-user-config": { arity: "none", description: "Ignore user config" },
172
222
  "--ignore-rules": { arity: "none", description: "Ignore rule files" },
223
+ // The gateway only ever emits the short form `-C` (codex 0.134.0 accepts
224
+ // both `-C` and `--cd` as aliases). The contract registers exactly what
225
+ // we emit; if a future code path emits `--cd` instead, the contract
226
+ // check will fail loudly — which is the intended catch.
227
+ "-C": {
228
+ arity: "one",
229
+ description: "Working root for the session (Phase 4 slice ζ; new sessions only)",
230
+ },
231
+ "--add-dir": {
232
+ arity: "one",
233
+ description: "Additional writable workspace directory (Phase 4 slice ζ; repeat once per directory; new sessions only)",
234
+ },
173
235
  },
174
236
  env: {},
175
237
  conformanceFixtures: [
@@ -206,6 +268,26 @@ export const UPSTREAM_CLI_CONTRACTS = {
206
268
  args: ["exec", "resume", "--search", "session-id", "hello"],
207
269
  expect: "fail",
208
270
  },
271
+ {
272
+ id: "codex-working-dir",
273
+ description: "Phase 4 slice ζ: -C <DIR> accepted on a new session",
274
+ args: ["exec", "--skip-git-repo-check", "-C", "/tmp/work", "hello"],
275
+ expect: "pass",
276
+ },
277
+ {
278
+ id: "codex-add-dir",
279
+ description: "Phase 4 slice ζ: repeated --add-dir accepted on a new session",
280
+ args: [
281
+ "exec",
282
+ "--skip-git-repo-check",
283
+ "--add-dir",
284
+ "/tmp/a",
285
+ "--add-dir",
286
+ "/tmp/b",
287
+ "hello",
288
+ ],
289
+ expect: "pass",
290
+ },
209
291
  ],
210
292
  },
211
293
  gemini: {
@@ -317,6 +399,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
317
399
  "disallowedTools",
318
400
  // Phase 4 slice δ
319
401
  "maxTurns",
402
+ // Phase 4 slice ζ
403
+ "workingDir",
320
404
  ],
321
405
  flags: {
322
406
  "-p": { arity: "one", description: "Prompt text" },
@@ -346,6 +430,10 @@ export const UPSTREAM_CLI_CONTRACTS = {
346
430
  pattern: /^[1-9][0-9]*$/,
347
431
  description: "Agent-loop iteration cap (Phase 4 slice δ)",
348
432
  },
433
+ "--cwd": {
434
+ arity: "one",
435
+ description: "Working directory for the invocation (Phase 4 slice ζ)",
436
+ },
349
437
  },
350
438
  env: {},
351
439
  conformanceFixtures: [
@@ -373,6 +461,12 @@ export const UPSTREAM_CLI_CONTRACTS = {
373
461
  args: ["-p", "hello", "--max-turns", "0"],
374
462
  expect: "fail",
375
463
  },
464
+ {
465
+ id: "grok-working-dir",
466
+ description: "Phase 4 slice ζ: --cwd <DIR> is accepted",
467
+ args: ["-p", "hello", "--cwd", "/tmp/work"],
468
+ expect: "pass",
469
+ },
376
470
  ],
377
471
  },
378
472
  mistral: {
@@ -401,6 +495,9 @@ export const UPSTREAM_CLI_CONTRACTS = {
401
495
  // Phase 4 slice δ
402
496
  "maxTurns",
403
497
  "maxPrice",
498
+ // Phase 4 slice ζ
499
+ "workingDir",
500
+ "addDir",
404
501
  ],
405
502
  flags: {
406
503
  "-p": { arity: "one", description: "Prompt text" },
@@ -435,6 +532,14 @@ export const UPSTREAM_CLI_CONTRACTS = {
435
532
  pattern: /^(0|[1-9][0-9]*)(\.[0-9]+)?$/,
436
533
  description: "Cumulative cost cap in USD (Phase 4 slice δ, programmatic mode only)",
437
534
  },
535
+ "--workdir": {
536
+ arity: "one",
537
+ description: "Working directory for the invocation (Phase 4 slice ζ)",
538
+ },
539
+ "--add-dir": {
540
+ arity: "one",
541
+ description: "Additional writable workspace directory (Phase 4 slice ζ; repeat once per directory)",
542
+ },
438
543
  },
439
544
  env: {
440
545
  VIBE_ACTIVE_MODEL: {
@@ -479,6 +584,29 @@ export const UPSTREAM_CLI_CONTRACTS = {
479
584
  env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
480
585
  expect: "fail",
481
586
  },
587
+ {
588
+ id: "mistral-working-dir",
589
+ description: "Phase 4 slice ζ: --workdir <DIR> is accepted",
590
+ args: ["-p", "hello", "--agent", "auto-approve", "--workdir", "/tmp/work"],
591
+ env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
592
+ expect: "pass",
593
+ },
594
+ {
595
+ id: "mistral-add-dir",
596
+ description: "Phase 4 slice ζ: repeated --add-dir is accepted",
597
+ args: [
598
+ "-p",
599
+ "hello",
600
+ "--agent",
601
+ "auto-approve",
602
+ "--add-dir",
603
+ "/tmp/a",
604
+ "--add-dir",
605
+ "/tmp/b",
606
+ ],
607
+ env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
608
+ expect: "pass",
609
+ },
482
610
  ],
483
611
  },
484
612
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "1.10.0",
3
+ "version": "1.12.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",