llm-cli-gateway 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,67 @@
2
2
 
3
3
  All notable changes to the llm-cli-gateway project.
4
4
 
5
+ ## [1.11.0] - 2026-05-27 — Phase 4 slice η (Claude `--fallback-model` + `--json-schema`)
6
+
7
+ Ships the sixth Phase 4 slice: Claude's reliability fallback and
8
+ structured-output JSON-Schema constraint flags are now reachable from
9
+ `claude_request` and `claude_request_async`. Three commits land together
10
+ (feature wiring, contract registration, test-veracity regressions) plus
11
+ this release commit.
12
+
13
+ ### Added — `--fallback-model` and `--json-schema` for Claude
14
+
15
+ - `claude_request` and `claude_request_async` accept a new `fallbackModel`
16
+ field (non-empty string, validated via `z.string().min(1)`). Threaded
17
+ through `prepareClaudeRequest` → `prepareClaudeHighImpactFlags`
18
+ (`src/request-helpers.ts:651`) → `--fallback-model <model>` argv pair.
19
+ Effective only with Claude `--print`; the gateway always passes `-p`,
20
+ so no extra gating is required.
21
+ - Both tools accept a new `jsonSchema` field
22
+ (`string | Record<string, unknown>`). Per `claude --help`, the CLI
23
+ argument is the JSON Schema *literal* (not a path; contrast with Codex
24
+ `--output-schema`). Object values are `JSON.stringify`-d; string values
25
+ pass verbatim. Use with `outputFormat: "json"` for structured output
26
+ validation. Achieves Codex parity for structured-output validation
27
+ in a single slice.
28
+ - `UPSTREAM_CLI_CONTRACTS.claude.flags` registers `--fallback-model` and
29
+ `--json-schema` with `arity: "one"`. `mcpParameters` includes both new
30
+ field names. Two new passing conformance fixtures
31
+ (`claude-fallback-model`, `claude-json-schema`) pin the contract; both
32
+ are mechanically validated against `validateUpstreamCliArgs` in the
33
+ REGRESSIONS Hε suite.
34
+
35
+ ### Test-veracity audit
36
+
37
+ Per the standing protocol (`feedback_test_veracity_audit_protocol`),
38
+ this slice's tests were audited by Codex + Gemini + Grok + Mistral in
39
+ async parallel with mandatory mutation-probe execution. Spec at
40
+ `docs/plans/test-veracity-audit-slice-eta.spec.md`. Round 1 outcomes:
41
+ Grok + Mistral unanimous UNCONDITIONAL APPROVE; Gemini stalled at 682B
42
+ stderr for 15+ minutes (cancelled, documented quota/stall-class
43
+ blocker); Codex initially REJECTED on P-Hβ-4 with an invalid claim
44
+ ("removing sync `jsonSchema` left the test green") — pre-verification
45
+ on a clean tree confirmed the mutation does turn `Hα-4` + `Hα-6` RED as
46
+ the spec predicts. After round-2 pushback with the verbatim vitest output,
47
+ Codex self-corrected, reproduced the mutation in a worktree, observed
48
+ the predicted red, restored, and issued UNCONDITIONAL APPROVE.
49
+
50
+ Three substantive reviewer approvals (Grok, Mistral, Codex) from
51
+ independent vendor families satisfy the multi-LLM gate; Gemini stall
52
+ documented.
53
+
54
+ Test count: 816 → 837 (21 new across one file:
55
+ `src/__tests__/test-veracity-regressions-slice-eta.test.ts`).
56
+
57
+ ### Known caveats
58
+
59
+ - `npm run check` still excludes `format:check` (gap first flagged in
60
+ v1.8.0). Run both locally before pushing.
61
+ - Claude `--fallback-model` and `--json-schema` are CLI-side gated to
62
+ `--print` mode by Claude itself; both gateway tools always pass `-p`,
63
+ so this is invisible to callers but worth noting if the upstream CLI
64
+ flag semantics change.
65
+
5
66
  ## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
6
67
 
7
68
  Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format
package/dist/index.d.ts CHANGED
@@ -155,6 +155,8 @@ export declare function prepareClaudeRequest(params: {
155
155
  maxTurns?: number;
156
156
  effort?: ClaudeEffortLevel;
157
157
  excludeDynamicSystemPromptSections?: boolean;
158
+ fallbackModel?: string;
159
+ jsonSchema?: string | Record<string, unknown>;
158
160
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
159
161
  export interface CodexRequestPrep extends CliRequestPrep {
160
162
  /**
package/dist/index.js CHANGED
@@ -1005,6 +1005,8 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1005
1005
  maxTurns: params.maxTurns,
1006
1006
  effort: params.effort,
1007
1007
  excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
1008
+ fallbackModel: params.fallbackModel,
1009
+ jsonSchema: params.jsonSchema,
1008
1010
  }));
1009
1011
  return {
1010
1012
  corrId,
@@ -2481,6 +2483,16 @@ export function createGatewayServer(deps = {}) {
2481
2483
  .boolean()
2482
2484
  .optional()
2483
2485
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2486
+ // Phase 4 slice η — Claude reliability + structured-output parity
2487
+ fallbackModel: z
2488
+ .string()
2489
+ .min(1)
2490
+ .optional()
2491
+ .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
2492
+ jsonSchema: z
2493
+ .union([z.string(), z.record(z.unknown())])
2494
+ .optional()
2495
+ .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
2484
2496
  approvalStrategy: z
2485
2497
  .enum(["legacy", "mcp_managed"])
2486
2498
  .default("legacy")
@@ -2511,7 +2523,7 @@ export function createGatewayServer(deps = {}) {
2511
2523
  .boolean()
2512
2524
  .default(false)
2513
2525
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2514
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2526
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2515
2527
  const startTime = Date.now();
2516
2528
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2517
2529
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2541,6 +2553,8 @@ export function createGatewayServer(deps = {}) {
2541
2553
  maxTurns,
2542
2554
  effort,
2543
2555
  excludeDynamicSystemPromptSections,
2556
+ fallbackModel,
2557
+ jsonSchema,
2544
2558
  }, runtime);
2545
2559
  if (!("args" in prep))
2546
2560
  return prep;
@@ -3408,6 +3422,16 @@ export function createGatewayServer(deps = {}) {
3408
3422
  .boolean()
3409
3423
  .optional()
3410
3424
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
3425
+ // Phase 4 slice η — Claude reliability + structured-output parity
3426
+ fallbackModel: z
3427
+ .string()
3428
+ .min(1)
3429
+ .optional()
3430
+ .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
3431
+ jsonSchema: z
3432
+ .union([z.string(), z.record(z.unknown())])
3433
+ .optional()
3434
+ .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
3411
3435
  approvalStrategy: z
3412
3436
  .enum(["legacy", "mcp_managed"])
3413
3437
  .default("legacy")
@@ -3437,7 +3461,7 @@ export function createGatewayServer(deps = {}) {
3437
3461
  .boolean()
3438
3462
  .default(false)
3439
3463
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3440
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3464
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3441
3465
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
3442
3466
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
3443
3467
  }
@@ -3466,6 +3490,8 @@ export function createGatewayServer(deps = {}) {
3466
3490
  maxTurns,
3467
3491
  effort,
3468
3492
  excludeDynamicSystemPromptSections,
3493
+ fallbackModel,
3494
+ jsonSchema,
3469
3495
  }, runtime);
3470
3496
  if (!("args" in prep))
3471
3497
  return prep;
@@ -350,6 +350,20 @@ export interface ClaudeHighImpactFlagsInput {
350
350
  maxTurns?: number;
351
351
  effort?: ClaudeEffortLevel;
352
352
  excludeDynamicSystemPromptSections?: boolean;
353
+ /**
354
+ * Phase 4 slice η — Claude `--fallback-model <model>`. Routes overloaded-model
355
+ * requests to the named fallback. Only effective with `--print` (we always pass
356
+ * `-p`, so no extra gating required here).
357
+ */
358
+ fallbackModel?: string;
359
+ /**
360
+ * Phase 4 slice η — Claude `--json-schema <schema>`. Per `claude --help`, the
361
+ * argument is the JSON Schema *literal*, not a path. Object values are
362
+ * `JSON.stringify`-d; string values are passed verbatim (caller already wrote
363
+ * a JSON literal). No temp file lifecycle needed (contrast with Codex
364
+ * `--output-schema`, which takes a path).
365
+ */
366
+ jsonSchema?: string | Record<string, unknown>;
353
367
  }
354
368
  /**
355
369
  * Emit Claude high-impact feature flags (U25) as a flat argv segment.
@@ -438,6 +438,13 @@ export function prepareClaudeHighImpactFlags(input) {
438
438
  if (input.excludeDynamicSystemPromptSections) {
439
439
  args.push("--exclude-dynamic-system-prompt-sections");
440
440
  }
441
+ if (input.fallbackModel !== undefined) {
442
+ args.push("--fallback-model", input.fallbackModel);
443
+ }
444
+ if (input.jsonSchema !== undefined) {
445
+ const schemaArg = typeof input.jsonSchema === "string" ? input.jsonSchema : JSON.stringify(input.jsonSchema);
446
+ args.push("--json-schema", schemaArg);
447
+ }
441
448
  return args;
442
449
  }
443
450
  //──────────────────────────────────────────────────────────────────────────────
@@ -37,6 +37,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
37
37
  "maxTurns",
38
38
  "effort",
39
39
  "excludeDynamicSystemPromptSections",
40
+ "fallbackModel",
41
+ "jsonSchema",
40
42
  "approvalStrategy",
41
43
  "mcpServers",
42
44
  "strictMcpConfig",
@@ -78,6 +80,14 @@ export const UPSTREAM_CLI_CONTRACTS = {
78
80
  arity: "none",
79
81
  description: "Trim dynamic system prompt sections",
80
82
  },
83
+ "--fallback-model": {
84
+ arity: "one",
85
+ description: "Auto-fallback model when default is overloaded (Claude --print only)",
86
+ },
87
+ "--json-schema": {
88
+ arity: "one",
89
+ description: "JSON Schema literal constraining structured output",
90
+ },
81
91
  "--continue": { arity: "none", description: "Continue active session" },
82
92
  "--session-id": { arity: "one", description: "Session id" },
83
93
  },
@@ -95,6 +105,29 @@ export const UPSTREAM_CLI_CONTRACTS = {
95
105
  args: ["-p", "hello", "--not-a-claude-flag"],
96
106
  expect: "fail",
97
107
  },
108
+ {
109
+ // Phase 4 slice η: --fallback-model wired through prepareClaudeRequest.
110
+ id: "claude-fallback-model",
111
+ description: "Phase 4 slice η: --fallback-model accepted",
112
+ args: ["-p", "hello", "--fallback-model", "claude-haiku-4-5-20251001"],
113
+ expect: "pass",
114
+ },
115
+ {
116
+ // Phase 4 slice η: --json-schema accepts an inline JSON Schema literal
117
+ // (per `claude --help` example), not a path. Codex parity for
118
+ // structured-output validation in one slice.
119
+ id: "claude-json-schema",
120
+ description: "Phase 4 slice η: --json-schema accepts inline JSON literal",
121
+ args: [
122
+ "-p",
123
+ "hello",
124
+ "--output-format",
125
+ "json",
126
+ "--json-schema",
127
+ '{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}',
128
+ ],
129
+ expect: "pass",
130
+ },
98
131
  ],
99
132
  },
100
133
  codex: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "1.10.0",
3
+ "version": "1.11.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",