llm-cli-gateway 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +28 -2
- package/dist/request-helpers.d.ts +14 -0
- package/dist/request-helpers.js +7 -0
- package/dist/upstream-contracts.js +33 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,67 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the llm-cli-gateway project.
|
|
4
4
|
|
|
5
|
+
## [1.11.0] - 2026-05-27 — Phase 4 slice η (Claude `--fallback-model` + `--json-schema`)
|
|
6
|
+
|
|
7
|
+
Ships the sixth Phase 4 slice: Claude's reliability fallback and
|
|
8
|
+
structured-output JSON-Schema constraint flags are now reachable from
|
|
9
|
+
`claude_request` and `claude_request_async`. Three commits land together
|
|
10
|
+
(feature wiring, contract registration, test-veracity regressions) plus
|
|
11
|
+
this release commit.
|
|
12
|
+
|
|
13
|
+
### Added — `--fallback-model` and `--json-schema` for Claude
|
|
14
|
+
|
|
15
|
+
- `claude_request` and `claude_request_async` accept a new `fallbackModel`
|
|
16
|
+
field (non-empty string, validated via `z.string().min(1)`). Threaded
|
|
17
|
+
through `prepareClaudeRequest` → `prepareClaudeHighImpactFlags`
|
|
18
|
+
(`src/request-helpers.ts:651`) → `--fallback-model <model>` argv pair.
|
|
19
|
+
Effective only with Claude `--print`; the gateway always passes `-p`,
|
|
20
|
+
so no extra gating is required.
|
|
21
|
+
- Both tools accept a new `jsonSchema` field
|
|
22
|
+
(`string | Record<string, unknown>`). Per `claude --help`, the CLI
|
|
23
|
+
argument is the JSON Schema *literal* (not a path; contrast with Codex
|
|
24
|
+
`--output-schema`). Object values are `JSON.stringify`-d; string values
|
|
25
|
+
pass verbatim. Use with `outputFormat: "json"` for structured output
|
|
26
|
+
validation. Achieves Codex parity for structured-output validation
|
|
27
|
+
in a single slice.
|
|
28
|
+
- `UPSTREAM_CLI_CONTRACTS.claude.flags` registers `--fallback-model` and
|
|
29
|
+
`--json-schema` with `arity: "one"`. `mcpParameters` includes both new
|
|
30
|
+
field names. Two new passing conformance fixtures
|
|
31
|
+
(`claude-fallback-model`, `claude-json-schema`) pin the contract; both
|
|
32
|
+
are mechanically validated against `validateUpstreamCliArgs` in the
|
|
33
|
+
REGRESSIONS Hε suite.
|
|
34
|
+
|
|
35
|
+
### Test-veracity audit
|
|
36
|
+
|
|
37
|
+
Per the standing protocol (`feedback_test_veracity_audit_protocol`),
|
|
38
|
+
this slice's tests were audited by Codex + Gemini + Grok + Mistral in
|
|
39
|
+
async parallel with mandatory mutation-probe execution. Spec at
|
|
40
|
+
`docs/plans/test-veracity-audit-slice-eta.spec.md`. Round 1 outcomes:
|
|
41
|
+
Grok + Mistral unanimous UNCONDITIONAL APPROVE; Gemini stalled at 682B
|
|
42
|
+
stderr for 15+ minutes (cancelled, documented quota/stall-class
|
|
43
|
+
blocker); Codex initially REJECTED on P-Hβ-4 with an invalid claim
|
|
44
|
+
("removing sync `jsonSchema` left the test green") — pre-verification
|
|
45
|
+
on a clean tree confirmed the mutation does turn `Hα-4` + `Hα-6` RED as
|
|
46
|
+
the spec predicts. Round-2 pushback with the verbatim vitest output:
|
|
47
|
+
Codex self-corrected, reproduced the mutation in a worktree, observed
|
|
48
|
+
the predicted red, restored, and issued UNCONDITIONAL APPROVE.
|
|
49
|
+
|
|
50
|
+
Three substantive reviewer approvals (Grok, Mistral, Codex) from
|
|
51
|
+
independent vendor families satisfy the multi-LLM gate; Gemini stall
|
|
52
|
+
documented.
|
|
53
|
+
|
|
54
|
+
Test count: 816 → 837 (21 new, all in one file:
|
|
55
|
+
`src/__tests__/test-veracity-regressions-slice-eta.test.ts`).
|
|
56
|
+
|
|
57
|
+
### Known caveats
|
|
58
|
+
|
|
59
|
+
- `npm run check` still excludes `format:check` (gap first flagged in
|
|
60
|
+
v1.8.0). Run both locally before pushing.
|
|
61
|
+
- Claude `--fallback-model` and `--json-schema` are CLI-side gated to
|
|
62
|
+
`--print` mode by Claude itself; both gateway tools always pass `-p`,
|
|
63
|
+
so this is invisible to callers but worth noting if the upstream CLI
|
|
64
|
+
flag semantics change.
|
|
65
|
+
|
|
5
66
|
## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
|
|
6
67
|
|
|
7
68
|
Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format
|
package/dist/index.d.ts
CHANGED
|
@@ -155,6 +155,8 @@ export declare function prepareClaudeRequest(params: {
|
|
|
155
155
|
maxTurns?: number;
|
|
156
156
|
effort?: ClaudeEffortLevel;
|
|
157
157
|
excludeDynamicSystemPromptSections?: boolean;
|
|
158
|
+
fallbackModel?: string;
|
|
159
|
+
jsonSchema?: string | Record<string, unknown>;
|
|
158
160
|
}, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
|
|
159
161
|
export interface CodexRequestPrep extends CliRequestPrep {
|
|
160
162
|
/**
|
package/dist/index.js
CHANGED
|
@@ -1005,6 +1005,8 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1005
1005
|
maxTurns: params.maxTurns,
|
|
1006
1006
|
effort: params.effort,
|
|
1007
1007
|
excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
|
|
1008
|
+
fallbackModel: params.fallbackModel,
|
|
1009
|
+
jsonSchema: params.jsonSchema,
|
|
1008
1010
|
}));
|
|
1009
1011
|
return {
|
|
1010
1012
|
corrId,
|
|
@@ -2481,6 +2483,16 @@ export function createGatewayServer(deps = {}) {
|
|
|
2481
2483
|
.boolean()
|
|
2482
2484
|
.optional()
|
|
2483
2485
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
2486
|
+
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
2487
|
+
fallbackModel: z
|
|
2488
|
+
.string()
|
|
2489
|
+
.min(1)
|
|
2490
|
+
.optional()
|
|
2491
|
+
.describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
|
|
2492
|
+
jsonSchema: z
|
|
2493
|
+
.union([z.string(), z.record(z.unknown())])
|
|
2494
|
+
.optional()
|
|
2495
|
+
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
2484
2496
|
approvalStrategy: z
|
|
2485
2497
|
.enum(["legacy", "mcp_managed"])
|
|
2486
2498
|
.default("legacy")
|
|
@@ -2511,7 +2523,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2511
2523
|
.boolean()
|
|
2512
2524
|
.default(false)
|
|
2513
2525
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2514
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2526
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2515
2527
|
const startTime = Date.now();
|
|
2516
2528
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2517
2529
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
@@ -2541,6 +2553,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
2541
2553
|
maxTurns,
|
|
2542
2554
|
effort,
|
|
2543
2555
|
excludeDynamicSystemPromptSections,
|
|
2556
|
+
fallbackModel,
|
|
2557
|
+
jsonSchema,
|
|
2544
2558
|
}, runtime);
|
|
2545
2559
|
if (!("args" in prep))
|
|
2546
2560
|
return prep;
|
|
@@ -3408,6 +3422,16 @@ export function createGatewayServer(deps = {}) {
|
|
|
3408
3422
|
.boolean()
|
|
3409
3423
|
.optional()
|
|
3410
3424
|
.describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
|
|
3425
|
+
// Phase 4 slice η — Claude reliability + structured-output parity
|
|
3426
|
+
fallbackModel: z
|
|
3427
|
+
.string()
|
|
3428
|
+
.min(1)
|
|
3429
|
+
.optional()
|
|
3430
|
+
.describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
|
|
3431
|
+
jsonSchema: z
|
|
3432
|
+
.union([z.string(), z.record(z.unknown())])
|
|
3433
|
+
.optional()
|
|
3434
|
+
.describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
|
|
3411
3435
|
approvalStrategy: z
|
|
3412
3436
|
.enum(["legacy", "mcp_managed"])
|
|
3413
3437
|
.default("legacy")
|
|
@@ -3437,7 +3461,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3437
3461
|
.boolean()
|
|
3438
3462
|
.default(false)
|
|
3439
3463
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3440
|
-
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3464
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3441
3465
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
3442
3466
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
3443
3467
|
}
|
|
@@ -3466,6 +3490,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3466
3490
|
maxTurns,
|
|
3467
3491
|
effort,
|
|
3468
3492
|
excludeDynamicSystemPromptSections,
|
|
3493
|
+
fallbackModel,
|
|
3494
|
+
jsonSchema,
|
|
3469
3495
|
}, runtime);
|
|
3470
3496
|
if (!("args" in prep))
|
|
3471
3497
|
return prep;
|
package/dist/request-helpers.d.ts
CHANGED
|
@@ -350,6 +350,20 @@ export interface ClaudeHighImpactFlagsInput {
|
|
|
350
350
|
maxTurns?: number;
|
|
351
351
|
effort?: ClaudeEffortLevel;
|
|
352
352
|
excludeDynamicSystemPromptSections?: boolean;
|
|
353
|
+
/**
|
|
354
|
+
* Phase 4 slice η — Claude `--fallback-model <model>`. Routes overloaded-model
|
|
355
|
+
* requests to the named fallback. Only effective with `--print` (we always pass
|
|
356
|
+
* `-p`, so no extra gating required here).
|
|
357
|
+
*/
|
|
358
|
+
fallbackModel?: string;
|
|
359
|
+
/**
|
|
360
|
+
* Phase 4 slice η — Claude `--json-schema <schema>`. Per `claude --help`, the
|
|
361
|
+
* argument is the JSON Schema *literal*, not a path. Object values are
|
|
362
|
+
* `JSON.stringify`-d; string values are passed verbatim (caller already wrote
|
|
363
|
+
* a JSON literal). No temp file lifecycle needed (contrast with Codex
|
|
364
|
+
* `--output-schema`, which takes a path).
|
|
365
|
+
*/
|
|
366
|
+
jsonSchema?: string | Record<string, unknown>;
|
|
353
367
|
}
|
|
354
368
|
/**
|
|
355
369
|
* Emit Claude high-impact feature flags (U25) as a flat argv segment.
|
package/dist/request-helpers.js
CHANGED
|
@@ -438,6 +438,13 @@ export function prepareClaudeHighImpactFlags(input) {
|
|
|
438
438
|
if (input.excludeDynamicSystemPromptSections) {
|
|
439
439
|
args.push("--exclude-dynamic-system-prompt-sections");
|
|
440
440
|
}
|
|
441
|
+
if (input.fallbackModel !== undefined) {
|
|
442
|
+
args.push("--fallback-model", input.fallbackModel);
|
|
443
|
+
}
|
|
444
|
+
if (input.jsonSchema !== undefined) {
|
|
445
|
+
const schemaArg = typeof input.jsonSchema === "string" ? input.jsonSchema : JSON.stringify(input.jsonSchema);
|
|
446
|
+
args.push("--json-schema", schemaArg);
|
|
447
|
+
}
|
|
441
448
|
return args;
|
|
442
449
|
}
|
|
443
450
|
//──────────────────────────────────────────────────────────────────────────────
|
package/dist/upstream-contracts.js
CHANGED
|
@@ -37,6 +37,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
37
37
|
"maxTurns",
|
|
38
38
|
"effort",
|
|
39
39
|
"excludeDynamicSystemPromptSections",
|
|
40
|
+
"fallbackModel",
|
|
41
|
+
"jsonSchema",
|
|
40
42
|
"approvalStrategy",
|
|
41
43
|
"mcpServers",
|
|
42
44
|
"strictMcpConfig",
|
|
@@ -78,6 +80,14 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
78
80
|
arity: "none",
|
|
79
81
|
description: "Trim dynamic system prompt sections",
|
|
80
82
|
},
|
|
83
|
+
"--fallback-model": {
|
|
84
|
+
arity: "one",
|
|
85
|
+
description: "Auto-fallback model when default is overloaded (Claude --print only)",
|
|
86
|
+
},
|
|
87
|
+
"--json-schema": {
|
|
88
|
+
arity: "one",
|
|
89
|
+
description: "JSON Schema literal constraining structured output",
|
|
90
|
+
},
|
|
81
91
|
"--continue": { arity: "none", description: "Continue active session" },
|
|
82
92
|
"--session-id": { arity: "one", description: "Session id" },
|
|
83
93
|
},
|
|
@@ -95,6 +105,29 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
95
105
|
args: ["-p", "hello", "--not-a-claude-flag"],
|
|
96
106
|
expect: "fail",
|
|
97
107
|
},
|
|
108
|
+
{
|
|
109
|
+
// Phase 4 slice η: --fallback-model wired through prepareClaudeRequest.
|
|
110
|
+
id: "claude-fallback-model",
|
|
111
|
+
description: "Phase 4 slice η: --fallback-model accepted",
|
|
112
|
+
args: ["-p", "hello", "--fallback-model", "claude-haiku-4-5-20251001"],
|
|
113
|
+
expect: "pass",
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
// Phase 4 slice η: --json-schema accepts an inline JSON Schema literal
|
|
117
|
+
// (per `claude --help` example), not a path. Codex parity for
|
|
118
|
+
// structured-output validation in one slice.
|
|
119
|
+
id: "claude-json-schema",
|
|
120
|
+
description: "Phase 4 slice η: --json-schema accepts inline JSON literal",
|
|
121
|
+
args: [
|
|
122
|
+
"-p",
|
|
123
|
+
"hello",
|
|
124
|
+
"--output-format",
|
|
125
|
+
"json",
|
|
126
|
+
"--json-schema",
|
|
127
|
+
'{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}',
|
|
128
|
+
],
|
|
129
|
+
expect: "pass",
|
|
130
|
+
},
|
|
98
131
|
],
|
|
99
132
|
},
|
|
100
133
|
codex: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-cli-gateway",
|
|
3
|
-
"version": "1.10.0",
|
|
3
|
+
"version": "1.11.0",
|
|
4
4
|
"mcpName": "io.github.verivus-oss/llm-cli-gateway",
|
|
5
5
|
"description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
|
|
6
6
|
"license": "MIT",
|