npm - llm-cli-gateway - Versions diffs - 1.10.0 → 1.11.0 - Mend

llm-cli-gateway 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/CHANGELOG.md +61 -0
package/dist/index.d.ts +2 -0
package/dist/index.js +28 -2
package/dist/request-helpers.d.ts +14 -0
package/dist/request-helpers.js +7 -0
package/dist/upstream-contracts.js +33 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,67 @@
 All notable changes to the llm-cli-gateway project.
+## [1.11.0] - 2026-05-27 — Phase 4 slice η (Claude `--fallback-model` + `--json-schema`)
+Ships the sixth Phase 4 slice: Claude's reliability fallback and
+structured-output JSON-Schema constraint flags are now reachable from
+`claude_request` and `claude_request_async`. Three commits land together
+(feature wiring, contract registration, test-veracity regressions) plus
+this release commit.
+### Added — `--fallback-model` and `--json-schema` for Claude
+- `claude_request` and `claude_request_async` accept a new `fallbackModel`
+  field (non-empty string, validated via `z.string().min(1)`). Threaded
+  through `prepareClaudeRequest` → `prepareClaudeHighImpactFlags`
+  (`src/request-helpers.ts:651`) → `--fallback-model <model>` argv pair.
+  Effective only with Claude `--print`; the gateway always passes `-p`,
+  so no extra gating required.
+- Both tools accept a new `jsonSchema` field
+  (`string | Record<string, unknown>`). Per `claude --help`, the CLI
+  argument is the JSON Schema *literal* (not a path; contrast with Codex
+  `--output-schema`). Object values are `JSON.stringify`-d; string values
+  pass verbatim. Use with `outputFormat: "json"` for structured output
+  validation. Achieves Codex parity for structured-output validation
+  in a single slice.
+- `UPSTREAM_CLI_CONTRACTS.claude.flags` registers `--fallback-model` and
+  `--json-schema` with `arity: "one"`. `mcpParameters` includes both new
+  field names. Two new passing conformance fixtures
+  (`claude-fallback-model`, `claude-json-schema`) pin the contract; both
+  are mechanically validated against `validateUpstreamCliArgs` in the
+  REGRESSIONS Hε suite.
+### Test-veracity audit
+Per the standing protocol (`feedback_test_veracity_audit_protocol`),
+this slice's tests were audited by Codex + Gemini + Grok + Mistral in
+async parallel with mandatory mutation-probe execution. Spec at
+`docs/plans/test-veracity-audit-slice-eta.spec.md`. Round 1 outcomes:
+Grok + Mistral unanimous UNCONDITIONAL APPROVE; Gemini stalled at 682 B of
+stderr for 15+ minutes (cancelled, documented quota/stall-class
+blocker); Codex initially REJECTED on P-Hβ-4 with an invalid claim
+("removing sync `jsonSchema` left the test green") — pre-verification
+on a clean tree confirmed the mutation does turn `Hα-4` + `Hα-6` RED as
+the spec predicts. After round-2 pushback with the verbatim vitest output,
+Codex self-corrected, reproduced the mutation in a worktree, observed
+the predicted red, restored, and issued UNCONDITIONAL APPROVE.
+Three substantive reviewer approvals (Grok, Mistral, Codex) from
+independent vendor families satisfy the multi-LLM gate; the Gemini stall
+is documented.
+Test count: 816 → 837 (21 new tests, all in one file:
+`src/__tests__/test-veracity-regressions-slice-eta.test.ts`).
+### Known caveats
+- `npm run check` still excludes `format:check` (gap first flagged in
+  v1.8.0). Run both locally before pushing.
+- Claude `--fallback-model` and `--json-schema` are CLI-side gated to
+  `--print` mode by Claude itself; both gateway tools always pass `-p`,
+  so this is invisible to callers but worth noting if the upstream CLI
+  flag semantics change.
 ## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
 Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format

package/dist/index.d.ts CHANGED Viewed

@@ -155,6 +155,8 @@ export declare function prepareClaudeRequest(params: {
     maxTurns?: number;
     effort?: ClaudeEffortLevel;
     excludeDynamicSystemPromptSections?: boolean;
+    fallbackModel?: string;
+    jsonSchema?: string | Record<string, unknown>;
 }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
 export interface CodexRequestPrep extends CliRequestPrep {
     /**

package/dist/index.js CHANGED Viewed

@@ -1005,6 +1005,8 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         maxTurns: params.maxTurns,
         effort: params.effort,
         excludeDynamicSystemPromptSections: params.excludeDynamicSystemPromptSections,
+        fallbackModel: params.fallbackModel,
+        jsonSchema: params.jsonSchema,
     }));
     return {
         corrId,
@@ -2481,6 +2483,16 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .optional()
             .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
+        // Phase 4 slice η — Claude reliability + structured-output parity
+        fallbackModel: z
+            .string()
+            .min(1)
+            .optional()
+            .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
+        jsonSchema: z
+            .union([z.string(), z.record(z.unknown())])
+            .optional()
+            .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
         approvalStrategy: z
             .enum(["legacy", "mcp_managed"])
             .default("legacy")
@@ -2511,7 +2523,7 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
         const startTime = Date.now();
         if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
             return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2541,6 +2553,8 @@ export function createGatewayServer(deps = {}) {
             maxTurns,
             effort,
             excludeDynamicSystemPromptSections,
+            fallbackModel,
+            jsonSchema,
         }, runtime);
         if (!("args" in prep))
             return prep;
@@ -3408,6 +3422,16 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .optional()
                 .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
+            // Phase 4 slice η — Claude reliability + structured-output parity
+            fallbackModel: z
+                .string()
+                .min(1)
+                .optional()
+                .describe("Claude --fallback-model: model name to auto-fallback to when the default model is overloaded (effective only with --print, which the gateway always uses)."),
+            jsonSchema: z
+                .union([z.string(), z.record(z.unknown())])
+                .optional()
+                .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
             approvalStrategy: z
                 .enum(["legacy", "mcp_managed"])
                 .default("legacy")
@@ -3437,7 +3461,7 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
             if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
                 return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
             }
@@ -3466,6 +3490,8 @@ export function createGatewayServer(deps = {}) {
                 maxTurns,
                 effort,
                 excludeDynamicSystemPromptSections,
+                fallbackModel,
+                jsonSchema,
             }, runtime);
             if (!("args" in prep))
                 return prep;

package/dist/request-helpers.d.ts CHANGED Viewed

@@ -350,6 +350,20 @@ export interface ClaudeHighImpactFlagsInput {
     maxTurns?: number;
     effort?: ClaudeEffortLevel;
     excludeDynamicSystemPromptSections?: boolean;
+    /**
+     * Phase 4 slice η — Claude `--fallback-model <model>`. Routes overloaded-model
+     * requests to the named fallback. Only effective with `--print` (we always pass
+     * `-p`, so no extra gating required here).
+     */
+    fallbackModel?: string;
+    /**
+     * Phase 4 slice η — Claude `--json-schema <schema>`. Per `claude --help`, the
+     * argument is the JSON Schema *literal*, not a path. Object values are
+     * `JSON.stringify`-d; string values are passed verbatim (caller already wrote
+     * a JSON literal). No temp file lifecycle needed (contrast with Codex
+     * `--output-schema`, which takes a path).
+     */
+    jsonSchema?: string | Record<string, unknown>;
 }
 /**
  * Emit Claude high-impact feature flags (U25) as a flat argv segment.

package/dist/request-helpers.js CHANGED Viewed

@@ -438,6 +438,13 @@ export function prepareClaudeHighImpactFlags(input) {
     if (input.excludeDynamicSystemPromptSections) {
         args.push("--exclude-dynamic-system-prompt-sections");
     }
+    if (input.fallbackModel !== undefined) {
+        args.push("--fallback-model", input.fallbackModel);
+    }
+    if (input.jsonSchema !== undefined) {
+        const schemaArg = typeof input.jsonSchema === "string" ? input.jsonSchema : JSON.stringify(input.jsonSchema);
+        args.push("--json-schema", schemaArg);
+    }
     return args;
 }
 //──────────────────────────────────────────────────────────────────────────────

package/dist/upstream-contracts.js CHANGED Viewed

@@ -37,6 +37,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
             "maxTurns",
             "effort",
             "excludeDynamicSystemPromptSections",
+            "fallbackModel",
+            "jsonSchema",
             "approvalStrategy",
             "mcpServers",
             "strictMcpConfig",
@@ -78,6 +80,14 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 arity: "none",
                 description: "Trim dynamic system prompt sections",
             },
+            "--fallback-model": {
+                arity: "one",
+                description: "Auto-fallback model when default is overloaded (Claude --print only)",
+            },
+            "--json-schema": {
+                arity: "one",
+                description: "JSON Schema literal constraining structured output",
+            },
             "--continue": { arity: "none", description: "Continue active session" },
             "--session-id": { arity: "one", description: "Session id" },
         },
@@ -95,6 +105,29 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 args: ["-p", "hello", "--not-a-claude-flag"],
                 expect: "fail",
             },
+            {
+                // Phase 4 slice η: --fallback-model wired through prepareClaudeRequest.
+                id: "claude-fallback-model",
+                description: "Phase 4 slice η: --fallback-model accepted",
+                args: ["-p", "hello", "--fallback-model", "claude-haiku-4-5-20251001"],
+                expect: "pass",
+            },
+            {
+                // Phase 4 slice η: --json-schema accepts an inline JSON Schema literal
+                // (per `claude --help` example), not a path. Codex parity for
+                // structured-output validation in one slice.
+                id: "claude-json-schema",
+                description: "Phase 4 slice η: --json-schema accepts inline JSON literal",
+                args: [
+                    "-p",
+                    "hello",
+                    "--output-format",
+                    "json",
+                    "--json-schema",
+                    '{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}',
+                ],
+                expect: "pass",
+            },
         ],
     },
     codex: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-cli-gateway",
-  "version": "1.10.0",
+  "version": "1.11.0",
   "mcpName": "io.github.verivus-oss/llm-cli-gateway",
   "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
   "license": "MIT",