llm-cli-gateway 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,56 @@
2
2
 
3
3
  All notable changes to the llm-cli-gateway project.
4
4
 
5
+ ## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
6
+
7
+ Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format
8
+ (`-o stream-json`) is now reachable from `gemini_request` and
9
+ `gemini_request_async`. Four commits land together: the feature wiring, a
10
+ contract-table widening, a test-veracity regression suite, and a follow-up
11
+ test fix driven by the multi-LLM round-1 audit.
12
+
13
+ ### Added — `outputFormat: "stream-json"` for Gemini
14
+
15
+ - `gemini_request` and `gemini_request_async` `outputFormat` enums widened
16
+ from `text | json` to `text | json | stream-json`.
17
+ - `prepareGeminiRequest` emits `-o stream-json` when the new value is set.
18
+ No `--include-partial-messages` analogue is required: Gemini already
19
+ streams stdout in real time across all output modes (covered by
20
+ `CLI_IDLE_TIMEOUTS.gemini = 600_000`).
21
+ - New `parseGeminiStreamJson` parser consumes the NDJSON event stream
22
+ (`init` / `message` / `result` lines), concatenates assistant `delta`
23
+ messages into the response, and extracts
24
+ `input_tokens` / `output_tokens` / `cached` → `cache_read_tokens` from
25
+ the terminal `result.stats` event.
26
+ - `extractUsageAndCost("gemini", _, "stream-json")` routes to the new
27
+ parser so usage tokens reach the flight recorder on the stream-json
28
+ path, matching the existing `-o json` behaviour.
29
+ - `UPSTREAM_CLI_CONTRACTS.gemini.flags["-o"].values` widened to
30
+ `["json", "stream-json"]`; two new conformance fixtures
31
+ (`gemini-stream-json` passing, `gemini-output-format-invalid` failing
32
+ for `-o ndjson`) pin the enum bound.
33
+
34
+ ### Test-veracity audit
35
+
36
+ Per the standing protocol established with v1.9.0
37
+ (`feedback_test_veracity_audit_protocol`), this slice's tests were
38
+ audited by Codex + Gemini + Grok + Mistral in async parallel with
39
+ mandatory mutation-probe execution. Round 1 found one real gap
40
+ (`Eε-4` only checked fixture presence/shape — P-Eε-1 left it green);
41
+ closed in commit `4a78f9c` by running the fixture's args through
42
+ `validateUpstreamCliArgs` inside the same `it()` block. Round 2
43
+ delivered unanimous UNCONDITIONAL APPROVE across all four reviewers,
44
+ with site-by-site probe evidence for the contested `Eα` registered-schema
45
+ helper. Spec at `docs/plans/test-veracity-audit-slice-epsilon.spec.md`.
46
+
47
+ Test count: 771 → 795 → 796 (24 + 1 new across two files).
48
+
49
+ ### Known caveats
50
+
51
+ - The `npm run check` script still does not include `format:check` (a
52
+ gap first flagged in the v1.8.0 release notes). Run both `check` and `format:check` locally
53
+ before pushing; CI runs format:check separately.
54
+
5
55
  ## [1.9.0] - 2026-05-27 — Phase 4 slice δ (budget/max-turns parity) + retroactive α/γ contract closure
6
56
 
7
57
  Ships the fourth Phase 4 slice (budget/max-turns parity for Grok and Mistral),
@@ -1,13 +1,22 @@
1
1
  /**
2
- * Parser for Gemini CLI `-o json` output.
2
+ * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
+ * (NDJSON event stream) output.
3
4
  *
4
- * Gemini emits a single JSON object with:
5
+ * `-o json` emits a single JSON object with:
5
6
  * - `response`: string final model output
6
7
  * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
7
8
  * cachedContentTokenCount?, totalTokenCount }
8
9
  *
9
- * Returns null when stdout is not parseable as JSON. Returns an object with
10
- * only `response` when usageMetadata is missing.
10
+ * `-o stream-json` emits one JSON object per line:
11
+ * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
+ * - `{ "type": "message", "role": "user", "content": "..." }`
13
+ * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
+ * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
+ * "output_tokens": N, "cached": N, ... } }`
16
+ *
17
+ * Both parsers return null when stdout is unparseable. Both populate the same
18
+ * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
+ * outputFormat without further dispatch.
11
20
  */
12
21
  export interface GeminiUsage {
13
22
  input_tokens: number;
@@ -19,3 +28,9 @@ export interface GeminiJsonParseResult {
19
28
  response?: string;
20
29
  }
21
30
  export declare function parseGeminiJson(stdout: string): GeminiJsonParseResult | null;
31
+ /**
32
+ * Parse Gemini `-o stream-json` NDJSON output. Concatenates the content of every
33
+ * assistant `message` event (the `delta` flag is not checked) into `response`, and extracts the
34
+ * last `result.stats` token counts into `usage`. Returns null when stdout is empty or no line parses to a JSON object or array.
35
+ */
36
+ export declare function parseGeminiStreamJson(stdout: string): GeminiJsonParseResult | null;
@@ -1,13 +1,22 @@
1
1
  /**
2
- * Parser for Gemini CLI `-o json` output.
2
+ * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
+ * (NDJSON event stream) output.
3
4
  *
4
- * Gemini emits a single JSON object with:
5
+ * `-o json` emits a single JSON object with:
5
6
  * - `response`: string final model output
6
7
  * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
7
8
  * cachedContentTokenCount?, totalTokenCount }
8
9
  *
9
- * Returns null when stdout is not parseable as JSON. Returns an object with
10
- * only `response` when usageMetadata is missing.
10
+ * `-o stream-json` emits one JSON object per line:
11
+ * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
+ * - `{ "type": "message", "role": "user", "content": "..." }`
13
+ * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
+ * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
+ * "output_tokens": N, "cached": N, ... } }`
16
+ *
17
+ * Both parsers return null when stdout is unparseable. Both populate the same
18
+ * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
+ * outputFormat without further dispatch.
11
20
  */
12
21
  export function parseGeminiJson(stdout) {
13
22
  const trimmed = stdout.trim();
@@ -45,3 +54,63 @@ export function parseGeminiJson(stdout) {
45
54
  }
46
55
  return result;
47
56
  }
57
+ /**
58
+ * Parse Gemini `-o stream-json` NDJSON output. Concatenates the content of every
59
+ * assistant `message` event (the `delta` flag is not checked) into `response`, and extracts the
60
+ * last `result.stats` token counts into `usage`. Returns null when stdout is empty or no line parses to a JSON object or array.
61
+ */
62
+ export function parseGeminiStreamJson(stdout) {
63
+ if (!stdout) {
64
+ return null;
65
+ }
66
+ const lines = stdout.split(/\r?\n/);
67
+ const result = {};
68
+ const assistantChunks = [];
69
+ let sawAnyLine = false;
70
+ for (const line of lines) {
71
+ const trimmed = line.trim();
72
+ if (!trimmed)
73
+ continue;
74
+ // Gemini stream-json lines are individual JSON objects; non-JSON
75
+ // chatter (warnings, "Ripgrep not available", etc.) is silently
76
+ // ignored so a stray banner line doesn't poison usage extraction.
77
+ let event;
78
+ try {
79
+ event = JSON.parse(trimmed);
80
+ }
81
+ catch {
82
+ continue;
83
+ }
84
+ if (!event || typeof event !== "object")
85
+ continue;
86
+ sawAnyLine = true;
87
+ if (event.type === "message" &&
88
+ event.role === "assistant" &&
89
+ typeof event.content === "string") {
90
+ assistantChunks.push(event.content);
91
+ continue;
92
+ }
93
+ if (event.type === "result" && event.stats && typeof event.stats === "object") {
94
+ const stats = event.stats;
95
+ const input = typeof stats.input_tokens === "number" ? stats.input_tokens : undefined;
96
+ const output = typeof stats.output_tokens === "number" ? stats.output_tokens : undefined;
97
+ if (input !== undefined || output !== undefined) {
98
+ const usage = {
99
+ input_tokens: input ?? 0,
100
+ output_tokens: output ?? 0,
101
+ };
102
+ if (typeof stats.cached === "number") {
103
+ usage.cache_read_tokens = stats.cached;
104
+ }
105
+ result.usage = usage;
106
+ }
107
+ }
108
+ }
109
+ if (!sawAnyLine) {
110
+ return null;
111
+ }
112
+ if (assistantChunks.length > 0) {
113
+ result.response = assistantChunks.join("");
114
+ }
115
+ return result;
116
+ }
package/dist/index.d.ts CHANGED
@@ -212,11 +212,13 @@ export declare function prepareGeminiRequest(params: {
212
212
  optimizePrompt: boolean;
213
213
  operation: string;
214
214
  /**
215
- * U23: output format. When set to "json", emits `-o json` so Gemini emits
216
- * the JSON object containing usageMetadata that `parseGeminiJson` (and
217
- * downstream `extractUsageAndCost`) can consume. Defaults to "text".
215
+ * U23 + Phase 4 slice ε: output format. `json` emits `-o json` (single
216
+ * JSON object with usageMetadata). `stream-json` emits `-o stream-json`
217
+ * (NDJSON event stream — `init` / `message` / `result` lines). Both
218
+ * route through `extractUsageAndCost` so usage tokens reach the flight
219
+ * recorder. Defaults to "text".
218
220
  */
219
- outputFormat?: "text" | "json";
221
+ outputFormat?: "text" | "json" | "stream-json";
220
222
  sandbox?: boolean;
221
223
  policyFiles?: string[];
222
224
  adminPolicyFiles?: string[];
@@ -313,8 +315,11 @@ export interface GeminiRequestParams {
313
315
  optimizeResponse?: boolean;
314
316
  idleTimeoutMs?: number;
315
317
  forceRefresh?: boolean;
316
- /** U23: "json" emits `-o json` so token usage is parsed and reported. */
317
- outputFormat?: "text" | "json";
318
+ /**
319
+ * U23 + Phase 4 slice ε: "json" emits `-o json`; "stream-json" emits
320
+ * `-o stream-json` (NDJSON event stream). Both are usage-extracted.
321
+ */
322
+ outputFormat?: "text" | "json" | "stream-json";
318
323
  sandbox?: boolean;
319
324
  policyFiles?: string[];
320
325
  adminPolicyFiles?: string[];
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import { z } from "zod";
9
9
  import { executeCli, killAllProcessGroups } from "./executor.js";
10
10
  import { parseStreamJson } from "./stream-json-parser.js";
11
11
  import { parseCodexJsonStream } from "./codex-json-parser.js";
12
- import { parseGeminiJson } from "./gemini-json-parser.js";
12
+ import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js";
13
13
  import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
14
14
  import { homedir } from "os";
15
15
  import { createSessionManager } from "./session-manager.js";
@@ -530,8 +530,8 @@ ctx) {
530
530
  costUsd: parsed.usage.cost_usd,
531
531
  };
532
532
  }
533
- if (cli === "gemini" && outputFormat === "json") {
534
- const parsed = parseGeminiJson(output);
533
+ if (cli === "gemini" && (outputFormat === "json" || outputFormat === "stream-json")) {
534
+ const parsed = outputFormat === "stream-json" ? parseGeminiStreamJson(output) : parseGeminiJson(output);
535
535
  if (!parsed || !parsed.usage) {
536
536
  return {};
537
537
  }
@@ -1271,9 +1271,19 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1271
1271
  // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
1272
1272
  // JSON parser is otherwise unreachable from the tool surface and the
1273
1273
  // structured usageMetadata is silently dropped.
1274
+ //
1275
+ // Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
1276
+ // Gemini already streams stdout in real time, so the existing 10-minute
1277
+ // idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
1278
+ // adjustment — unlike Claude, no `--include-partial-messages` companion
1279
+ // flag is required because Gemini emits assistant `delta` events as part
1280
+ // of the default stream-json shape.
1274
1281
  if (params.outputFormat === "json") {
1275
1282
  args.push("-o", "json");
1276
1283
  }
1284
+ else if (params.outputFormat === "stream-json") {
1285
+ args.push("-o", "stream-json");
1286
+ }
1277
1287
  // Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
1278
1288
  if (params.skipTrust) {
1279
1289
  args.push("--skip-trust");
@@ -3069,11 +3079,14 @@ export function createGatewayServer(deps = {}) {
3069
3079
  .default(false)
3070
3080
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3071
3081
  // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3072
- // remains text so existing callers see no behavior change.
3082
+ // remains text so existing callers see no behavior change. Phase 4 slice
3083
+ // ε adds `stream-json` (NDJSON event stream parsed by
3084
+ // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
3085
+ // semantics covered by Gemini's existing real-time stdout streaming).
3073
3086
  outputFormat: z
3074
- .enum(["text", "json"])
3087
+ .enum(["text", "json", "stream-json"])
3075
3088
  .default("text")
3076
- .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
3089
+ .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
3077
3090
  sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
3078
3091
  policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
3079
3092
  adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
@@ -3691,11 +3704,14 @@ export function createGatewayServer(deps = {}) {
3691
3704
  .default(false)
3692
3705
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3693
3706
  // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3694
- // remains text so existing callers see no behavior change.
3707
+ // remains text so existing callers see no behavior change. Phase 4 slice
3708
+ // ε adds `stream-json` (NDJSON event stream parsed by
3709
+ // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
3710
+ // semantics covered by Gemini's existing real-time stdout streaming).
3695
3711
  outputFormat: z
3696
- .enum(["text", "json"])
3712
+ .enum(["text", "json", "stream-json"])
3697
3713
  .default("text")
3698
- .describe("Gemini output format. `json` emits `-o json` so usageMetadata is parsed and reported."),
3714
+ .describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
3699
3715
  sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
3700
3716
  policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
3701
3717
  adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
@@ -248,7 +248,11 @@ export const UPSTREAM_CLI_CONTRACTS = {
248
248
  "-s": { arity: "none", description: "Sandbox mode" },
249
249
  "--policy": { arity: "one", description: "Policy file path" },
250
250
  "--admin-policy": { arity: "one", description: "Admin policy file path" },
251
- "-o": { arity: "one", values: ["json"], description: "Output format" },
251
+ "-o": {
252
+ arity: "one",
253
+ values: ["json", "stream-json"],
254
+ description: "Output format (Phase 4 slice ε adds stream-json)",
255
+ },
252
256
  "--resume": { arity: "one", description: "Resume session" },
253
257
  "--skip-trust": {
254
258
  arity: "none",
@@ -275,6 +279,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
275
279
  args: ["-p", "hello", "--skip-trust"],
276
280
  expect: "pass",
277
281
  },
282
+ {
283
+ id: "gemini-stream-json",
284
+ description: "Phase 4 slice ε: -o stream-json is accepted",
285
+ args: ["-p", "hello", "-o", "stream-json"],
286
+ expect: "pass",
287
+ },
288
+ {
289
+ id: "gemini-output-format-invalid",
290
+ description: "Phase 4 slice ε: -o ndjson is rejected (not in contract enum)",
291
+ args: ["-p", "hello", "-o", "ndjson"],
292
+ expect: "fail",
293
+ },
278
294
  ],
279
295
  },
280
296
  grok: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",