llm-cli-gateway 1.9.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/dist/gemini-json-parser.d.ts +19 -4
- package/dist/gemini-json-parser.js +73 -4
- package/dist/index.d.ts +11 -6
- package/dist/index.js +25 -9
- package/dist/upstream-contracts.js +17 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,56 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the llm-cli-gateway project.
|
|
4
4
|
|
|
5
|
+
## [1.10.0] - 2026-05-27 — Phase 4 slice ε (Gemini `-o stream-json` enum widening)
|
|
6
|
+
|
|
7
|
+
Ships the fifth Phase 4 slice: Gemini's NDJSON event-stream output format
|
|
8
|
+
(`-o stream-json`) is now reachable from `gemini_request` and
|
|
9
|
+
`gemini_request_async`. Four commits land together: the feature wiring, a
|
|
10
|
+
contract-table widening, a test-veracity regression suite, and a follow-up
|
|
11
|
+
test fix driven by the multi-LLM round-1 audit.
|
|
12
|
+
|
|
13
|
+
### Added — `outputFormat: "stream-json"` for Gemini
|
|
14
|
+
|
|
15
|
+
- `gemini_request` and `gemini_request_async` `outputFormat` enums widened
|
|
16
|
+
from `text | json` to `text | json | stream-json`.
|
|
17
|
+
- `prepareGeminiRequest` emits `-o stream-json` when the new value is set.
|
|
18
|
+
No `--include-partial-messages` analogue is required: Gemini already
|
|
19
|
+
streams stdout in real time across all output modes (covered by
|
|
20
|
+
`CLI_IDLE_TIMEOUTS.gemini = 600_000`).
|
|
21
|
+
- New `parseGeminiStreamJson` parser consumes the NDJSON event stream
|
|
22
|
+
(`init` / `message` / `result` lines), concatenates assistant `delta`
|
|
23
|
+
messages into the response, and extracts
|
|
24
|
+
`input_tokens` / `output_tokens` / `cached` → `cache_read_tokens` from
|
|
25
|
+
the terminal `result.stats` event.
|
|
26
|
+
- `extractUsageAndCost("gemini", _, "stream-json")` routes to the new
|
|
27
|
+
parser so usage tokens reach the flight recorder on the stream-json
|
|
28
|
+
path, matching the existing `-o json` behaviour.
|
|
29
|
+
- `UPSTREAM_CLI_CONTRACTS.gemini.flags["-o"].values` widened to
|
|
30
|
+
`["json", "stream-json"]`; two new conformance fixtures
|
|
31
|
+
(`gemini-stream-json` passing, `gemini-output-format-invalid` failing
|
|
32
|
+
for `-o ndjson`) pin the enum bound.
|
|
33
|
+
|
|
34
|
+
### Test-veracity audit
|
|
35
|
+
|
|
36
|
+
Per the standing protocol established with v1.9.0
|
|
37
|
+
(`feedback_test_veracity_audit_protocol`), this slice's tests were
|
|
38
|
+
audited by Codex + Gemini + Grok + Mistral in async parallel with
|
|
39
|
+
mandatory mutation-probe execution. Round 1 found one real gap
|
|
40
|
+
(`Eε-4` only checked fixture presence/shape — P-Eε-1 left it green);
|
|
41
|
+
closed in commit `4a78f9c` by running the fixture's args through
|
|
42
|
+
`validateUpstreamCliArgs` inside the same `it()` block. Round 2
|
|
43
|
+
delivered unanimous UNCONDITIONAL APPROVE across all four reviewers,
|
|
44
|
+
with site-by-site probe evidence for the contested `Eα` registered-schema
|
|
45
|
+
helper. Spec at `docs/plans/test-veracity-audit-slice-epsilon.spec.md`.
|
|
46
|
+
|
|
47
|
+
Test count: 771 → 795 → 796 (24 + 1 new across two files).
|
|
48
|
+
|
|
49
|
+
### Known caveats
|
|
50
|
+
|
|
51
|
+
- The `npm run check` script still does not include `format:check` (a
|
|
52
|
+
gap first flagged in the v1.8.0 release notes). Run both locally
|
|
53
|
+
before pushing; CI runs format:check separately.
|
|
54
|
+
|
|
5
55
|
## [1.9.0] - 2026-05-27 — Phase 4 slice δ (budget/max-turns parity) + retroactive α/γ contract closure
|
|
6
56
|
|
|
7
57
|
Ships the fourth Phase 4 slice (budget/max-turns parity for Grok and Mistral),
|
|
@@ -1,13 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
|
|
3
|
+
* (NDJSON event stream) output.
|
|
3
4
|
*
|
|
4
|
-
*
|
|
5
|
+
* `-o json` emits a single JSON object with:
|
|
5
6
|
* - `response`: string final model output
|
|
6
7
|
* - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
|
|
7
8
|
* cachedContentTokenCount?, totalTokenCount }
|
|
8
9
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
10
|
+
* `-o stream-json` emits one JSON object per line:
|
|
11
|
+
* - `{ "type": "init", "session_id": "...", "model": "..." }`
|
|
12
|
+
* - `{ "type": "message", "role": "user", "content": "..." }`
|
|
13
|
+
* - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
|
|
14
|
+
* - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
|
|
15
|
+
* "output_tokens": N, "cached": N, ... } }`
|
|
16
|
+
*
|
|
17
|
+
* Both parsers return null when stdout is unparseable. Both populate the same
|
|
18
|
+
* `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
|
|
19
|
+
* outputFormat without further dispatch.
|
|
11
20
|
*/
|
|
12
21
|
export interface GeminiUsage {
|
|
13
22
|
input_tokens: number;
|
|
@@ -19,3 +28,9 @@ export interface GeminiJsonParseResult {
|
|
|
19
28
|
response?: string;
|
|
20
29
|
}
|
|
21
30
|
export declare function parseGeminiJson(stdout: string): GeminiJsonParseResult | null;
|
|
31
|
+
/**
|
|
32
|
+
* Parse Gemini `-o stream-json` NDJSON output. Concatenates assistant `delta`
|
|
33
|
+
* message content into `response`, extracts the terminal `result.stats` payload
|
|
34
|
+
* into `usage`. Returns null when stdout contains no parseable JSON line.
|
|
35
|
+
*/
|
|
36
|
+
export declare function parseGeminiStreamJson(stdout: string): GeminiJsonParseResult | null;
|
|
@@ -1,13 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
|
|
3
|
+
* (NDJSON event stream) output.
|
|
3
4
|
*
|
|
4
|
-
*
|
|
5
|
+
* `-o json` emits a single JSON object with:
|
|
5
6
|
* - `response`: string final model output
|
|
6
7
|
* - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
|
|
7
8
|
* cachedContentTokenCount?, totalTokenCount }
|
|
8
9
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
10
|
+
* `-o stream-json` emits one JSON object per line:
|
|
11
|
+
* - `{ "type": "init", "session_id": "...", "model": "..." }`
|
|
12
|
+
* - `{ "type": "message", "role": "user", "content": "..." }`
|
|
13
|
+
* - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
|
|
14
|
+
* - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
|
|
15
|
+
* "output_tokens": N, "cached": N, ... } }`
|
|
16
|
+
*
|
|
17
|
+
* Both parsers return null when stdout is unparseable. Both populate the same
|
|
18
|
+
* `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
|
|
19
|
+
* outputFormat without further dispatch.
|
|
11
20
|
*/
|
|
12
21
|
export function parseGeminiJson(stdout) {
|
|
13
22
|
const trimmed = stdout.trim();
|
|
@@ -45,3 +54,63 @@ export function parseGeminiJson(stdout) {
|
|
|
45
54
|
}
|
|
46
55
|
return result;
|
|
47
56
|
}
|
|
57
|
+
/**
 * Parse Gemini `-o stream-json` NDJSON output.
 *
 * Each non-blank line of stdout is parsed independently as JSON. Lines that
 * are not valid JSON (warnings, banners such as "Ripgrep not available") and
 * lines that parse to anything other than a JSON object (scalars, `null`,
 * arrays) are skipped, so stray chatter cannot poison usage extraction.
 *
 * - Every `message` event with `role: "assistant"` and a string `content`
 *   contributes one chunk; chunks are joined in arrival order into `response`.
 * - A `result` event whose `stats` object carries a numeric `input_tokens` or
 *   `output_tokens` populates `usage` (a missing counterpart defaults to 0).
 *   A numeric `stats.cached` is exposed as `cache_read_tokens`. If several
 *   such events appear, the last one wins.
 *
 * @param stdout Raw stdout captured from the Gemini CLI.
 * @returns An object with optional `usage` / `response` fields, or null when
 *   stdout is empty or no line yields a JSON object event.
 */
export function parseGeminiStreamJson(stdout) {
    if (!stdout) {
        return null;
    }
    const assistantChunks = [];
    let usage;
    let sawEvent = false;
    for (const rawLine of stdout.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        let event;
        try {
            event = JSON.parse(line);
        }
        catch {
            // Non-JSON chatter is ignored on purpose (best-effort parsing).
            continue;
        }
        // Stream events are JSON objects. `typeof [] === "object"`, so arrays
        // must be rejected explicitly or a bare `[...]` line would count as
        // an event and turn an unparseable stream into an empty result.
        if (!event || typeof event !== "object" || Array.isArray(event)) {
            continue;
        }
        sawEvent = true;
        if (event.type === "message" &&
            event.role === "assistant" &&
            typeof event.content === "string") {
            assistantChunks.push(event.content);
            continue;
        }
        if (event.type === "result" && event.stats && typeof event.stats === "object") {
            const stats = event.stats;
            const input = typeof stats.input_tokens === "number" ? stats.input_tokens : undefined;
            const output = typeof stats.output_tokens === "number" ? stats.output_tokens : undefined;
            // Only record usage when at least one token counter is present;
            // otherwise keep whatever an earlier result event provided.
            if (input !== undefined || output !== undefined) {
                const counts = {
                    input_tokens: input ?? 0,
                    output_tokens: output ?? 0,
                };
                usage = typeof stats.cached === "number"
                    ? { ...counts, cache_read_tokens: stats.cached }
                    : counts;
            }
        }
    }
    if (!sawEvent) {
        return null;
    }
    return {
        ...(usage !== undefined ? { usage } : {}),
        ...(assistantChunks.length > 0 ? { response: assistantChunks.join("") } : {}),
    };
}
|
package/dist/index.d.ts
CHANGED
|
@@ -212,11 +212,13 @@ export declare function prepareGeminiRequest(params: {
|
|
|
212
212
|
optimizePrompt: boolean;
|
|
213
213
|
operation: string;
|
|
214
214
|
/**
|
|
215
|
-
* U23
|
|
216
|
-
*
|
|
217
|
-
*
|
|
215
|
+
* U23 + Phase 4 slice ε: output format. `json` emits `-o json` (single
|
|
216
|
+
* JSON object with usageMetadata). `stream-json` emits `-o stream-json`
|
|
217
|
+
* (NDJSON event stream — `init` / `message` / `result` lines). Both
|
|
218
|
+
* route through `extractUsageAndCost` so usage tokens reach the flight
|
|
219
|
+
* recorder. Defaults to "text".
|
|
218
220
|
*/
|
|
219
|
-
outputFormat?: "text" | "json";
|
|
221
|
+
outputFormat?: "text" | "json" | "stream-json";
|
|
220
222
|
sandbox?: boolean;
|
|
221
223
|
policyFiles?: string[];
|
|
222
224
|
adminPolicyFiles?: string[];
|
|
@@ -313,8 +315,11 @@ export interface GeminiRequestParams {
|
|
|
313
315
|
optimizeResponse?: boolean;
|
|
314
316
|
idleTimeoutMs?: number;
|
|
315
317
|
forceRefresh?: boolean;
|
|
316
|
-
/**
|
|
317
|
-
|
|
318
|
+
/**
|
|
319
|
+
* U23 + Phase 4 slice ε: "json" emits `-o json`; "stream-json" emits
|
|
320
|
+
* `-o stream-json` (NDJSON event stream). Both are usage-extracted.
|
|
321
|
+
*/
|
|
322
|
+
outputFormat?: "text" | "json" | "stream-json";
|
|
318
323
|
sandbox?: boolean;
|
|
319
324
|
policyFiles?: string[];
|
|
320
325
|
adminPolicyFiles?: string[];
|
package/dist/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import { z } from "zod";
|
|
|
9
9
|
import { executeCli, killAllProcessGroups } from "./executor.js";
|
|
10
10
|
import { parseStreamJson } from "./stream-json-parser.js";
|
|
11
11
|
import { parseCodexJsonStream } from "./codex-json-parser.js";
|
|
12
|
-
import { parseGeminiJson } from "./gemini-json-parser.js";
|
|
12
|
+
import { parseGeminiJson, parseGeminiStreamJson } from "./gemini-json-parser.js";
|
|
13
13
|
import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
|
|
14
14
|
import { homedir } from "os";
|
|
15
15
|
import { createSessionManager } from "./session-manager.js";
|
|
@@ -530,8 +530,8 @@ ctx) {
|
|
|
530
530
|
costUsd: parsed.usage.cost_usd,
|
|
531
531
|
};
|
|
532
532
|
}
|
|
533
|
-
if (cli === "gemini" && outputFormat === "json") {
|
|
534
|
-
const parsed = parseGeminiJson(output);
|
|
533
|
+
if (cli === "gemini" && (outputFormat === "json" || outputFormat === "stream-json")) {
|
|
534
|
+
const parsed = outputFormat === "stream-json" ? parseGeminiStreamJson(output) : parseGeminiJson(output);
|
|
535
535
|
if (!parsed || !parsed.usage) {
|
|
536
536
|
return {};
|
|
537
537
|
}
|
|
@@ -1271,9 +1271,19 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1271
1271
|
// U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
|
|
1272
1272
|
// JSON parser is otherwise unreachable from the tool surface and the
|
|
1273
1273
|
// structured usageMetadata is silently dropped.
|
|
1274
|
+
//
|
|
1275
|
+
// Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
|
|
1276
|
+
// Gemini already streams stdout in real-time so the existing 10-minute
|
|
1277
|
+
// idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
|
|
1278
|
+
// adjustment — unlike Claude, no `--include-partial-messages` companion
|
|
1279
|
+
// flag is required because Gemini emits assistant `delta` events as part
|
|
1280
|
+
// of the default stream-json shape.
|
|
1274
1281
|
if (params.outputFormat === "json") {
|
|
1275
1282
|
args.push("-o", "json");
|
|
1276
1283
|
}
|
|
1284
|
+
else if (params.outputFormat === "stream-json") {
|
|
1285
|
+
args.push("-o", "stream-json");
|
|
1286
|
+
}
|
|
1277
1287
|
// Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
|
|
1278
1288
|
if (params.skipTrust) {
|
|
1279
1289
|
args.push("--skip-trust");
|
|
@@ -3069,11 +3079,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
3069
3079
|
.default(false)
|
|
3070
3080
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3071
3081
|
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3072
|
-
// remains text so existing callers see no behavior change.
|
|
3082
|
+
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
3083
|
+
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
3084
|
+
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
3085
|
+
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
3073
3086
|
outputFormat: z
|
|
3074
|
-
.enum(["text", "json"])
|
|
3087
|
+
.enum(["text", "json", "stream-json"])
|
|
3075
3088
|
.default("text")
|
|
3076
|
-
.describe("Gemini output format. `json` emits `-o json`
|
|
3089
|
+
.describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
|
|
3077
3090
|
sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
|
|
3078
3091
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3079
3092
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
@@ -3691,11 +3704,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
3691
3704
|
.default(false)
|
|
3692
3705
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3693
3706
|
// U23: emit `-o json` to extract token usage via parseGeminiJson. Default
|
|
3694
|
-
// remains text so existing callers see no behavior change.
|
|
3707
|
+
// remains text so existing callers see no behavior change. Phase 4 slice
|
|
3708
|
+
// ε adds `stream-json` (NDJSON event stream parsed by
|
|
3709
|
+
// parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
|
|
3710
|
+
// semantics covered by Gemini's existing real-time stdout streaming).
|
|
3695
3711
|
outputFormat: z
|
|
3696
|
-
.enum(["text", "json"])
|
|
3712
|
+
.enum(["text", "json", "stream-json"])
|
|
3697
3713
|
.default("text")
|
|
3698
|
-
.describe("Gemini output format. `json` emits `-o json`
|
|
3714
|
+
.describe("Gemini output format. `json` emits `-o json` (single JSON with usageMetadata). `stream-json` emits `-o stream-json` (NDJSON event stream — `init`/`message`/`result` lines, usage extracted from the terminal `result.stats` event). Both report usage to the flight recorder."),
|
|
3699
3715
|
sandbox: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.sandbox.describe("Run Gemini in sandbox mode (-s)"),
|
|
3700
3716
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3701
3717
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
@@ -248,7 +248,11 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
248
248
|
"-s": { arity: "none", description: "Sandbox mode" },
|
|
249
249
|
"--policy": { arity: "one", description: "Policy file path" },
|
|
250
250
|
"--admin-policy": { arity: "one", description: "Admin policy file path" },
|
|
251
|
-
"-o": {
|
|
251
|
+
"-o": {
|
|
252
|
+
arity: "one",
|
|
253
|
+
values: ["json", "stream-json"],
|
|
254
|
+
description: "Output format (Phase 4 slice ε adds stream-json)",
|
|
255
|
+
},
|
|
252
256
|
"--resume": { arity: "one", description: "Resume session" },
|
|
253
257
|
"--skip-trust": {
|
|
254
258
|
arity: "none",
|
|
@@ -275,6 +279,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
275
279
|
args: ["-p", "hello", "--skip-trust"],
|
|
276
280
|
expect: "pass",
|
|
277
281
|
},
|
|
282
|
+
{
|
|
283
|
+
id: "gemini-stream-json",
|
|
284
|
+
description: "Phase 4 slice ε: -o stream-json is accepted",
|
|
285
|
+
args: ["-p", "hello", "-o", "stream-json"],
|
|
286
|
+
expect: "pass",
|
|
287
|
+
},
|
|
288
|
+
{
|
|
289
|
+
id: "gemini-output-format-invalid",
|
|
290
|
+
description: "Phase 4 slice ε: -o ndjson is rejected (not in contract enum)",
|
|
291
|
+
args: ["-p", "hello", "-o", "ndjson"],
|
|
292
|
+
expect: "fail",
|
|
293
|
+
},
|
|
278
294
|
],
|
|
279
295
|
},
|
|
280
296
|
grok: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-cli-gateway",
|
|
3
|
-
"version": "1.9.0",
|
|
3
|
+
"version": "1.10.0",
|
|
4
4
|
"mcpName": "io.github.verivus-oss/llm-cli-gateway",
|
|
5
5
|
"description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
|
|
6
6
|
"license": "MIT",
|