llm-cli-gateway 2.6.0 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,33 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.6.3] - 2026-06-12: Claude cache-control veracity and Grok 0.2.50
8
+
9
+ ### Fixed
10
+
11
+ - Claude `promptParts` cache-control stream-json payloads now preserve the
12
+ exact assembled prompt bytes: concatenating emitted Claude content blocks
13
+ matches `assemble(parts).text`, including stable-part separators.
14
+ - Empty or omitted stable-part `cacheControl` markers are now treated as a
15
+ no-op: they do not force the Claude stdin cache-control path, do not suppress
16
+ opt-in auto-emission, and return a `cache_control_noop` warning.
17
+ - Flight-recorder rows now persist the actual emitted
18
+ `cache_control_ttl_seconds`, and cache-state TTL reporting prefers that row
19
+ value while retaining a 1-hour compatibility fallback for legacy
20
+ `cache_control_blocks` rows.
21
+ - Provider cache docs now describe the verified Claude stream-json
22
+ `cache_control` path and the remaining hidden-request limits accurately,
23
+ including async flight-recorder metadata and slice κ TTL handoff.
24
+
25
+ ### Upstream provider maintenance
26
+
27
+ - Grok Build stable `0.2.50` contract refresh: `--debug` and `--debug-file`
28
+ are acknowledged as upstream-only help/probe flags at top level and across
29
+ subcommands without becoming gateway argv allowlist flags.
30
+ - Declared `grok agent leader --relay-on-demand` on the non-exposed agent
31
+ leader subcommand, refreshed `docs/upstream/snapshots/grok.json`, and added
32
+ the 2026-06-12 Grok upstream scan report.
33
+
7
34
  ## [2.6.0] - 2026-06-12: Gemini provider on Google Antigravity CLI
8
35
 
9
36
  ### Changed
@@ -11,6 +11,7 @@ export interface AsyncJobFlightRecorderEntry {
11
11
  stablePrefixHash?: string;
12
12
  stablePrefixTokens?: number;
13
13
  cacheControlBlocks?: number;
14
+ cacheControlTtlSeconds?: number;
14
15
  }
15
16
  export type AsyncJobUsageExtractor = (stdout: string) => {
16
17
  inputTokens?: number;
@@ -515,6 +515,7 @@ export class AsyncJobManager {
515
515
  stablePrefixHash: flightRecorderEntry.stablePrefixHash,
516
516
  stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
517
517
  cacheControlBlocks: flightRecorderEntry.cacheControlBlocks,
518
+ cacheControlTtlSeconds: flightRecorderEntry.cacheControlTtlSeconds,
518
519
  });
519
520
  }
520
521
  catch (err) {
@@ -12,6 +12,8 @@ export interface SessionCacheStats {
12
12
  lastRequestAt: string | null;
13
13
  estimatedSavingsUsd: number;
14
14
  ttlRemainingMs: number | null;
15
+ latestCacheControlBlocks?: number | null;
16
+ latestCacheControlTtlSeconds?: number | null;
15
17
  }
16
18
  export interface PrefixCacheStats {
17
19
  stablePrefixHash: string;
@@ -10,7 +10,9 @@ export function computeSessionCacheStats(db, sessionId) {
10
10
  COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
11
11
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
12
12
  stable_prefix_hash,
13
- datetime_utc
13
+ datetime_utc,
14
+ cache_control_blocks,
15
+ cache_control_ttl_seconds
14
16
  FROM requests
15
17
  WHERE session_id = ?
16
18
  ORDER BY datetime_utc DESC`, sessionId);
@@ -51,6 +53,8 @@ export function computeSessionCacheStats(db, sessionId) {
51
53
  lastRequestAt: lastAt,
52
54
  estimatedSavingsUsd,
53
55
  ttlRemainingMs: null,
56
+ latestCacheControlBlocks: rows.length > 0 ? (rows[0].cache_control_blocks ?? null) : null,
57
+ latestCacheControlTtlSeconds: rows.length > 0 ? (rows[0].cache_control_ttl_seconds ?? null) : null,
54
58
  };
55
59
  }
56
60
  export function computeTtlRemaining(stats, cli, ttlPolicy) {
@@ -63,7 +67,14 @@ export function computeTtlRemaining(stats, cli, ttlPolicy) {
63
67
  if (!Number.isFinite(lastWriteMs))
64
68
  return null;
65
69
  const elapsedMs = nowMs - lastWriteMs;
66
- const ttlMs = ttlPolicy.anthropicTtlSeconds * 1000;
70
+ const isExplicit = typeof stats.latestCacheControlBlocks === "number" && stats.latestCacheControlBlocks > 0;
71
+ const recordedTtlSeconds = typeof stats.latestCacheControlTtlSeconds === "number" &&
72
+ Number.isFinite(stats.latestCacheControlTtlSeconds) &&
73
+ stats.latestCacheControlTtlSeconds > 0
74
+ ? stats.latestCacheControlTtlSeconds
75
+ : null;
76
+ const ttlSeconds = recordedTtlSeconds ?? (isExplicit ? 3600 : ttlPolicy.anthropicTtlSeconds);
77
+ const ttlMs = ttlSeconds * 1000;
67
78
  return Math.max(0, ttlMs - elapsedMs);
68
79
  }
69
80
  export function computePrefixCacheStats(db, stablePrefixHash) {
@@ -128,7 +139,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
128
139
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
129
140
  stable_prefix_hash,
130
141
  datetime_utc,
131
- cache_control_blocks
142
+ cache_control_blocks,
143
+ cache_control_ttl_seconds
132
144
  FROM requests
133
145
  WHERE datetime_utc >= ?`
134
146
  : `SELECT cli, model,
@@ -136,7 +148,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
136
148
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
137
149
  stable_prefix_hash,
138
150
  datetime_utc,
139
- cache_control_blocks
151
+ cache_control_blocks,
152
+ cache_control_ttl_seconds
140
153
  FROM requests`;
141
154
  const rows = sinceIso ? db.queryRequests(sql, sinceIso) : db.queryRequests(sql);
142
155
  const perCliMap = new Map();
@@ -10,6 +10,7 @@ export interface FlightLogStart {
10
10
  stablePrefixHash?: string;
11
11
  stablePrefixTokens?: number;
12
12
  cacheControlBlocks?: number;
13
+ cacheControlTtlSeconds?: number;
13
14
  }
14
15
  export interface FlightLogResult {
15
16
  response: string;
@@ -31,6 +31,13 @@ function ensureCacheControlBlocksColumn(db) {
31
31
  db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
32
32
  }
33
33
  }
34
+ function ensureCacheControlTtlSecondsColumn(db) {
35
+ const rows = db.prepare("PRAGMA table_info(requests)").all();
36
+ const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
37
+ if (!names.has("cache_control_ttl_seconds")) {
38
+ db.exec("ALTER TABLE requests ADD COLUMN cache_control_ttl_seconds INTEGER");
39
+ }
40
+ }
34
41
  export function resolveFlightRecorderDbPath() {
35
42
  const configured = process.env.LLM_GATEWAY_LOGS_DB;
36
43
  if (configured !== undefined) {
@@ -144,6 +151,10 @@ export class FlightRecorder {
144
151
  this.db
145
152
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
146
153
  .run(new Date().toISOString());
154
+ ensureCacheControlTtlSecondsColumn(this.db);
155
+ this.db
156
+ .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(5, ?)")
157
+ .run(new Date().toISOString());
147
158
  if (process.platform !== "win32") {
148
159
  try {
149
160
  chmodSync(dbPath, 0o600);
@@ -154,10 +165,10 @@ export class FlightRecorder {
154
165
  const insertRequest = this.db.prepare(`
155
166
  INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
156
167
  stable_prefix_hash, stable_prefix_tokens,
157
- cache_control_blocks)
168
+ cache_control_blocks, cache_control_ttl_seconds)
158
169
  VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
159
170
  @stable_prefix_hash, @stable_prefix_tokens,
160
- @cache_control_blocks)
171
+ @cache_control_blocks, @cache_control_ttl_seconds)
161
172
  `);
162
173
  const insertMetadata = this.db.prepare(`
163
174
  INSERT INTO gateway_metadata (request_id, async_job_id, status)
@@ -175,6 +186,7 @@ export class FlightRecorder {
175
186
  stable_prefix_hash: entry.stablePrefixHash ?? null,
176
187
  stable_prefix_tokens: entry.stablePrefixTokens ?? null,
177
188
  cache_control_blocks: entry.cacheControlBlocks ?? null,
189
+ cache_control_ttl_seconds: entry.cacheControlTtlSeconds ?? null,
178
190
  });
179
191
  insertMetadata.run({
180
192
  request_id: entry.correlationId,
package/dist/index.d.ts CHANGED
@@ -134,6 +134,7 @@ interface CliRequestPrep {
134
134
  stablePrefixTokens: number | null;
135
135
  stdinPayload?: string;
136
136
  cacheControlBlocks?: number;
137
+ cacheControlTtlSeconds?: number;
137
138
  warnings?: WarningEntry[];
138
139
  }
139
140
  export declare function prepareClaudeRequest(params: {
package/dist/index.js CHANGED
@@ -761,6 +761,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
761
761
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
762
762
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
763
763
  cacheControlBlocks: prep.cacheControlBlocks,
764
+ cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
764
765
  },
765
766
  extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
766
767
  };
@@ -1105,7 +1106,18 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1105
1106
  const ccEarly = params.promptParts?.cacheControl;
1106
1107
  const cacheControlRequestedEarly = !!(ccEarly &&
1107
1108
  (ccEarly.system || ccEarly.tools || ccEarly.context));
1108
- if (params.optimizePrompt && cacheControlRequestedEarly) {
1109
+ const explicitCacheControlBlockCount = params.promptParts && ccEarly
1110
+ ? (ccEarly.system && params.promptParts.system && params.promptParts.system.length > 0
1111
+ ? 1
1112
+ : 0) +
1113
+ (ccEarly.tools && params.promptParts.tools && params.promptParts.tools.length > 0 ? 1 : 0) +
1114
+ (ccEarly.context && params.promptParts.context && params.promptParts.context.length > 0
1115
+ ? 1
1116
+ : 0)
1117
+ : 0;
1118
+ const effectiveExplicitCacheControl = explicitCacheControlBlockCount > 0;
1119
+ const cacheControlNoop = cacheControlRequestedEarly && !effectiveExplicitCacheControl;
1120
+ if (params.optimizePrompt && effectiveExplicitCacheControl) {
1109
1121
  return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
1110
1122
  }
1111
1123
  let effectivePrompt = assembledPrompt;
@@ -1140,7 +1152,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1140
1152
  }
1141
1153
  }
1142
1154
  let autoEmittedCacheControlBlock = null;
1143
- if (!cacheControlRequestedEarly &&
1155
+ if (!effectiveExplicitCacheControl &&
1144
1156
  runtime.cacheAwareness.emitAnthropicCacheControl &&
1145
1157
  !params.optimizePrompt &&
1146
1158
  params.outputFormat === "stream-json" &&
@@ -1164,7 +1176,14 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1164
1176
  }
1165
1177
  }
1166
1178
  const warnings = [];
1167
- if (!cacheControlRequestedEarly &&
1179
+ if (cacheControlNoop) {
1180
+ warnings.push({
1181
+ code: "cache_control_noop",
1182
+ message: "promptParts.cacheControl only marked empty or omitted stable parts; no cache_control breakpoint will be emitted from the explicit marker.",
1183
+ reason: "cacheControl marker did not match a non-empty stable block",
1184
+ });
1185
+ }
1186
+ if (!effectiveExplicitCacheControl &&
1168
1187
  autoEmittedCacheControlBlock === null &&
1169
1188
  params.promptParts &&
1170
1189
  stablePrefixTokens !== null) {
@@ -1184,9 +1203,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1184
1203
  });
1185
1204
  }
1186
1205
  }
1187
- const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
1206
+ const cacheControlRequested = effectiveExplicitCacheControl || autoEmittedCacheControlBlock !== null;
1188
1207
  let stdinPayload;
1189
1208
  let cacheControlBlocks;
1209
+ let cacheControlTtlSeconds;
1190
1210
  if (cacheControlRequested) {
1191
1211
  if (params.outputFormat !== "stream-json") {
1192
1212
  return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
@@ -1203,6 +1223,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1203
1223
  const built = assembleClaudeCacheBlocks(effectiveParts);
1204
1224
  stdinPayload = `${JSON.stringify(built.payload)}\n`;
1205
1225
  cacheControlBlocks = built.markedBlockCount;
1226
+ cacheControlTtlSeconds = built.markedBlockCount > 0 ? 3600 : undefined;
1206
1227
  }
1207
1228
  const args = cacheControlRequested
1208
1229
  ? [
@@ -1291,6 +1312,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1291
1312
  stablePrefixTokens,
1292
1313
  stdinPayload,
1293
1314
  cacheControlBlocks,
1315
+ cacheControlTtlSeconds,
1294
1316
  warnings: warnings.length > 0 ? warnings : undefined,
1295
1317
  };
1296
1318
  }
@@ -3383,6 +3405,7 @@ export function createGatewayServer(deps = {}) {
3383
3405
  stablePrefixHash: prep.stablePrefixHash ?? undefined,
3384
3406
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
3385
3407
  cacheControlBlocks: prep.cacheControlBlocks,
3408
+ cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
3386
3409
  }, runtime);
3387
3410
  logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
3388
3411
  try {
@@ -60,14 +60,17 @@ export function assembleClaudeCacheBlocks(parts) {
60
60
  for (const [name, value] of stableEntries) {
61
61
  if (value === undefined || value.length === 0)
62
62
  continue;
63
- const block = { type: "text", text: value };
63
+ const block = {
64
+ type: "text",
65
+ text: blocks.length > 0 ? `${SEPARATOR}${value}` : value,
66
+ };
64
67
  if (cc[name]) {
65
68
  block.cache_control = { type: "ephemeral", ttl: "1h" };
66
69
  markedBlockCount += 1;
67
70
  }
68
71
  blocks.push(block);
69
72
  }
70
- blocks.push({ type: "text", text: parts.task });
73
+ blocks.push({ type: "text", text: blocks.length > 0 ? `${SEPARATOR}${parts.task}` : parts.task });
71
74
  return {
72
75
  payload: {
73
76
  type: "user",
@@ -36,8 +36,20 @@ function subcommand(commandPath, summary, risk, flags = [], options = {}) {
36
36
  tokenCost: options.tokenCost ?? "small",
37
37
  summary,
38
38
  conformanceFixtures: options.fixtures ?? [],
39
+ acknowledgedUpstreamFlags: options.acknowledgedUpstreamFlags ?? [],
39
40
  };
40
41
  }
42
+ function acknowledgeSubcommandFlags(subcommands, flags) {
43
+ return Object.fromEntries(Object.entries(subcommands).map(([name, contract]) => [
44
+ name,
45
+ {
46
+ ...contract,
47
+ acknowledgedUpstreamFlags: Array.from(new Set([...(contract.acknowledgedUpstreamFlags ?? []), ...flags])),
48
+ children: acknowledgeSubcommandFlags(contract.children ?? {}, flags),
49
+ },
50
+ ]));
51
+ }
52
+ const GROK_DEBUG_HELP_FLAGS = ["--debug", "--debug-file"];
41
53
  export const UPSTREAM_CLI_CONTRACTS = {
42
54
  claude: {
43
55
  cli: "claude",
@@ -903,7 +915,7 @@ export const UPSTREAM_CLI_CONTRACTS = {
903
915
  watchCategories: ["flags", "permission-modes", "session-resume", "sandbox", "output-formats"],
904
916
  },
905
917
  helpArgs: [["--help"]],
906
- subcommands: {
918
+ subcommands: acknowledgeSubcommandFlags({
907
919
  agent: subcommand(["agent"], "Run Grok agent service helpers.", "executes_agent", [
908
920
  "--agent-profile",
909
921
  "--always-approve",
@@ -935,6 +947,7 @@ export const UPSTREAM_CLI_CONTRACTS = {
935
947
  "--leader-socket",
936
948
  "--no-auto-update",
937
949
  "--no-exit-on-disconnect",
950
+ "--relay-on-demand",
938
951
  ], { exposure: "not_exposed" }),
939
952
  },
940
953
  }),
@@ -981,11 +994,10 @@ export const UPSTREAM_CLI_CONTRACTS = {
981
994
  "--version",
982
995
  ], { exposure: "not_exposed" }),
983
996
  version: subcommand(["version"], "Print Grok version information.", "read_only", ["--json", "--leader-socket"], { tier: "diagnostic" }),
984
- worktree: subcommand(["worktree"], "Manage Grok worktree sessions.", "writes_local_config", [
985
- "--leader-socket",
986
- ]),
987
- },
997
+ worktree: subcommand(["worktree"], "Manage Grok worktree sessions.", "writes_local_config", ["--leader-socket"]),
998
+ }, GROK_DEBUG_HELP_FLAGS),
988
999
  maxPositionals: 0,
1000
+ acknowledgedUpstreamFlags: GROK_DEBUG_HELP_FLAGS,
989
1001
  mcpTools: ["grok_request", "grok_request_async"],
990
1002
  mcpParameters: [
991
1003
  "prompt",
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.6.0",
3
+ "version": "2.6.3",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "llm-cli-gateway",
9
- "version": "2.6.0",
9
+ "version": "2.6.3",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "@modelcontextprotocol/sdk": "^1.29.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.6.0",
3
+ "version": "2.6.3",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",