llm-cli-gateway 2.5.0 → 2.6.3

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,69 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.6.3] - 2026-06-12: Claude cache-control veracity and Grok 0.2.50
8
+
9
+ ### Fixed
10
+
11
+ - Claude `promptParts` cache-control stream-json payloads now preserve the
12
+ exact assembled prompt bytes: concatenating emitted Claude content blocks
13
+ matches `assemble(parts).text`, including stable-part separators.
14
+ - Empty or omitted stable-part `cacheControl` markers are now treated as a
15
+ no-op: they do not force the Claude stdin cache-control path, do not suppress
16
+ opt-in auto-emission, and return a `cache_control_noop` warning.
17
+ - Flight-recorder rows now persist the actual emitted
18
+ `cache_control_ttl_seconds`, and cache-state TTL reporting prefers that row
19
+ value while retaining a 1-hour compatibility fallback for legacy
20
+ `cache_control_blocks` rows.
21
+ - Provider cache docs now describe the verified Claude stream-json
22
+ `cache_control` path and the remaining hidden-request limits accurately,
23
+ including async flight-recorder metadata and slice κ TTL handoff.
24
+
25
+ ### Upstream provider maintenance
26
+
27
+ - Grok Build stable `0.2.50` contract refresh: `--debug` and `--debug-file`
28
+ are acknowledged as upstream-only help/probe flags at top level and across
29
+ subcommands without becoming gateway argv allowlist flags.
30
+ - Declared `grok agent leader --relay-on-demand` on the non-exposed agent
31
+ leader subcommand, refreshed `docs/upstream/snapshots/grok.json`, and added
32
+ the 2026-06-12 Grok upstream scan report.
33
+
34
+ ## [2.6.0] - 2026-06-12: Gemini provider on Google Antigravity CLI
35
+
36
+ ### Changed
37
+
38
+ - **Gemini provider now runs through Google Antigravity CLI (`agy`)** instead of
39
+ the Google Gemini CLI. `gemini_request` / `gemini_request_async` spawn `agy`;
40
+ install via `curl -fsSL https://antigravity.google/cli/install.sh | bash`;
41
+ upgrade via `agy update` (explicit version targets unsupported); session resume
42
+ via `--conversation <id>` (`sessionId`) or `--continue` (`resumeLatest`). Models
43
+ pass to `agy --model` (e.g. `gemini-3-pro-preview`, `gemini-2.5-flash`, `pro`,
44
+ `flash`, `latest`).
45
+ - `gemini_request` parameter surface tightened to Antigravity's capabilities:
46
+ `approvalMode` accepts only `default` and `yolo` (`auto_edit`/`plan` are
47
+ rejected); `allowedTools`, `mcpServers`, non-`text` `outputFormat`,
48
+ `policyFiles`, `adminPolicyFiles`, `attachments`, and `skipTrust` are rejected
49
+ with an explanatory error (retained in the schema for caller parity).
50
+ `includeDirs` (`--add-dir`) and `sandbox` (`--sandbox`) remain supported.
51
+ - Customer-facing documentation (README, the llm-cli-gateway.dev site, install
52
+ guide, dev.to tutorial) and the MCP server instructions string updated to match
53
+ the Antigravity-backed behavior. Verified by a four-reviewer cross-LLM evidence
54
+ gate (Codex/Gemini/Grok/Mistral); see
55
+ `docs/reviews/2026-06-12-customer-docs-antigravity.*`.
56
+
57
+ ### Added
58
+
59
+ - Reply text is mirrored into MCP `structuredContent.response` on provider tool
60
+ responses (Issue #1), alongside the unchanged `content[0].text`.
61
+ - Contract-driven code generation for the Grok provider's argv and tool schema
62
+ (`src/provider-codegen.ts`), proven byte-identical to the prior hand-written
63
+ surface by golden/parity tests.
64
+ - Async-job stall telemetry (Issue #21).
65
+
66
+ ### Upstream provider maintenance
67
+
68
+ - Grok Build v0.2.38: local binary upgraded from 0.2.33; full `--probe-installed` contract + subcommand drift scan executed (live source fetch performed in the run that produced the referenced report). 40 top-level flags + 23 subcommand paths all clean (`extraVsContract: []`, `missingFromBinary: []` across the board per the snapshot). Refreshed `docs/upstream/snapshots/grok.json` (new help surface hash capturing 0.2.38 agent subcommand surface) and `docs/upstream/reports/2026-06-09-grok.md`. `UPSTREAM_CLI_CONTRACTS.grok` now has 18 conformance fixtures (added `grok-0.2.38-agent-surface` as a dated top-level example); no flag, enum, arity, permission-mode, sandbox, output-format, or resume-behaviour changes to encode in the primary contract. `npm run upstream:contracts` and targeted grok/upstream tests pass. (Cross-LLM reviews from Claude and Codex independently reproduced the diff, commands, and fixture behaviour via their own tool inspections of the sources.)
69
+
7
70
  ## [2.5.0] - 2026-06-08: Remote connector OAuth and workspaces
8
71
 
9
72
  - Added remote connector OAuth discovery and authorization-code support with
package/README.md CHANGED
@@ -235,11 +235,13 @@ npm install -g @openai/codex
235
235
  codex login
236
236
  ```
237
237
 
238
- ### Gemini CLI
238
+ ### Gemini (Google Antigravity CLI)
239
+
240
+ The Gemini provider runs through Google Antigravity CLI (`agy`).
239
241
 
240
242
  ```bash
241
- npm install -g @google/gemini-cli
242
- # Or: https://github.com/google-gemini/gemini-cli
243
+ curl -fsSL https://antigravity.google/cli/install.sh | bash
244
+ # Docs: https://antigravity.google/docs/cli-overview
243
245
  ```
244
246
 
245
247
  ### Grok Build CLI (xAI)
@@ -477,7 +479,7 @@ Fork an existing Codex session into a new branch (`codex fork <SESSION_ID|--last
477
479
 
478
480
  ##### `gemini_request`
479
481
 
480
- Execute a Gemini CLI request with session support.
482
+ Execute a Google Antigravity CLI (`agy`) request with session support.
481
483
 
482
484
  **Parameters:**
483
485
 
@@ -486,18 +488,14 @@ Execute a Gemini CLI request with session support.
486
488
  - `sessionId` (string, optional): Session ID to resume
487
489
  - `resumeLatest` (boolean, optional): Resume the latest session automatically
488
490
  - `createNewSession` (boolean, optional): Always create a new session
489
- - `approvalMode` (string, optional): Gemini approval mode (`default|auto_edit|yolo|plan`) in legacy mode
491
+ - `approvalMode` (string, optional): Antigravity approval mode in legacy mode. Only `default` (prompted execution) and `yolo` (emits `--dangerously-skip-permissions`) are accepted; `auto_edit` and `plan` are rejected with an error.
490
492
  - `approvalStrategy` (string, optional): `"legacy"` (default) or `"mcp_managed"`
491
493
  - `approvalPolicy` (string, optional): `"strict"`, `"balanced"`, or `"permissive"`
492
- - `mcpServers` (string[], optional): Allowed Gemini MCP server names
493
- - `allowedTools` (string[], optional): Restrict Gemini tools to this allow-list
494
- - `includeDirs` (string[], optional): Additional workspace directories for Gemini
495
- - `outputFormat` (string, optional): `text` (default), `json` (`-o json`), or `stream-json` (`-o stream-json`, NDJSON with usage extraction)
496
- - `sandbox` (boolean, optional): Run Gemini in sandbox mode (`-s`)
497
- - `policyFiles` / `adminPolicyFiles` (string[], optional): Policy / admin-policy file paths (one `--policy`/`--admin-policy` per file; paths must exist)
498
- - `attachments` (string[], optional): Absolute file paths prepended as `@<path>` tokens to the prompt
499
- - `skipTrust` (boolean, optional): Emit `--skip-trust` to trust the workspace for this session (required for headless runs in fresh workspaces)
500
- - `yolo` (boolean, optional): Auto-approve all; equivalent to `approvalMode: "yolo"`. Emits `--yolo` only when `--approval-mode yolo` is not already being emitted (never both)
494
+ - `includeDirs` (string[], optional): Additional workspace directories (passed as `--add-dir`)
495
+ - `sandbox` (boolean, optional): Run Antigravity in sandbox mode (`--sandbox`)
496
+ - `outputFormat` (string, optional): `text` only. Antigravity print mode emits text; `json` and `stream-json` are rejected.
497
+ - `mcpServers`, `allowedTools`, `policyFiles`, `adminPolicyFiles`, `attachments` (string[], optional) and `skipTrust` (boolean, optional): **Unsupported by Antigravity CLI** — non-empty values (or `skipTrust: true`) are rejected with an explanatory error. Retained in the schema for caller parity.
498
+ - `yolo` (boolean, optional): Auto-approve all; equivalent to `approvalMode: "yolo"`. Emits `--dangerously-skip-permissions`
501
499
  - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
502
500
  - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
503
501
  - `optimizePrompt` (boolean, optional): Optimize prompt for token efficiency, default: false
@@ -1046,7 +1044,7 @@ Plan or run an upgrade for one CLI.
1046
1044
  - Claude explicit target: `claude install <target>`
1047
1045
  - Codex latest: `codex update`
1048
1046
  - Codex explicit target: `npm install -g @openai/codex@<target>`
1049
- - Gemini: `npm install -g @google/gemini-cli@<target>`
1047
+ - Gemini latest: `agy update` (Antigravity self-update; explicit version targets are unsupported)
1050
1048
  - Grok latest: `grok update`
1051
1049
  - Grok explicit target: `grok update --version <target>`
1052
1050
  - Mistral (Vibe): dispatches to the detected installer (`pip`/`uv`/`brew`); errors with guidance when none is detected (Vibe ships no self-update command)
@@ -1236,7 +1234,7 @@ Make sure the CLIs are installed and in your PATH:
1236
1234
  ```bash
1237
1235
  which claude
1238
1236
  which codex
1239
- which gemini
1237
+ which agy
1240
1238
  ```
1241
1239
 
1242
1240
  The gateway extends PATH to include common locations:
@@ -1253,7 +1251,7 @@ If you encounter permission errors, ensure the CLI tools have proper permissions
1253
1251
  ```bash
1254
1252
  chmod +x $(which claude)
1255
1253
  chmod +x $(which codex)
1256
- chmod +x $(which gemini)
1254
+ chmod +x $(which agy)
1257
1255
  ```
1258
1256
 
1259
1257
  ### Session Storage Issues
@@ -1306,7 +1304,7 @@ If you're vetting `llm-cli-gateway` through [Socket](https://socket.dev/npm/pack
1306
1304
  | Alert | Where | Why it's bounded |
1307
1305
  | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
1308
1306
  | **Network access** | `src/http-transport.ts` opens an HTTP MCP transport when started via `npm run start:http`. `src/endpoint-exposure.ts` issues a HEAD probe to verify configured public/tunnel URLs. Socket also flagged `dist/upstream-contracts.js` in v1.17.2 from descriptive text, not a network call. | The transport binds to `127.0.0.1` by default and requires `LLM_GATEWAY_AUTH_TOKEN` to be set. The default stdio MCP entry point (`npm start`) opens no sockets. `src/upstream-contracts.ts` stores provider CLI metadata and imports no HTTP client APIs. |
1309
- | **Shell access** | `src/executor.ts` uses `child_process.spawn(cmd, args, …)` to invoke the underlying LLM CLIs. | `spawn` is called with an argument array and **never** `shell: true`, so there is no shell interpolation path for caller input. The command name is restricted to an allow-list of known CLI binaries (`claude`, `codex`, `gemini`, `grok`, `vibe`). |
1307
+ | **Shell access** | `src/executor.ts` uses `child_process.spawn(cmd, args, …)` to invoke the underlying LLM CLIs. | `spawn` is called with an argument array and **never** `shell: true`, so there is no shell interpolation path for caller input. The command name is restricted to an allow-list of known CLI binaries (`claude`, `codex`, `agy`, `grok`, `vibe`). |
1310
1308
  | **Uses eval** | None in our source. Transitive: `@modelcontextprotocol/sdk` → `ajv@8` uses `new Function(...)` in `ajv/dist/compile/index.js` to compile JSON Schema validators. | This is ajv's standard codegen path. Only known schemas (defined in our source and the MCP SDK) flow into it; no caller-supplied data ever reaches the compiled function body. |
1311
1309
  | **SQLite adapter isolation** | Persistence uses Node's built-in `node:sqlite` module (no native binding, no install scripts) through a single adapter, `src/sqlite-driver.ts`. | `node:sqlite` is touched by exactly one production module (the adapter); every other module talks to SQLite through its typed surface. We never call any `db.pragma()` helper (it does not exist on `node:sqlite`); SQLite setup uses fixed literal `db.exec("PRAGMA ...")` statements. `npm run security:audit` fails the release if production code references `node:sqlite` outside the adapter or reintroduces a `.pragma()` call. |
1312
1310
  | **Dependency ownership** | A handful of small transitive packages (e.g. `media-typer` via `@modelcontextprotocol/sdk`) trip Socket's "unstable ownership" or "obfuscated code" heuristics. | These are pinned, well-known micro-deps in the Node ecosystem with no known issues. We pin direct override versions of `content-type` and `type-is` in `package.json#overrides`. As of 2.0.0 the prod graph carries no native module (`better-sqlite3` moved to devDependencies; `node:sqlite` is built into Node), eliminating the entire `prebuild-install`/`tar-fs`/`tar-stream` install-time chain. Our earlier direct dependency on `toml@3.0.0` was replaced with `smol-toml`. |
@@ -11,6 +11,7 @@ export interface AsyncJobFlightRecorderEntry {
11
11
  stablePrefixHash?: string;
12
12
  stablePrefixTokens?: number;
13
13
  cacheControlBlocks?: number;
14
+ cacheControlTtlSeconds?: number;
14
15
  }
15
16
  export type AsyncJobUsageExtractor = (stdout: string) => {
16
17
  inputTokens?: number;
@@ -61,10 +62,12 @@ export declare class AsyncJobManager {
61
62
  private onJobComplete?;
62
63
  private jobs;
63
64
  private evictionTimer;
65
+ private stallTimer;
64
66
  private processMonitor;
65
67
  private store;
66
68
  private flightRecorder;
67
69
  constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null, flightRecorder?: FlightRecorderLike);
70
+ checkStalledJobs(now?: number): void;
68
71
  hasStore(): boolean;
69
72
  private emitMetrics;
70
73
  private evictCompletedJobs;
@@ -1,6 +1,6 @@
1
1
  import { randomUUID } from "crypto";
2
- import { envWithExtendedPath, getExtendedPath, killProcessGroup, spawnCliProcess, unregisterProcessGroup, } from "./executor.js";
3
- import { noopLogger } from "./logger.js";
2
+ import { envWithExtendedPath, getExtendedPath, killProcessGroup, providerCommandName, spawnCliProcess, unregisterProcessGroup, } from "./executor.js";
3
+ import { noopLogger, logWarn } from "./logger.js";
4
4
  import { ProcessMonitor } from "./process-monitor.js";
5
5
  import { computeRequestKey } from "./job-store.js";
6
6
  import { NoopFlightRecorder } from "./flight-recorder.js";
@@ -8,6 +8,8 @@ const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
8
8
  const JOB_TTL_MS = 60 * 60 * 1000;
9
9
  const EVICTION_INTERVAL_MS = 5 * 60 * 1000;
10
10
  const OUTPUT_FLUSH_INTERVAL_MS = 1000;
11
+ const STALL_CHECK_INTERVAL_MS = 60 * 1000;
12
+ const STALL_WARNING_MARKS_MS = [5, 10, 15].map(min => min * 60 * 1000);
11
13
  function describeProcessLaunchError(cli, error) {
12
14
  const code = error.code;
13
15
  if (code === "ENOENT") {
@@ -55,6 +57,7 @@ export class AsyncJobManager {
55
57
  onJobComplete;
56
58
  jobs = new Map();
57
59
  evictionTimer = null;
60
+ stallTimer = null;
58
61
  processMonitor;
59
62
  store;
60
63
  flightRecorder;
@@ -97,6 +100,43 @@ export class AsyncJobManager {
97
100
  if (this.evictionTimer.unref) {
98
101
  this.evictionTimer.unref();
99
102
  }
103
+ this.stallTimer = setInterval(() => this.checkStalledJobs(), STALL_CHECK_INTERVAL_MS);
104
+ if (this.stallTimer.unref) {
105
+ this.stallTimer.unref();
106
+ }
107
+ }
108
+ checkStalledJobs(now = Date.now()) {
109
+ for (const job of this.jobs.values()) {
110
+ if (job.status !== "running")
111
+ continue;
112
+ if (Buffer.byteLength(job.stdout) > 0) {
113
+ job.stallWarnIndex = STALL_WARNING_MARKS_MS.length;
114
+ continue;
115
+ }
116
+ const idx = job.stallWarnIndex ?? 0;
117
+ if (idx >= STALL_WARNING_MARKS_MS.length)
118
+ continue;
119
+ const elapsedMs = now - new Date(job.startedAt).getTime();
120
+ if (elapsedMs < STALL_WARNING_MARKS_MS[idx])
121
+ continue;
122
+ let newIdx = idx;
123
+ while (newIdx < STALL_WARNING_MARKS_MS.length &&
124
+ elapsedMs >= STALL_WARNING_MARKS_MS[newIdx]) {
125
+ newIdx++;
126
+ }
127
+ job.stallWarnIndex = newIdx;
128
+ const crossedMarkMin = Math.round(STALL_WARNING_MARKS_MS[newIdx - 1] / 60000);
129
+ logWarn(this.logger, `Async job ${job.id} (${job.cli}) has produced no stdout after ~${crossedMarkMin}min — possible silent stall (issue #21)`, {
130
+ jobId: job.id,
131
+ cli: job.cli,
132
+ correlationId: job.correlationId,
133
+ elapsedMs,
134
+ stdoutBytes: 0,
135
+ stderrBytes: Buffer.byteLength(job.stderr),
136
+ model: job.flightRecorderEntry?.model,
137
+ promptLength: job.flightRecorderEntry?.prompt?.length,
138
+ });
139
+ }
100
140
  }
101
141
  hasStore() {
102
142
  return this.store !== null;
@@ -399,7 +439,7 @@ export class AsyncJobManager {
399
439
  }
400
440
  const id = randomUUID();
401
441
  const startedAt = new Date().toISOString();
402
- const command = cli === "mistral" ? "vibe" : cli;
442
+ const command = providerCommandName(cli);
403
443
  const baseEnv = envWithExtendedPath(process.env, getExtendedPath());
404
444
  const child = spawnCliProcess(command, args, {
405
445
  cwd,
@@ -475,6 +515,7 @@ export class AsyncJobManager {
475
515
  stablePrefixHash: flightRecorderEntry.stablePrefixHash,
476
516
  stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
477
517
  cacheControlBlocks: flightRecorderEntry.cacheControlBlocks,
518
+ cacheControlTtlSeconds: flightRecorderEntry.cacheControlTtlSeconds,
478
519
  });
479
520
  }
480
521
  catch (err) {
@@ -12,6 +12,8 @@ export interface SessionCacheStats {
12
12
  lastRequestAt: string | null;
13
13
  estimatedSavingsUsd: number;
14
14
  ttlRemainingMs: number | null;
15
+ latestCacheControlBlocks?: number | null;
16
+ latestCacheControlTtlSeconds?: number | null;
15
17
  }
16
18
  export interface PrefixCacheStats {
17
19
  stablePrefixHash: string;
@@ -10,7 +10,9 @@ export function computeSessionCacheStats(db, sessionId) {
10
10
  COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
11
11
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
12
12
  stable_prefix_hash,
13
- datetime_utc
13
+ datetime_utc,
14
+ cache_control_blocks,
15
+ cache_control_ttl_seconds
14
16
  FROM requests
15
17
  WHERE session_id = ?
16
18
  ORDER BY datetime_utc DESC`, sessionId);
@@ -51,6 +53,8 @@ export function computeSessionCacheStats(db, sessionId) {
51
53
  lastRequestAt: lastAt,
52
54
  estimatedSavingsUsd,
53
55
  ttlRemainingMs: null,
56
+ latestCacheControlBlocks: rows.length > 0 ? (rows[0].cache_control_blocks ?? null) : null,
57
+ latestCacheControlTtlSeconds: rows.length > 0 ? (rows[0].cache_control_ttl_seconds ?? null) : null,
54
58
  };
55
59
  }
56
60
  export function computeTtlRemaining(stats, cli, ttlPolicy) {
@@ -63,7 +67,14 @@ export function computeTtlRemaining(stats, cli, ttlPolicy) {
63
67
  if (!Number.isFinite(lastWriteMs))
64
68
  return null;
65
69
  const elapsedMs = nowMs - lastWriteMs;
66
- const ttlMs = ttlPolicy.anthropicTtlSeconds * 1000;
70
+ const isExplicit = typeof stats.latestCacheControlBlocks === "number" && stats.latestCacheControlBlocks > 0;
71
+ const recordedTtlSeconds = typeof stats.latestCacheControlTtlSeconds === "number" &&
72
+ Number.isFinite(stats.latestCacheControlTtlSeconds) &&
73
+ stats.latestCacheControlTtlSeconds > 0
74
+ ? stats.latestCacheControlTtlSeconds
75
+ : null;
76
+ const ttlSeconds = recordedTtlSeconds ?? (isExplicit ? 3600 : ttlPolicy.anthropicTtlSeconds);
77
+ const ttlMs = ttlSeconds * 1000;
67
78
  return Math.max(0, ttlMs - elapsedMs);
68
79
  }
69
80
  export function computePrefixCacheStats(db, stablePrefixHash) {
@@ -128,7 +139,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
128
139
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
129
140
  stable_prefix_hash,
130
141
  datetime_utc,
131
- cache_control_blocks
142
+ cache_control_blocks,
143
+ cache_control_ttl_seconds
132
144
  FROM requests
133
145
  WHERE datetime_utc >= ?`
134
146
  : `SELECT cli, model,
@@ -136,7 +148,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
136
148
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
137
149
  stable_prefix_hash,
138
150
  datetime_utc,
139
- cache_control_blocks
151
+ cache_control_blocks,
152
+ cache_control_ttl_seconds
140
153
  FROM requests`;
141
154
  const rows = sinceIso ? db.queryRequests(sql, sinceIso) : db.queryRequests(sql);
142
155
  const perCliMap = new Map();
@@ -1,5 +1,5 @@
1
1
  import { spawnSync } from "node:child_process";
2
- import { executeCli } from "./executor.js";
2
+ import { executeCli, providerCommandName } from "./executor.js";
3
3
  import { getProviderRuntimeStatus } from "./provider-status.js";
4
4
  const MISTRAL_VIBE_PACKAGE = "mistral-vibe";
5
5
  const LEGACY_VIBE_PACKAGE = "vibe-cli";
@@ -35,10 +35,7 @@ const VERSION_ARGS = {
35
35
  grok: ["--version"],
36
36
  mistral: ["--version"],
37
37
  };
38
- const NPM_PACKAGES = {
39
- codex: "@openai/codex",
40
- gemini: "@google/gemini-cli",
41
- };
38
+ const CODEX_NPM_PACKAGE = "@openai/codex";
42
39
  export function buildCliUpgradePlan(cli, target = "latest", detectMistral = detectMistralInstallMethod) {
43
40
  const normalizedTarget = normalizeTarget(target);
44
41
  if (cli === "mistral") {
@@ -96,17 +93,28 @@ export function buildCliUpgradePlan(cli, target = "latest", detectMistral = dete
96
93
  requiresNetwork: true,
97
94
  };
98
95
  }
99
- const packageName = cli === "codex" ? NPM_PACKAGES.codex : NPM_PACKAGES.gemini;
96
+ if (cli === "gemini") {
97
+ if (normalizedTarget !== "latest") {
98
+ throw new Error("Antigravity CLI upgrades support only the 'latest' target via 'agy update'.");
99
+ }
100
+ return {
101
+ cli,
102
+ target: normalizedTarget,
103
+ command: "agy",
104
+ args: ["update"],
105
+ strategy: "self-update",
106
+ requiresNetwork: true,
107
+ note: "Gemini provider requests now run through Google Antigravity CLI (`agy`).",
108
+ };
109
+ }
100
110
  return {
101
111
  cli,
102
112
  target: normalizedTarget,
103
113
  command: "npm",
104
- args: ["install", "-g", `${packageName}@${normalizedTarget}`],
114
+ args: ["install", "-g", `${CODEX_NPM_PACKAGE}@${normalizedTarget}`],
105
115
  strategy: "npm-global-install",
106
116
  requiresNetwork: true,
107
- note: cli === "codex"
108
- ? "Explicit Codex targets use the documented npm package path; latest can use 'codex update'."
109
- : "Gemini CLI does not expose a self-update command in the gateway-supported CLI surface, so upgrades use npm.",
117
+ note: "Explicit Codex targets use the documented npm package path; latest can use 'codex update'.",
110
118
  };
111
119
  }
112
120
  export async function getCliVersion(cli) {
@@ -115,7 +123,7 @@ export async function getCliVersion(cli) {
115
123
  const status = getProviderRuntimeStatus(cli);
116
124
  return {
117
125
  cli,
118
- command: cli,
126
+ command: status.command,
119
127
  args,
120
128
  installed: status.installed,
121
129
  version: status.version || undefined,
@@ -191,10 +199,11 @@ function buildMistralUpgradePlan(normalizedTarget, detectMistral) {
191
199
  }
192
200
  async function fallbackCliVersion(cli, args) {
193
201
  try {
194
- const result = await executeCli(cli, args, { timeout: 15_000 });
202
+ const command = providerCommandName(cli);
203
+ const result = await executeCli(command, args, { timeout: 15_000 });
195
204
  return {
196
205
  cli,
197
- command: cli,
206
+ command,
198
207
  args,
199
208
  installed: true,
200
209
  version: extractVersion(result.stdout, result.stderr),
@@ -13,6 +13,7 @@ export interface ExecuteResult {
13
13
  stderr: string;
14
14
  code: number;
15
15
  }
16
+ export declare function providerCommandName(command: string): string;
16
17
  export declare function buildExtendedPath(env?: NodeJS.ProcessEnv, home?: string, nodePath?: string, platform?: NodeJS.Platform): string;
17
18
  export declare function getExtendedPath(): string;
18
19
  export declare function envWithExtendedPath(baseEnv?: NodeJS.ProcessEnv, extendedPath?: string, platform?: NodeJS.Platform): NodeJS.ProcessEnv;
package/dist/executor.js CHANGED
@@ -3,6 +3,13 @@ import { homedir } from "os";
3
3
  import { delimiter, join, dirname, extname, win32 } from "path";
4
4
  import { readdirSync, existsSync } from "fs";
5
5
  import { createCircuitBreaker, withRetry } from "./retry.js";
6
+ export function providerCommandName(command) {
7
+ if (command === "gemini")
8
+ return "agy";
9
+ if (command === "mistral")
10
+ return "vibe";
11
+ return command;
12
+ }
6
13
  const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
7
14
  const circuitBreakers = new Map();
8
15
  let cachedNvmPath;
@@ -10,6 +10,7 @@ export interface FlightLogStart {
10
10
  stablePrefixHash?: string;
11
11
  stablePrefixTokens?: number;
12
12
  cacheControlBlocks?: number;
13
+ cacheControlTtlSeconds?: number;
13
14
  }
14
15
  export interface FlightLogResult {
15
16
  response: string;
@@ -31,6 +31,13 @@ function ensureCacheControlBlocksColumn(db) {
31
31
  db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
32
32
  }
33
33
  }
34
+ function ensureCacheControlTtlSecondsColumn(db) {
35
+ const rows = db.prepare("PRAGMA table_info(requests)").all();
36
+ const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
37
+ if (!names.has("cache_control_ttl_seconds")) {
38
+ db.exec("ALTER TABLE requests ADD COLUMN cache_control_ttl_seconds INTEGER");
39
+ }
40
+ }
34
41
  export function resolveFlightRecorderDbPath() {
35
42
  const configured = process.env.LLM_GATEWAY_LOGS_DB;
36
43
  if (configured !== undefined) {
@@ -144,6 +151,10 @@ export class FlightRecorder {
144
151
  this.db
145
152
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
146
153
  .run(new Date().toISOString());
154
+ ensureCacheControlTtlSecondsColumn(this.db);
155
+ this.db
156
+ .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(5, ?)")
157
+ .run(new Date().toISOString());
147
158
  if (process.platform !== "win32") {
148
159
  try {
149
160
  chmodSync(dbPath, 0o600);
@@ -154,10 +165,10 @@ export class FlightRecorder {
154
165
  const insertRequest = this.db.prepare(`
155
166
  INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
156
167
  stable_prefix_hash, stable_prefix_tokens,
157
- cache_control_blocks)
168
+ cache_control_blocks, cache_control_ttl_seconds)
158
169
  VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
159
170
  @stable_prefix_hash, @stable_prefix_tokens,
160
- @cache_control_blocks)
171
+ @cache_control_blocks, @cache_control_ttl_seconds)
161
172
  `);
162
173
  const insertMetadata = this.db.prepare(`
163
174
  INSERT INTO gateway_metadata (request_id, async_job_id, status)
@@ -175,6 +186,7 @@ export class FlightRecorder {
175
186
  stable_prefix_hash: entry.stablePrefixHash ?? null,
176
187
  stable_prefix_tokens: entry.stablePrefixTokens ?? null,
177
188
  cache_control_blocks: entry.cacheControlBlocks ?? null,
189
+ cache_control_ttl_seconds: entry.cacheControlTtlSeconds ?? null,
178
190
  });
179
191
  insertMetadata.run({
180
192
  request_id: entry.correlationId,
package/dist/index.d.ts CHANGED
@@ -134,6 +134,7 @@ interface CliRequestPrep {
134
134
  stablePrefixTokens: number | null;
135
135
  stdinPayload?: string;
136
136
  cacheControlBlocks?: number;
137
+ cacheControlTtlSeconds?: number;
137
138
  warnings?: WarningEntry[];
138
139
  }
139
140
  export declare function prepareClaudeRequest(params: {