llm-cli-gateway 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +73 -0
- package/README.md +50 -8
- package/dist/async-job-manager.d.ts +1 -0
- package/dist/async-job-manager.js +1 -0
- package/dist/cache-stats.d.ts +2 -0
- package/dist/cache-stats.js +17 -4
- package/dist/doctor.d.ts +22 -0
- package/dist/doctor.js +45 -0
- package/dist/flight-recorder.d.ts +1 -0
- package/dist/flight-recorder.js +14 -2
- package/dist/index.d.ts +1 -0
- package/dist/index.js +87 -4
- package/dist/prompt-parts.js +5 -2
- package/dist/provider-tool-capabilities.d.ts +97 -0
- package/dist/provider-tool-capabilities.js +1138 -0
- package/dist/resources.js +51 -0
- package/dist/upstream-contracts.js +17 -5
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
- package/setup/status.schema.json +67 -6
- package/socket.yml +25 -2
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,79 @@ All notable changes to the llm-cli-gateway project.
|
|
|
4
4
|
|
|
5
5
|
## Unreleased
|
|
6
6
|
|
|
7
|
+
## [2.7.0] - 2026-06-12: Provider capability inventory
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- Added `provider_tool_capabilities`, a read-only MCP tool that reports the
|
|
12
|
+
gateway request tools, provider kind, supported controls, feature flags,
|
|
13
|
+
model info, config-surface hints, local skill discovery, provider-native tool
|
|
14
|
+
discovery, unsupported/degraded inputs, warnings, and cache metadata for
|
|
15
|
+
Claude Code, Codex CLI, Gemini/Antigravity (`agy`), Grok CLI, optional
|
|
16
|
+
`grok_api`, and Mistral Vibe.
|
|
17
|
+
- Added `provider-tools://catalog` and per-provider resources
|
|
18
|
+
`provider-tools://claude`, `provider-tools://codex`,
|
|
19
|
+
`provider-tools://gemini`, `provider-tools://grok`,
|
|
20
|
+
`provider-tools://grok_api`, and `provider-tools://mistral` for clients that
|
|
21
|
+
prefer resource discovery over tool calls.
|
|
22
|
+
- Added `doctor --json` `provider_capabilities`, a compact setup-assistant
|
|
23
|
+
summary with schema version, resource URIs, per-provider request tools,
|
|
24
|
+
supported feature names, unsupported input names, config-surface counts,
|
|
25
|
+
discovered skill/tool counts, and warnings without raw local paths.
|
|
26
|
+
- Added bounded, redacted local skill/config discovery for provider capability
|
|
27
|
+
reporting. Grok local/bundled skills can now surface provider-native tools
|
|
28
|
+
such as Imagine `image_gen`, `image_edit`, `image_to_video`, and
|
|
29
|
+
`reference_to_video` when present, while keeping execution routed through
|
|
30
|
+
`grok_request`.
|
|
31
|
+
|
|
32
|
+
### Changed
|
|
33
|
+
|
|
34
|
+
- Updated agent skills so LLM agents have provider-specific gateway usage
|
|
35
|
+
guidance for Claude, Codex, Gemini/Antigravity (`agy`), Grok, and Mistral
|
|
36
|
+
Vibe, and so orchestration skills check `provider_tool_capabilities` before
|
|
37
|
+
assuming tool allowlists, MCP-server semantics, sessions, output formats, or
|
|
38
|
+
provider-native tools.
|
|
39
|
+
- Updated setup assistant guidance and `setup/status.schema.json` so install
|
|
40
|
+
agents treat `doctor.provider_capabilities` as the compact source of truth
|
|
41
|
+
for outbound provider capability claims.
|
|
42
|
+
- Documented the intentionally published prod-only `npm-shrinkwrap.json` in
|
|
43
|
+
README and `socket.yml`, including the release audit and packed-consumer
|
|
44
|
+
checks that bound the shrinkwrap and shell-spawn Socket alerts.
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
|
|
48
|
+
- Corrected stale internal skill guidance that described Codex continuity as
|
|
49
|
+
bookkeeping-only, described Gemini allowlists/MCP allowlists as pass-through,
|
|
50
|
+
omitted Mistral async from async orchestration docs, or encouraged copying
|
|
51
|
+
Claude tool names into other provider allowlists.
|
|
52
|
+
|
|
53
|
+
## [2.6.3] - 2026-06-12: Claude cache-control veracity and Grok 0.2.50
|
|
54
|
+
|
|
55
|
+
### Fixed
|
|
56
|
+
|
|
57
|
+
- Claude `promptParts` cache-control stream-json payloads now preserve the
|
|
58
|
+
exact assembled prompt bytes: concatenating emitted Claude content blocks
|
|
59
|
+
matches `assemble(parts).text`, including stable-part separators.
|
|
60
|
+
- Empty or omitted stable-part `cacheControl` markers are now treated as a
|
|
61
|
+
no-op: they do not force the Claude stdin cache-control path, do not suppress
|
|
62
|
+
opt-in auto-emission, and return a `cache_control_noop` warning.
|
|
63
|
+
- Flight-recorder rows now persist the actual emitted
|
|
64
|
+
`cache_control_ttl_seconds`, and cache-state TTL reporting prefers that row
|
|
65
|
+
value while retaining a 1-hour compatibility fallback for legacy
|
|
66
|
+
`cache_control_blocks` rows.
|
|
67
|
+
- Provider cache docs now describe the verified Claude stream-json
|
|
68
|
+
`cache_control` path and the remaining hidden-request limits accurately,
|
|
69
|
+
including async flight-recorder metadata and slice κ TTL handoff.
|
|
70
|
+
|
|
71
|
+
### Upstream provider maintenance
|
|
72
|
+
|
|
73
|
+
- Grok Build stable `0.2.50` contract refresh: `--debug` and `--debug-file`
|
|
74
|
+
are acknowledged as upstream-only help/probe flags at top level and across
|
|
75
|
+
subcommands without becoming gateway argv allowlist flags.
|
|
76
|
+
- Declared `grok agent leader --relay-on-demand` on the non-exposed agent
|
|
77
|
+
leader subcommand, refreshed `docs/upstream/snapshots/grok.json`, and added
|
|
78
|
+
the 2026-06-12 Grok upstream scan report.
|
|
79
|
+
|
|
7
80
|
## [2.6.0] - 2026-06-12: Gemini provider on Google Antigravity CLI
|
|
8
81
|
|
|
9
82
|
### Changed
|
package/README.md
CHANGED
|
@@ -62,6 +62,8 @@ The next documentation focus is provider-specific skill and DAG-TOML pairs for e
|
|
|
62
62
|
- Security CI runs actionlint, zizmor, shellcheck, typos, osv-scanner, gitleaks, and lychee.
|
|
63
63
|
- GitHub release installer artifacts are checksummed and signed with Sigstore keyless signing.
|
|
64
64
|
- npm releases use provenance through OIDC trusted publishing.
|
|
65
|
+
- The npm package intentionally ships a generated, prod-only `npm-shrinkwrap.json` so registry installs resolve the audited release tree. Release gates regenerate it from `package-lock.json`, compare for parity, and run a registry-fidelity consumer install before publishing.
|
|
66
|
+
- Socket behavioural alerts are documented in [`socket.yml`](./socket.yml) and under "Security Considerations" below. `shellAccess` and `shrinkwrap` are reviewed package capabilities/configuration for this CLI appliance, not hidden install behaviour.
|
|
65
67
|
|
|
66
68
|
## Personal MCP Appliance
|
|
67
69
|
|
|
@@ -167,6 +169,7 @@ docker compose -f docker/personal.compose.yml run --rm doctor
|
|
|
167
169
|
- **SQLite Flight Recorder**: Every request/response logged to `~/.llm-cli-gateway/logs.db` with correlation IDs, token usage, duration, retry counts, and circuit breaker state. Browse with [Datasette](https://datasette.io/): `datasette ~/.llm-cli-gateway/logs.db`
|
|
168
170
|
- **Structured Metadata**: Tool responses include machine-readable `structuredContent` (model, cli, correlationId, sessionId, durationMs, token counts)
|
|
169
171
|
- **Cache observability resources**: `cache-state://global`, `cache-state://session/{id}`, and `cache-state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
|
|
172
|
+
- **Provider capability inventory**: `provider_tool_capabilities` and `provider-tools://catalog` expose the gateway request fields, supported/degraded provider controls, local skill/tool discovery, and safe config-surface hints for Claude Code, Codex CLI, Gemini/Antigravity, Grok CLI/API, and Mistral Vibe. `doctor --json` includes a compact `provider_capabilities` summary for setup assistants.
|
|
170
173
|
|
|
171
174
|
### Cache-aware operation
|
|
172
175
|
|
|
@@ -1019,6 +1022,42 @@ GEMINI_HISTORY_ROOT=/path/to/.gemini/tmp
|
|
|
1019
1022
|
LLM_GATEWAY_DISABLE_MODEL_DISCOVERY=1
|
|
1020
1023
|
```
|
|
1021
1024
|
|
|
1025
|
+
##### `provider_tool_capabilities`
|
|
1026
|
+
|
|
1027
|
+
Report the provider tool and feature capability catalog. Use this before
|
|
1028
|
+
orchestrating provider-specific requests so callers can distinguish supported
|
|
1029
|
+
controls, provider-owned configuration, ignored parity fields, and unsupported
|
|
1030
|
+
inputs.
|
|
1031
|
+
|
|
1032
|
+
**Parameters:**
|
|
1033
|
+
|
|
1034
|
+
- `cli` (string, optional): Provider filter (`"claude"`, `"codex"`, `"gemini"`, `"grok"`, `"grok_api"`, or `"mistral"`)
|
|
1035
|
+
- `includeSkills` (boolean, default `true`): Include bounded local skill discovery
|
|
1036
|
+
- `includeProviderTools` (boolean, default `true`): Include provider-native tools extracted from discovered skills
|
|
1037
|
+
- `includeUnsupported` (boolean, default `true`): Include explicit unsupported/degraded input records
|
|
1038
|
+
- `includePaths` (boolean, default `false`): Include raw local filesystem paths in discovery output
|
|
1039
|
+
- `refresh` (boolean, default `false`): Bypass the short-lived capability cache
|
|
1040
|
+
|
|
1041
|
+
The response schema is `provider-tool-capabilities.v2`. Capability discovery is
|
|
1042
|
+
read-only and bounded; raw local paths are redacted unless `includePaths` is
|
|
1043
|
+
explicitly true, and secret-bearing auth files are not read.
|
|
1044
|
+
|
|
1045
|
+
Equivalent MCP resources:
|
|
1046
|
+
|
|
1047
|
+
- `provider-tools://catalog`: full provider catalog
|
|
1048
|
+
- `provider-tools://claude`
|
|
1049
|
+
- `provider-tools://codex`
|
|
1050
|
+
- `provider-tools://gemini`
|
|
1051
|
+
- `provider-tools://grok`
|
|
1052
|
+
- `provider-tools://grok_api`
|
|
1053
|
+
- `provider-tools://mistral`
|
|
1054
|
+
|
|
1055
|
+
`doctor --json` also emits a compact `provider_capabilities` block with the
|
|
1056
|
+
same schema version, per-provider request tool names, supported feature names,
|
|
1057
|
+
unsupported input names, config-surface counts, discovery counts, and resource
|
|
1058
|
+
URIs. This block is intended for setup assistants that need a concise capability
|
|
1059
|
+
summary without local skill bodies or raw paths.
|
|
1060
|
+
|
|
1022
1061
|
##### `cli_versions`
|
|
1023
1062
|
|
|
1024
1063
|
Report installed CLI versions.
|
|
@@ -1299,15 +1338,18 @@ The gateway supports concurrent requests across different CLIs. Each request spa
|
|
|
1299
1338
|
|
|
1300
1339
|
### Socket alerts — context for reviewers
|
|
1301
1340
|
|
|
1302
|
-
If you're vetting `llm-cli-gateway` through [Socket](https://socket.dev/npm/package/llm-cli-gateway) or a similar supply-chain scanner, you'll see behavioural alerts and some dependency-ownership alerts. They are accurate descriptions of what the package does and what it depends on. The reviewed `shellAccess`
|
|
1341
|
+
If you're vetting `llm-cli-gateway` through [Socket](https://socket.dev/npm/package/llm-cli-gateway) or a similar supply-chain scanner, you'll see behavioural alerts and some dependency-ownership alerts. They are accurate descriptions of what the package does and what it depends on. The reviewed `shellAccess` and `shrinkwrap` entries are configured in `socket.yml` for repository/PR policy surfaces, but Socket's public package page may still display them for the published npm artifact; the rationale remains documented here and in the package.
|
|
1342
|
+
|
|
1343
|
+
The currently flagged surfaces are not new in 2.6.x: the 2.3.0, 2.4.0, 2.5.0, and 2.6.3 npm tarballs all include `npm-shrinkwrap.json`, and all include the same `dist/executor.js` child-process spawn surface used to run provider CLIs. The `socket.yml` policy for 2.4.0, 2.5.0, 2.6.0, and 2.6.3 is materially the same for `shellAccess`; this README now adds the missing shrinkwrap disclosure as well.
|
|
1303
1344
|
|
|
1304
|
-
| Alert
|
|
1305
|
-
|
|
|
1306
|
-
| **Network access**
|
|
1307
|
-
| **Shell access**
|
|
1308
|
-
| **
|
|
1309
|
-
| **
|
|
1310
|
-
| **
|
|
1345
|
+
| Alert | Where | Why it's bounded |
|
|
1346
|
+
| ---------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
1347
|
+
| **Network access** | `src/http-transport.ts` opens an HTTP MCP transport when started via `npm run start:http`. `src/endpoint-exposure.ts` issues a HEAD probe to verify configured public/tunnel URLs. Socket also flagged `dist/upstream-contracts.js` in v1.17.2 from descriptive text, not a network call. | The transport binds to `127.0.0.1` by default and requires `LLM_GATEWAY_AUTH_TOKEN` to be set. The default stdio MCP entry point (`npm start`) opens no sockets. `src/upstream-contracts.ts` stores provider CLI metadata and imports no HTTP client APIs. |
|
|
1348
|
+
| **Shell access** | `src/executor.ts` uses `child_process.spawn(cmd, args, …)` to invoke the underlying LLM CLIs. | `spawn` is called with an argument array and **never** `shell: true`, so there is no shell interpolation path for caller input. The command name is restricted to an allow-list of known CLI binaries (`claude`, `codex`, `agy`, `grok`, `vibe`). |
|
|
1349
|
+
| **Published shrinkwrap** | The npm artifact includes `npm-shrinkwrap.json`; `package.json#files` includes it and `scripts/make-prod-shrinkwrap.mjs` generates it from `package-lock.json`. | This is a CLI/application package. npm documents the shrinkwrap use case for applications, daemons, and command-line tools published through the registry. Our shrinkwrap is a prod-only projection, not a committed full dev lockfile: `scripts/release-security-audit.sh` verifies parity with the audited lockfile, and `scripts/verify-registry-install.sh` proves fresh registry consumers receive no `better-sqlite3`/`prebuild-install`/`tar-fs`/`tar-stream` production chain. |
|
|
1350
|
+
| **Uses eval** | None in our source. Transitive: `@modelcontextprotocol/sdk` → `ajv@8` uses `new Function(...)` in `ajv/dist/compile/index.js` to compile JSON Schema validators. | This is ajv's standard codegen path. Only known schemas (defined in our source and the MCP SDK) flow into it; no caller-supplied data ever reaches the compiled function body. |
|
|
1351
|
+
| **SQLite adapter isolation** | Persistence uses Node's built-in `node:sqlite` module (no native binding, no install scripts) through a single adapter, `src/sqlite-driver.ts`. | `node:sqlite` is touched by exactly one production module (the adapter); every other module talks to SQLite through its typed surface. We never call any `db.pragma()` helper (it does not exist on `node:sqlite`); SQLite setup uses fixed literal `db.exec("PRAGMA ...")` statements. `npm run security:audit` fails the release if production code references `node:sqlite` outside the adapter or reintroduces a `.pragma()` call. |
|
|
1352
|
+
| **Dependency ownership** | A handful of small transitive packages (e.g. `media-typer` via `@modelcontextprotocol/sdk`) trip Socket's "unstable ownership" or "obfuscated code" heuristics. | These are pinned, well-known micro-deps in the Node ecosystem with no known issues. We pin direct override versions of `content-type` and `type-is` in `package.json#overrides`. As of 2.0.0 the prod graph carries no native module (`better-sqlite3` moved to devDependencies; `node:sqlite` is built into Node), eliminating the entire `prebuild-install`/`tar-fs`/`tar-stream` install-time chain. Our earlier direct dependency on `toml@3.0.0` was replaced with `smol-toml`. |
|
|
1311
1353
|
|
|
1312
1354
|
See [`socket.yml`](./socket.yml) for the same context in machine-readable form.
|
|
1313
1355
|
|
|
package/dist/async-job-manager.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export interface AsyncJobFlightRecorderEntry {
|
|
|
11
11
|
stablePrefixHash?: string;
|
|
12
12
|
stablePrefixTokens?: number;
|
|
13
13
|
cacheControlBlocks?: number;
|
|
14
|
+
cacheControlTtlSeconds?: number;
|
|
14
15
|
}
|
|
15
16
|
export type AsyncJobUsageExtractor = (stdout: string) => {
|
|
16
17
|
inputTokens?: number;
|
|
package/dist/async-job-manager.js
CHANGED
|
@@ -515,6 +515,7 @@ export class AsyncJobManager {
|
|
|
515
515
|
stablePrefixHash: flightRecorderEntry.stablePrefixHash,
|
|
516
516
|
stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
|
|
517
517
|
cacheControlBlocks: flightRecorderEntry.cacheControlBlocks,
|
|
518
|
+
cacheControlTtlSeconds: flightRecorderEntry.cacheControlTtlSeconds,
|
|
518
519
|
});
|
|
519
520
|
}
|
|
520
521
|
catch (err) {
|
package/dist/cache-stats.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export interface SessionCacheStats {
|
|
|
12
12
|
lastRequestAt: string | null;
|
|
13
13
|
estimatedSavingsUsd: number;
|
|
14
14
|
ttlRemainingMs: number | null;
|
|
15
|
+
latestCacheControlBlocks?: number | null;
|
|
16
|
+
latestCacheControlTtlSeconds?: number | null;
|
|
15
17
|
}
|
|
16
18
|
export interface PrefixCacheStats {
|
|
17
19
|
stablePrefixHash: string;
|
package/dist/cache-stats.js
CHANGED
|
@@ -10,7 +10,9 @@ export function computeSessionCacheStats(db, sessionId) {
|
|
|
10
10
|
COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
|
|
11
11
|
COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
|
|
12
12
|
stable_prefix_hash,
|
|
13
|
-
datetime_utc
|
|
13
|
+
datetime_utc,
|
|
14
|
+
cache_control_blocks,
|
|
15
|
+
cache_control_ttl_seconds
|
|
14
16
|
FROM requests
|
|
15
17
|
WHERE session_id = ?
|
|
16
18
|
ORDER BY datetime_utc DESC`, sessionId);
|
|
@@ -51,6 +53,8 @@ export function computeSessionCacheStats(db, sessionId) {
|
|
|
51
53
|
lastRequestAt: lastAt,
|
|
52
54
|
estimatedSavingsUsd,
|
|
53
55
|
ttlRemainingMs: null,
|
|
56
|
+
latestCacheControlBlocks: rows.length > 0 ? (rows[0].cache_control_blocks ?? null) : null,
|
|
57
|
+
latestCacheControlTtlSeconds: rows.length > 0 ? (rows[0].cache_control_ttl_seconds ?? null) : null,
|
|
54
58
|
};
|
|
55
59
|
}
|
|
56
60
|
export function computeTtlRemaining(stats, cli, ttlPolicy) {
|
|
@@ -63,7 +67,14 @@ export function computeTtlRemaining(stats, cli, ttlPolicy) {
|
|
|
63
67
|
if (!Number.isFinite(lastWriteMs))
|
|
64
68
|
return null;
|
|
65
69
|
const elapsedMs = nowMs - lastWriteMs;
|
|
66
|
-
const
|
|
70
|
+
const isExplicit = typeof stats.latestCacheControlBlocks === "number" && stats.latestCacheControlBlocks > 0;
|
|
71
|
+
const recordedTtlSeconds = typeof stats.latestCacheControlTtlSeconds === "number" &&
|
|
72
|
+
Number.isFinite(stats.latestCacheControlTtlSeconds) &&
|
|
73
|
+
stats.latestCacheControlTtlSeconds > 0
|
|
74
|
+
? stats.latestCacheControlTtlSeconds
|
|
75
|
+
: null;
|
|
76
|
+
const ttlSeconds = recordedTtlSeconds ?? (isExplicit ? 3600 : ttlPolicy.anthropicTtlSeconds);
|
|
77
|
+
const ttlMs = ttlSeconds * 1000;
|
|
67
78
|
return Math.max(0, ttlMs - elapsedMs);
|
|
68
79
|
}
|
|
69
80
|
export function computePrefixCacheStats(db, stablePrefixHash) {
|
|
@@ -128,7 +139,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
128
139
|
COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
|
|
129
140
|
stable_prefix_hash,
|
|
130
141
|
datetime_utc,
|
|
131
|
-
cache_control_blocks
|
|
142
|
+
cache_control_blocks,
|
|
143
|
+
cache_control_ttl_seconds
|
|
132
144
|
FROM requests
|
|
133
145
|
WHERE datetime_utc >= ?`
|
|
134
146
|
: `SELECT cli, model,
|
|
@@ -136,7 +148,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
136
148
|
COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
|
|
137
149
|
stable_prefix_hash,
|
|
138
150
|
datetime_utc,
|
|
139
|
-
cache_control_blocks
|
|
151
|
+
cache_control_blocks,
|
|
152
|
+
cache_control_ttl_seconds
|
|
140
153
|
FROM requests`;
|
|
141
154
|
const rows = sinceIso ? db.queryRequests(sql, sinceIso) : db.queryRequests(sql);
|
|
142
155
|
const perCliMap = new Map();
|
package/dist/doctor.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { type EndpointExposureReport } from "./endpoint-exposure.js";
|
|
|
2
2
|
import { type ProviderLoginStatus } from "./provider-status.js";
|
|
3
3
|
import type { FlightRecorderQuery } from "./flight-recorder.js";
|
|
4
4
|
import { type CacheAwarenessConfig } from "./config.js";
|
|
5
|
+
import { type ProviderCapabilityId, type ProviderKind } from "./provider-tool-capabilities.js";
|
|
5
6
|
export type CliType = "claude" | "codex" | "gemini" | "grok" | "mistral";
|
|
6
7
|
export interface CacheAwarenessReport {
|
|
7
8
|
enabled_features: Array<"anthropic_cache_control" | "ttl_warnings">;
|
|
@@ -17,6 +18,26 @@ export interface CacheAwarenessReport {
|
|
|
17
18
|
total_cache_read_tokens: number;
|
|
18
19
|
}>>;
|
|
19
20
|
}
|
|
21
|
+
export interface ProviderCapabilitySummaryReport {
|
|
22
|
+
schema_version: "provider-tool-capabilities.v2";
|
|
23
|
+
tool: "provider_tool_capabilities";
|
|
24
|
+
resources: {
|
|
25
|
+
catalog: "provider-tools://catalog";
|
|
26
|
+
providers: Record<ProviderCapabilityId, string>;
|
|
27
|
+
};
|
|
28
|
+
cache_ttl_ms: number;
|
|
29
|
+
providers: Record<ProviderCapabilityId, {
|
|
30
|
+
provider_kind: ProviderKind;
|
|
31
|
+
cli_available: boolean;
|
|
32
|
+
gateway_request_tools: string[];
|
|
33
|
+
supported_features: string[];
|
|
34
|
+
unsupported_inputs: string[];
|
|
35
|
+
config_surface_count: number;
|
|
36
|
+
discovered_skill_count: number;
|
|
37
|
+
discovered_provider_tool_count: number;
|
|
38
|
+
warnings: string[];
|
|
39
|
+
}>;
|
|
40
|
+
}
|
|
20
41
|
export interface VibeSessionLoggingStatus {
|
|
21
42
|
config_path: string;
|
|
22
43
|
config_present: boolean;
|
|
@@ -116,6 +137,7 @@ export interface DoctorReport {
|
|
|
116
137
|
vibe_session_logging: VibeSessionLoggingStatus;
|
|
117
138
|
};
|
|
118
139
|
cache_awareness: CacheAwarenessReport;
|
|
140
|
+
provider_capabilities: ProviderCapabilitySummaryReport;
|
|
119
141
|
upstream: {
|
|
120
142
|
note: string;
|
|
121
143
|
recommendation: string;
|
package/dist/doctor.js
CHANGED
|
@@ -11,6 +11,7 @@ import { loadWorkspaceRegistry } from "./workspace-registry.js";
|
|
|
11
11
|
import { computeGlobalCacheStats } from "./cache-stats.js";
|
|
12
12
|
import { FlightRecorder, resolveFlightRecorderDbPath } from "./flight-recorder.js";
|
|
13
13
|
import { buildUpstreamContractReport } from "./upstream-contracts.js";
|
|
14
|
+
import { getProviderToolCapabilities, providerCapabilityIds, } from "./provider-tool-capabilities.js";
|
|
14
15
|
export function checkVibeSessionLogging(home = homedir()) {
|
|
15
16
|
const configPath = join(home, ".vibe", "config.toml");
|
|
16
17
|
if (!existsSync(configPath)) {
|
|
@@ -226,6 +227,49 @@ function buildCacheAwarenessReport(opts) {
|
|
|
226
227
|
per_cli: perCli,
|
|
227
228
|
};
|
|
228
229
|
}
|
|
230
|
+
function buildProviderCapabilitySummary(providerStatuses) {
|
|
231
|
+
const capabilities = getProviderToolCapabilities({
|
|
232
|
+
includeSkills: true,
|
|
233
|
+
includeProviderTools: true,
|
|
234
|
+
includeUnsupported: true,
|
|
235
|
+
includePaths: false,
|
|
236
|
+
});
|
|
237
|
+
const providers = Object.fromEntries(providerCapabilityIds().map(provider => {
|
|
238
|
+
const capability = capabilities[provider];
|
|
239
|
+
if (!capability) {
|
|
240
|
+
throw new Error(`Missing provider capability record for ${provider}`);
|
|
241
|
+
}
|
|
242
|
+
const cliAvailable = provider === "grok_api"
|
|
243
|
+
? capability.gatewayRequestTools.includes("grok_api_request")
|
|
244
|
+
: providerStatuses[provider].installed;
|
|
245
|
+
return [
|
|
246
|
+
provider,
|
|
247
|
+
{
|
|
248
|
+
provider_kind: capability.providerKind,
|
|
249
|
+
cli_available: cliAvailable,
|
|
250
|
+
gateway_request_tools: capability.gatewayRequestTools,
|
|
251
|
+
supported_features: Object.entries(capability.features)
|
|
252
|
+
.filter(([, feature]) => feature.supported)
|
|
253
|
+
.map(([name]) => name),
|
|
254
|
+
unsupported_inputs: capability.unsupportedInputs.map(input => input.input),
|
|
255
|
+
config_surface_count: capability.configSurfaces.length,
|
|
256
|
+
discovered_skill_count: capability.discoveredSkills.length,
|
|
257
|
+
discovered_provider_tool_count: capability.discoveredProviderTools.length,
|
|
258
|
+
warnings: capability.warnings,
|
|
259
|
+
},
|
|
260
|
+
];
|
|
261
|
+
}));
|
|
262
|
+
return {
|
|
263
|
+
schema_version: "provider-tool-capabilities.v2",
|
|
264
|
+
tool: "provider_tool_capabilities",
|
|
265
|
+
resources: {
|
|
266
|
+
catalog: "provider-tools://catalog",
|
|
267
|
+
providers: Object.fromEntries(providerCapabilityIds().map(provider => [provider, `provider-tools://${provider}`])),
|
|
268
|
+
},
|
|
269
|
+
cache_ttl_ms: 60_000,
|
|
270
|
+
providers,
|
|
271
|
+
};
|
|
272
|
+
}
|
|
229
273
|
export function createDoctorReport(envOrOptions = process.env) {
|
|
230
274
|
const opts = isCreateDoctorReportOptions(envOrOptions)
|
|
231
275
|
? envOrOptions
|
|
@@ -316,6 +360,7 @@ export function createDoctorReport(envOrOptions = process.env) {
|
|
|
316
360
|
endpoint_exposure: endpointExposure,
|
|
317
361
|
client_config: clientConfigStatus(),
|
|
318
362
|
cache_awareness: buildCacheAwarenessReport(opts),
|
|
363
|
+
provider_capabilities: buildProviderCapabilitySummary(providerStatuses),
|
|
319
364
|
upstream,
|
|
320
365
|
next_actions: [],
|
|
321
366
|
};
|
package/dist/flight-recorder.js
CHANGED
|
@@ -31,6 +31,13 @@ function ensureCacheControlBlocksColumn(db) {
|
|
|
31
31
|
db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
|
+
function ensureCacheControlTtlSecondsColumn(db) {
|
|
35
|
+
const rows = db.prepare("PRAGMA table_info(requests)").all();
|
|
36
|
+
const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
|
|
37
|
+
if (!names.has("cache_control_ttl_seconds")) {
|
|
38
|
+
db.exec("ALTER TABLE requests ADD COLUMN cache_control_ttl_seconds INTEGER");
|
|
39
|
+
}
|
|
40
|
+
}
|
|
34
41
|
export function resolveFlightRecorderDbPath() {
|
|
35
42
|
const configured = process.env.LLM_GATEWAY_LOGS_DB;
|
|
36
43
|
if (configured !== undefined) {
|
|
@@ -144,6 +151,10 @@ export class FlightRecorder {
|
|
|
144
151
|
this.db
|
|
145
152
|
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
|
|
146
153
|
.run(new Date().toISOString());
|
|
154
|
+
ensureCacheControlTtlSecondsColumn(this.db);
|
|
155
|
+
this.db
|
|
156
|
+
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(5, ?)")
|
|
157
|
+
.run(new Date().toISOString());
|
|
147
158
|
if (process.platform !== "win32") {
|
|
148
159
|
try {
|
|
149
160
|
chmodSync(dbPath, 0o600);
|
|
@@ -154,10 +165,10 @@ export class FlightRecorder {
|
|
|
154
165
|
const insertRequest = this.db.prepare(`
|
|
155
166
|
INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
|
|
156
167
|
stable_prefix_hash, stable_prefix_tokens,
|
|
157
|
-
cache_control_blocks)
|
|
168
|
+
cache_control_blocks, cache_control_ttl_seconds)
|
|
158
169
|
VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
|
|
159
170
|
@stable_prefix_hash, @stable_prefix_tokens,
|
|
160
|
-
@cache_control_blocks)
|
|
171
|
+
@cache_control_blocks, @cache_control_ttl_seconds)
|
|
161
172
|
`);
|
|
162
173
|
const insertMetadata = this.db.prepare(`
|
|
163
174
|
INSERT INTO gateway_metadata (request_id, async_job_id, status)
|
|
@@ -175,6 +186,7 @@ export class FlightRecorder {
|
|
|
175
186
|
stable_prefix_hash: entry.stablePrefixHash ?? null,
|
|
176
187
|
stable_prefix_tokens: entry.stablePrefixTokens ?? null,
|
|
177
188
|
cache_control_blocks: entry.cacheControlBlocks ?? null,
|
|
189
|
+
cache_control_ttl_seconds: entry.cacheControlTtlSeconds ?? null,
|
|
178
190
|
});
|
|
179
191
|
insertMetadata.run({
|
|
180
192
|
request_id: entry.correlationId,
|
package/dist/index.d.ts
CHANGED
|
@@ -134,6 +134,7 @@ interface CliRequestPrep {
|
|
|
134
134
|
stablePrefixTokens: number | null;
|
|
135
135
|
stdinPayload?: string;
|
|
136
136
|
cacheControlBlocks?: number;
|
|
137
|
+
cacheControlTtlSeconds?: number;
|
|
137
138
|
warnings?: WarningEntry[];
|
|
138
139
|
}
|
|
139
140
|
export declare function prepareClaudeRequest(params: {
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,7 @@ import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, loadProvid
|
|
|
22
22
|
import { createXaiResponse, XaiApiError, } from "./xai-api-provider.js";
|
|
23
23
|
import { checkHealth } from "./health.js";
|
|
24
24
|
import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
|
|
25
|
+
import { getProviderToolCapabilities } from "./provider-tool-capabilities.js";
|
|
25
26
|
import { AsyncJobManager, } from "./async-job-manager.js";
|
|
26
27
|
import { createJobStore } from "./job-store.js";
|
|
27
28
|
import { ApprovalManager } from "./approval-manager.js";
|
|
@@ -761,6 +762,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
|
|
|
761
762
|
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
762
763
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
763
764
|
cacheControlBlocks: prep.cacheControlBlocks,
|
|
765
|
+
cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
|
|
764
766
|
},
|
|
765
767
|
extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
|
|
766
768
|
};
|
|
@@ -1048,6 +1050,27 @@ function registerBaseResources(server, runtime) {
|
|
|
1048
1050
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
1049
1051
|
return { contents: contents ? [contents] : [] };
|
|
1050
1052
|
});
|
|
1053
|
+
server.registerResource("provider-tools-catalog", "provider-tools://catalog", {
|
|
1054
|
+
title: "Provider Tool Capabilities Catalog",
|
|
1055
|
+
description: "Read-only catalog of gateway tool controls and discovered provider skills",
|
|
1056
|
+
mimeType: "application/json",
|
|
1057
|
+
}, async (uri) => {
|
|
1058
|
+
runtime.logger.debug("Reading provider-tools://catalog resource");
|
|
1059
|
+
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
1060
|
+
return { contents: contents ? [contents] : [] };
|
|
1061
|
+
});
|
|
1062
|
+
server.registerResource("provider-tools", new ResourceTemplate("provider-tools://{provider}", { list: undefined }), {
|
|
1063
|
+
title: "Provider Tool Capabilities",
|
|
1064
|
+
description: "Read-only gateway tool controls and discovered local skills for one provider CLI",
|
|
1065
|
+
mimeType: "application/json",
|
|
1066
|
+
}, async (uri, variables) => {
|
|
1067
|
+
const provider = Array.isArray(variables.provider)
|
|
1068
|
+
? variables.provider[0]
|
|
1069
|
+
: variables.provider;
|
|
1070
|
+
runtime.logger.debug(`Reading provider-tools://${provider}`);
|
|
1071
|
+
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
1072
|
+
return { contents: contents ? [contents] : [] };
|
|
1073
|
+
});
|
|
1051
1074
|
}
|
|
1052
1075
|
function resolvePromptOrPartsForPrep(args) {
|
|
1053
1076
|
const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
|
|
@@ -1105,7 +1128,18 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1105
1128
|
const ccEarly = params.promptParts?.cacheControl;
|
|
1106
1129
|
const cacheControlRequestedEarly = !!(ccEarly &&
|
|
1107
1130
|
(ccEarly.system || ccEarly.tools || ccEarly.context));
|
|
1108
|
-
|
|
1131
|
+
const explicitCacheControlBlockCount = params.promptParts && ccEarly
|
|
1132
|
+
? (ccEarly.system && params.promptParts.system && params.promptParts.system.length > 0
|
|
1133
|
+
? 1
|
|
1134
|
+
: 0) +
|
|
1135
|
+
(ccEarly.tools && params.promptParts.tools && params.promptParts.tools.length > 0 ? 1 : 0) +
|
|
1136
|
+
(ccEarly.context && params.promptParts.context && params.promptParts.context.length > 0
|
|
1137
|
+
? 1
|
|
1138
|
+
: 0)
|
|
1139
|
+
: 0;
|
|
1140
|
+
const effectiveExplicitCacheControl = explicitCacheControlBlockCount > 0;
|
|
1141
|
+
const cacheControlNoop = cacheControlRequestedEarly && !effectiveExplicitCacheControl;
|
|
1142
|
+
if (params.optimizePrompt && effectiveExplicitCacheControl) {
|
|
1109
1143
|
return createErrorResponse(params.operation, 1, "", corrId, new Error("optimizePrompt is incompatible with promptParts.cacheControl (slice κ): optimization rewrites the assembled prompt text the flight recorder logs, while the cache_control payload is built from raw promptParts; the two would desync and break Anthropic prefix-cache reuse. Disable optimizePrompt when opting into cacheControl."));
|
|
1110
1144
|
}
|
|
1111
1145
|
let effectivePrompt = assembledPrompt;
|
|
@@ -1140,7 +1174,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1140
1174
|
}
|
|
1141
1175
|
}
|
|
1142
1176
|
let autoEmittedCacheControlBlock = null;
|
|
1143
|
-
if (!
|
|
1177
|
+
if (!effectiveExplicitCacheControl &&
|
|
1144
1178
|
runtime.cacheAwareness.emitAnthropicCacheControl &&
|
|
1145
1179
|
!params.optimizePrompt &&
|
|
1146
1180
|
params.outputFormat === "stream-json" &&
|
|
@@ -1164,7 +1198,14 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1164
1198
|
}
|
|
1165
1199
|
}
|
|
1166
1200
|
const warnings = [];
|
|
1167
|
-
if (
|
|
1201
|
+
if (cacheControlNoop) {
|
|
1202
|
+
warnings.push({
|
|
1203
|
+
code: "cache_control_noop",
|
|
1204
|
+
message: "promptParts.cacheControl only marked empty or omitted stable parts; no cache_control breakpoint will be emitted from the explicit marker.",
|
|
1205
|
+
reason: "cacheControl marker did not match a non-empty stable block",
|
|
1206
|
+
});
|
|
1207
|
+
}
|
|
1208
|
+
if (!effectiveExplicitCacheControl &&
|
|
1168
1209
|
autoEmittedCacheControlBlock === null &&
|
|
1169
1210
|
params.promptParts &&
|
|
1170
1211
|
stablePrefixTokens !== null) {
|
|
@@ -1184,9 +1225,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1184
1225
|
});
|
|
1185
1226
|
}
|
|
1186
1227
|
}
|
|
1187
|
-
const cacheControlRequested =
|
|
1228
|
+
const cacheControlRequested = effectiveExplicitCacheControl || autoEmittedCacheControlBlock !== null;
|
|
1188
1229
|
let stdinPayload;
|
|
1189
1230
|
let cacheControlBlocks;
|
|
1231
|
+
let cacheControlTtlSeconds;
|
|
1190
1232
|
if (cacheControlRequested) {
|
|
1191
1233
|
if (params.outputFormat !== "stream-json") {
|
|
1192
1234
|
return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
|
|
@@ -1203,6 +1245,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1203
1245
|
const built = assembleClaudeCacheBlocks(effectiveParts);
|
|
1204
1246
|
stdinPayload = `${JSON.stringify(built.payload)}\n`;
|
|
1205
1247
|
cacheControlBlocks = built.markedBlockCount;
|
|
1248
|
+
cacheControlTtlSeconds = built.markedBlockCount > 0 ? 3600 : undefined;
|
|
1206
1249
|
}
|
|
1207
1250
|
const args = cacheControlRequested
|
|
1208
1251
|
? [
|
|
@@ -1291,6 +1334,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1291
1334
|
stablePrefixTokens,
|
|
1292
1335
|
stdinPayload,
|
|
1293
1336
|
cacheControlBlocks,
|
|
1337
|
+
cacheControlTtlSeconds,
|
|
1294
1338
|
warnings: warnings.length > 0 ? warnings : undefined,
|
|
1295
1339
|
};
|
|
1296
1340
|
}
|
|
@@ -3383,6 +3427,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3383
3427
|
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
3384
3428
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
3385
3429
|
cacheControlBlocks: prep.cacheControlBlocks,
|
|
3430
|
+
cacheControlTtlSeconds: prep.cacheControlTtlSeconds,
|
|
3386
3431
|
}, runtime);
|
|
3387
3432
|
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}, cacheControlBlocks=${prep.cacheControlBlocks ?? 0}`);
|
|
3388
3433
|
try {
|
|
@@ -5392,6 +5437,44 @@ export function createGatewayServer(deps = {}) {
|
|
|
5392
5437
|
const result = cli ? { [cli]: cliInfo[cli] } : cliInfo;
|
|
5393
5438
|
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
5394
5439
|
});
|
|
5440
|
+
server.tool("provider_tool_capabilities", "Report provider tool/feature capabilities and discovered local skill/tool integrations for claude|codex|gemini|grok|grok_api|mistral.", {
|
|
5441
|
+
cli: z
|
|
5442
|
+
.preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "grok_api", "mistral"]).optional())
|
|
5443
|
+
.describe("Provider filter (claude|codex|gemini|grok|grok_api|mistral)"),
|
|
5444
|
+
includeSkills: z
|
|
5445
|
+
.boolean()
|
|
5446
|
+
.default(true)
|
|
5447
|
+
.describe("Include bounded local skill discovery results"),
|
|
5448
|
+
includeProviderTools: z
|
|
5449
|
+
.boolean()
|
|
5450
|
+
.default(true)
|
|
5451
|
+
.describe("Include provider-native tools extracted from local skills"),
|
|
5452
|
+
includeUnsupported: z
|
|
5453
|
+
.boolean()
|
|
5454
|
+
.default(true)
|
|
5455
|
+
.describe("Include explicit unsupported/degraded input records"),
|
|
5456
|
+
includePaths: z
|
|
5457
|
+
.boolean()
|
|
5458
|
+
.default(false)
|
|
5459
|
+
.describe("Include raw local filesystem paths in discovery output"),
|
|
5460
|
+
refresh: z.boolean().default(false).describe("Bypass the short-lived capability cache"),
|
|
5461
|
+
}, {
|
|
5462
|
+
title: "Provider tool capabilities",
|
|
5463
|
+
readOnlyHint: true,
|
|
5464
|
+
destructiveHint: false,
|
|
5465
|
+
idempotentHint: true,
|
|
5466
|
+
openWorldHint: false,
|
|
5467
|
+
}, async ({ cli, includeSkills, includeProviderTools, includeUnsupported, includePaths, refresh, }) => {
|
|
5468
|
+
const capabilities = getProviderToolCapabilities({
|
|
5469
|
+
cli,
|
|
5470
|
+
includeSkills,
|
|
5471
|
+
includeProviderTools,
|
|
5472
|
+
includeUnsupported,
|
|
5473
|
+
includePaths,
|
|
5474
|
+
refresh,
|
|
5475
|
+
});
|
|
5476
|
+
return { content: [{ type: "text", text: JSON.stringify(capabilities, null, 2) }] };
|
|
5477
|
+
});
|
|
5395
5478
|
server.tool("cli_versions", "Report installed provider CLI versions, availability, and login status for all five providers or one.", {
|
|
5396
5479
|
cli: z
|
|
5397
5480
|
.preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
|
package/dist/prompt-parts.js
CHANGED
|
@@ -60,14 +60,17 @@ export function assembleClaudeCacheBlocks(parts) {
|
|
|
60
60
|
for (const [name, value] of stableEntries) {
|
|
61
61
|
if (value === undefined || value.length === 0)
|
|
62
62
|
continue;
|
|
63
|
-
const block = {
|
|
63
|
+
const block = {
|
|
64
|
+
type: "text",
|
|
65
|
+
text: blocks.length > 0 ? `${SEPARATOR}${value}` : value,
|
|
66
|
+
};
|
|
64
67
|
if (cc[name]) {
|
|
65
68
|
block.cache_control = { type: "ephemeral", ttl: "1h" };
|
|
66
69
|
markedBlockCount += 1;
|
|
67
70
|
}
|
|
68
71
|
blocks.push(block);
|
|
69
72
|
}
|
|
70
|
-
blocks.push({ type: "text", text: parts.task });
|
|
73
|
+
blocks.push({ type: "text", text: blocks.length > 0 ? `${SEPARATOR}${parts.task}` : parts.task });
|
|
71
74
|
return {
|
|
72
75
|
payload: {
|
|
73
76
|
type: "user",
|