llm-cli-gateway 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,82 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.4.0] - 2026-06-08: Direct Grok API provider and provider-owned sessions
8
+
9
+ ### Added
10
+
11
+ - Direct xAI Grok API provider support:
12
+ - new `ProviderType` split so stored sessions, metrics, flight-recorder rows,
13
+ and migrations can represent the non-CLI `grok-api` provider alongside the
14
+ existing CLI providers;
15
+ - `[providers.xai]` config loading with API-key env indirection and provider
16
+ enablement gating;
17
+ - `grok_api_request`, registered only when xAI API config and credentials are
18
+ present, backed by the xAI Responses API;
19
+ - xAI response parsing, retry/circuit-breaker handling, usage/cost metadata,
20
+ and `previous_response_id` session metadata;
21
+ - focused migration, session-manager, provider-config, and Grok API tests.
22
+ - Provider subcommand contract resources and tooling:
23
+ - provider subcommand catalog/detail resource generation;
24
+ - `provider_subcommands_list`, `provider_subcommand_contract`, and
25
+ `provider_subcommand_drift` surfaces exercised through MCP Inspector.
26
+ - Host auto-upgrade operations:
27
+ - `scripts/host-upgrade.sh` for staged, atomic npm-based host upgrades with
28
+ rollback support;
29
+ - user systemd service/timer units for scheduled gateway auto-upgrade checks.
30
+ - Direct Grok API provider design draft documenting the follow-on async-runner
31
+ and capability-table design work.
32
+
33
+ ### Fixed
34
+
35
+ - Provider-owned stored session enforcement now rejects cross-provider reuse for
36
+ all request handlers, including `claude_request`, `codex_request`,
37
+ `gemini_request`, `grok_request`, `mistral_request`, their async variants,
38
+ `codex_fork_session`, and `grok_api_request`.
39
+ - `sessions://all` now reports active sessions across all provider types,
40
+ including `grok-api`.
41
+ - MCP resource URI schemes now use standards-valid hyphenated forms:
42
+ `cache-state://...` and `provider-subcommands://...`. MCP Inspector exposed
43
+ the previous underscore schemes as invalid URL schemes for standard MCP
44
+ clients. Legacy direct `provider_subcommands://...` reads remain accepted for
45
+ internal compatibility tests/callers, but advertised resources now use only
46
+ valid URI schemes.
47
+ - `src/executor.ts` avoids pidless child-process kill attempts.
48
+
49
+ ### Changed
50
+
51
+ - GitHub Actions pins were refreshed to the current action commit SHAs.
52
+ - Provider subcommand support remains CLI-only where appropriate; the direct
53
+ API provider is excluded from spawnable-CLI contract paths.
54
+
55
+ ### Verification
56
+
57
+ - Dirty-tree stack split and release evidence are recorded in
58
+ `docs/reviews/dirty-tree-stack-split-verification-2026-06-08.md`.
59
+ - MCP Inspector smoke covered tools/list, resources/list, read-only tool calls,
60
+ session lifecycle, direct xAI API registration through a loopback mock, and
61
+ exhaustive advertised-resource reads.
62
+ - Multi-LLM review completed with Claude, Codex, Gemini, Grok, and Mistral
63
+ approvals for the main stack and the Inspector-discovered URI-scheme fix.
64
+ - Final merged `master` verification before mirror push:
65
+ `npm run check` passed, including build, lint, format check, 67 test files /
66
+ 1124 tests, and the release security audit.
67
+
68
+ ## [2.3.0] - 2026-06-08: MCP tool annotations and client safety hints
69
+
70
+ ### Added
71
+
72
+ - MCP tool annotations for all 37 tools (per MCP spec + tool-design best
73
+ practice): display `title` plus `readOnlyHint`/`destructiveHint`/
74
+ `idempotentHint`/`openWorldHint` on every registration. 14 pure-read tools
75
+ marked read-only/closed-world; `cli_upgrade`, `session_delete`,
76
+ `session_clear_all`, `llm_job_cancel` marked destructive; every
77
+ provider-spawning tool (requests, fork, validation) marked open-world with
78
+ destructive potential (spawned agentic CLIs can modify the environment).
79
+ Clients can use the hints for confirmation UX and safe auto-approval. A new
80
+ invariant test pins titles, the exact destructive/read-only/open-world
81
+ sets, and the readOnly+destructive contradiction ban.
82
+
7
83
  ## [2.2.0] - 2026-06-07: MCP tool-surface usability — self-describing tools
8
84
 
9
85
  ### Added
@@ -247,7 +323,7 @@ to end with a verdaccio reproduction.
247
323
  - Consumer `npm ls` exits ELSPROBLEMS: the pinned `tar-stream@3.1.7` sits
248
324
  outside `tar-fs`'s `^2.1.4` range. Inherent to the out-of-range pin; disappears
249
325
  in 2.0.0 (Phase B / node:sqlite) when the `better-sqlite3 → prebuild-install
250
- → tar-fs` chain leaves the prod graph entirely.
326
+ → tar-fs` chain leaves the prod graph entirely.
251
327
  - Local-tarball installs still resolve `tar-stream@2.2.0` (shrinkwrap ignored on
252
328
  that path); the audit's advisory carve-out stays until Phase B.
253
329
 
@@ -1497,7 +1573,7 @@ boundary bypass); all are addressed in the two follow-up fix commits.
1497
1573
  Closes the two telemetry gaps that v1.6.0 explicitly deferred: async-path
1498
1574
  flight-recorder integration and Codex parser support for the actual
1499
1575
  `cached_input_tokens` field the current Codex CLI emits. Both ship
1500
- together because they jointly close out `cache_state://*` completeness
1576
+ together because they jointly close out `cache-state://*` completeness
1501
1577
  for the async tools and the codex CLI.
1502
1578
 
1503
1579
  ### Added — async-path flight recorder writes
@@ -1537,9 +1613,9 @@ stderr, exitCode }> }` so the manager constructor can write FR
1537
1613
  `{ count: 0, orphaned: [] }` (in-process state can't be orphaned).
1538
1614
  Breaking change to the `JobStore` interface; the `PostgresJobStore`
1539
1615
  stub was updated to match (the impl is still not yet shipped).
1540
- - `cache_state://global`, `cache_state://session/{id}`, and
1541
- `cache_state://prefix/{hash}` aggregates now include async-job
1542
- activity. No query changes — `cache_state://*` already didn't filter
1616
+ - `cache-state://global`, `cache-state://session/{id}`, and
1617
+ `cache-state://prefix/{hash}` aggregates now include async-job
1618
+ activity. No query changes — `cache-state://*` already didn't filter
1543
1619
  on `asyncJobId`, so the new rows participate naturally.
1544
1620
 
1545
1621
  ### Fixed — Codex parser accepts current CLI's cache-token field
@@ -1570,7 +1646,7 @@ distinguishing `errorMessage`. The underlying `jobs` table in JobStore
1570
1646
  retains the distinct `"canceled"` / `"orphaned"` statuses for
1571
1647
  `getJobSnapshot` callers. External consumers of `~/.llm-cli-gateway/
1572
1648
  logs.db` that filter `status='failed'` will count cancels and boot-time
1573
- orphans as errors; `cache_state://*` aggregation does not distinguish.
1649
+ orphans as errors; `cache-state://*` aggregation does not distinguish.
1574
1650
 
1575
1651
  ### No config or schema changes
1576
1652
 
@@ -1632,7 +1708,7 @@ Pure documentation release; zero source-code changes since 1.6.0.
1632
1708
  ### Changed — 12 SKILL.md files current with v1.6.0
1633
1709
 
1634
1710
  - All 12 skills (7 under `skills/`, 5 under `.agents/skills/`) extended
1635
- with `promptParts`, `cache_state://` MCP resources, and (where the
1711
+ with `promptParts`, `cache-state://` MCP resources, and (where the
1636
1712
  skill's centre of gravity is session continuity) the
1637
1713
  `cache_ttl_expiring_soon` warning. Depth tiered by skill audience:
1638
1714
  multi-llm-orchestration, model-routing, multi-llm-consensus,
@@ -1739,9 +1815,9 @@ Also includes (beyond cache-awareness):
1739
1815
  `requests`, plus `idx_requests_stable_hash`. Legacy rows keep NULL.
1740
1816
  - **Cache-state MCP resources** (read-only, tokens/hashes/aggregates only —
1741
1817
  never raw prompt text):
1742
- - `cache_state://global` (last 24h aggregates + per-CLI breakdown).
1743
- - `cache_state://session/{sessionId}` (per-session).
1744
- - `cache_state://prefix/{hash}` (per-stable-prefix-hash).
1818
+ - `cache-state://global` (last 24h aggregates + per-CLI breakdown).
1819
+ - `cache-state://session/{sessionId}` (per-session).
1820
+ - `cache-state://prefix/{hash}` (per-stable-prefix-hash).
1745
1821
  - **`session_get.cacheState`** projection: compact hit-rate / hit-count /
1746
1822
  cache-token-totals / estimated-savings-USD block, present only when the
1747
1823
  session has prior requests. Omitted entirely (not null, not empty) for
package/README.md CHANGED
@@ -164,7 +164,7 @@ docker compose -f docker/personal.compose.yml run --rm doctor
164
164
 
165
165
  - **SQLite Flight Recorder**: Every request/response logged to `~/.llm-cli-gateway/logs.db` with correlation IDs, token usage, duration, retry counts, and circuit breaker state. Browse with [Datasette](https://datasette.io/): `datasette ~/.llm-cli-gateway/logs.db`
166
166
  - **Structured Metadata**: Tool responses include machine-readable `structuredContent` (model, cli, correlationId, sessionId, durationMs, token counts)
167
- - **Cache observability resources**: `cache_state://global`, `cache_state://session/{id}`, and `cache_state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
167
+ - **Cache observability resources**: `cache-state://global`, `cache-state://session/{id}`, and `cache-state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
168
168
 
169
169
  ### Cache-aware operation
170
170
 
package/dist/config.d.ts CHANGED
@@ -32,6 +32,7 @@ export interface PersistenceConfigSources {
32
32
  configFile: string | null;
33
33
  envOverrides: string[];
34
34
  }
35
+ export declare function defaultGatewayConfigPath(): string;
35
36
  export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
36
37
  export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
37
38
  export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
@@ -58,3 +59,19 @@ export interface CacheAwarenessConfig {
58
59
  }
59
60
  export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
60
61
  export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;
62
+ export declare const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
63
+ export declare const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
64
+ export declare const DEFAULT_XAI_MODEL = "grok-build-0.1";
65
+ export interface XaiProviderConfig {
66
+ apiKeyEnv: string;
67
+ baseUrl: string;
68
+ defaultModel: string;
69
+ }
70
+ export interface ProvidersConfig {
71
+ xai: XaiProviderConfig | null;
72
+ sources: {
73
+ configFile: string | null;
74
+ };
75
+ }
76
+ export declare function loadProvidersConfig(logger?: Logger): ProvidersConfig;
77
+ export declare function isXaiProviderEnabled(config: ProvidersConfig, env?: NodeJS.ProcessEnv): boolean;
package/dist/config.js CHANGED
@@ -56,6 +56,9 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
56
56
  function defaultPersistenceConfigPath() {
57
57
  return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
58
58
  }
59
+ export function defaultGatewayConfigPath() {
60
+ return defaultPersistenceConfigPath();
61
+ }
59
62
  function readPersistenceFile(configPath, logger) {
60
63
  if (!existsSync(configPath)) {
61
64
  return { raw: undefined, sourcePath: null };
@@ -248,3 +251,84 @@ export function minStableTokensForModel(config, modelName) {
248
251
  return table.haiku;
249
252
  return table.default;
250
253
  }
254
+ export const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
255
+ export const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
256
+ export const DEFAULT_XAI_MODEL = "grok-build-0.1";
257
+ function isHttpsOrLoopbackUrl(value) {
258
+ try {
259
+ const url = new URL(value);
260
+ if (url.protocol === "https:")
261
+ return true;
262
+ if (url.protocol !== "http:")
263
+ return false;
264
+ return ["localhost", "127.0.0.1", "::1", "[::1]"].includes(url.hostname);
265
+ }
266
+ catch {
267
+ return false;
268
+ }
269
+ }
270
+ const XaiProviderSchema = z
271
+ .object({
272
+ api_key_env: z.string().min(1).default(DEFAULT_XAI_API_KEY_ENV),
273
+ base_url: z
274
+ .string()
275
+ .url()
276
+ .refine(isHttpsOrLoopbackUrl, {
277
+ message: "base_url must use https unless it targets localhost/loopback for tests",
278
+ })
279
+ .default(DEFAULT_XAI_BASE_URL),
280
+ default_model: z.string().min(1).default(DEFAULT_XAI_MODEL),
281
+ })
282
+ .strict();
283
+ function readProvidersFile(configPath, logger) {
284
+ if (!existsSync(configPath)) {
285
+ return { raw: undefined, sourcePath: null };
286
+ }
287
+ try {
288
+ const require = createRequire(import.meta.url);
289
+ const TOML = require("smol-toml");
290
+ const text = readFileSync(configPath, "utf-8");
291
+ const parsed = TOML.parse(text);
292
+ return { raw: parsed?.providers, sourcePath: configPath };
293
+ }
294
+ catch (err) {
295
+ logger.error(`Failed to parse gateway config at ${configPath}; using provider defaults`, err);
296
+ return { raw: undefined, sourcePath: null };
297
+ }
298
+ }
299
+ export function loadProvidersConfig(logger = noopLogger) {
300
+ const configPath = defaultGatewayConfigPath();
301
+ const { raw, sourcePath } = readProvidersFile(configPath, logger);
302
+ const providers = raw ?? {};
303
+ const rawXai = providers.xai;
304
+ if (rawXai === undefined) {
305
+ return {
306
+ xai: null,
307
+ sources: { configFile: sourcePath },
308
+ };
309
+ }
310
+ const parsed = XaiProviderSchema.safeParse(rawXai);
311
+ if (!parsed.success) {
312
+ logWarn(logger, "Invalid [providers.xai] config; xAI API provider disabled", {
313
+ error: parsed.error.message,
314
+ });
315
+ return {
316
+ xai: null,
317
+ sources: { configFile: sourcePath },
318
+ };
319
+ }
320
+ return {
321
+ xai: {
322
+ apiKeyEnv: parsed.data.api_key_env,
323
+ baseUrl: parsed.data.base_url,
324
+ defaultModel: parsed.data.default_model,
325
+ },
326
+ sources: { configFile: sourcePath },
327
+ };
328
+ }
329
+ export function isXaiProviderEnabled(config, env = process.env) {
330
+ const keyEnv = config.xai?.apiKeyEnv;
331
+ if (!keyEnv)
332
+ return false;
333
+ return typeof env[keyEnv] === "string" && env[keyEnv].trim().length > 0;
334
+ }
package/dist/executor.js CHANGED
@@ -269,30 +269,26 @@ export function killAllProcessGroups() {
269
269
  });
270
270
  }
271
271
  export function killProcessGroup(proc, signal) {
272
- if (proc.pid) {
273
- if (process.platform === "win32") {
274
- return killWindowsProcessTree(proc.pid);
275
- }
276
- try {
277
- process.kill(-proc.pid, signal);
278
- return true;
279
- }
280
- catch (err) {
281
- if (err.code !== "ESRCH") {
282
- try {
283
- return proc.kill(signal);
284
- }
285
- catch {
286
- return false;
287
- }
288
- }
289
- return false;
290
- }
272
+ const pid = proc.pid;
273
+ if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) {
274
+ return false;
275
+ }
276
+ if (process.platform === "win32") {
277
+ return killWindowsProcessTree(pid);
291
278
  }
292
279
  try {
293
- return proc.kill(signal);
280
+ process.kill(-pid, signal);
281
+ return true;
294
282
  }
295
- catch {
283
+ catch (err) {
284
+ if (err.code !== "ESRCH") {
285
+ try {
286
+ return proc.kill(signal);
287
+ }
288
+ catch {
289
+ return false;
290
+ }
291
+ }
296
292
  return false;
297
293
  }
298
294
  }
@@ -1,6 +1,7 @@
1
+ import type { ProviderType } from "./session-manager.js";
1
2
  export interface FlightLogStart {
2
3
  correlationId: string;
3
- cli: "claude" | "codex" | "gemini" | "grok" | "mistral";
4
+ cli: ProviderType;
4
5
  model: string;
5
6
  prompt: string;
6
7
  system?: string;
package/dist/index.d.ts CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { z } from "zod/v3";
4
- import { ISessionManager } from "./session-manager.js";
4
+ import { ISessionManager, type ProviderType } from "./session-manager.js";
5
5
  import { ResourceProvider } from "./resources.js";
6
6
  import { PerformanceMetrics } from "./metrics.js";
7
- import { type PersistenceConfig, type CacheAwarenessConfig } from "./config.js";
7
+ import { type PersistenceConfig, type CacheAwarenessConfig, type ProvidersConfig } from "./config.js";
8
+ import { type XaiReasoningEffort } from "./xai-api-provider.js";
8
9
  import { DatabaseConnection } from "./db.js";
9
10
  import { AsyncJobManager } from "./async-job-manager.js";
10
11
  import { ApprovalManager, ApprovalRecord } from "./approval-manager.js";
@@ -44,7 +45,7 @@ declare const logger: {
44
45
  debug: (message: string, ...args: any[]) => void;
45
46
  };
46
47
  type GatewayLogger = typeof logger;
47
- export declare function buildServerInstructions(asyncJobsEnabled: boolean): string;
48
+ export declare function buildServerInstructions(asyncJobsEnabled: boolean, grokApiToolsEnabled?: boolean): string;
48
49
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
49
50
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
50
51
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
@@ -58,9 +59,9 @@ export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
58
59
  name?: string | undefined;
59
60
  ref?: string | undefined;
60
61
  }>]>;
61
- export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
62
- export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
63
- export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
62
+ export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral", "grok-api"];
63
+ export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral", "grok-api"]>;
64
+ export type SessionProvider = ProviderType;
64
65
  export interface GatewayServerDeps {
65
66
  sessionManager?: ISessionManager;
66
67
  resourceProvider?: ResourceProvider;
@@ -72,6 +73,7 @@ export interface GatewayServerDeps {
72
73
  logger?: GatewayLogger;
73
74
  persistence?: PersistenceConfig;
74
75
  cacheAwareness?: CacheAwarenessConfig;
76
+ providers?: ProvidersConfig;
75
77
  }
76
78
  export interface GatewayServerRuntime {
77
79
  sessionManager: ISessionManager;
@@ -84,10 +86,12 @@ export interface GatewayServerRuntime {
84
86
  logger: GatewayLogger;
85
87
  persistence: PersistenceConfig;
86
88
  cacheAwareness: CacheAwarenessConfig;
89
+ providers: ProvidersConfig;
87
90
  }
88
91
  export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
89
92
  isolateState?: boolean;
90
93
  }): GatewayServerRuntime;
94
+ export declare function shouldRegisterGrokApiTools(providers: ProvidersConfig): boolean;
91
95
  export interface ResolvedWorktree {
92
96
  cwd?: string;
93
97
  worktreePath?: string;
@@ -285,6 +289,22 @@ export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams,
285
289
  env: Record<string, string>;
286
290
  ignoredDisallowedTools: boolean;
287
291
  };
292
+ export interface GrokApiRequestParams {
293
+ prompt?: string;
294
+ promptParts?: PromptParts;
295
+ model?: string;
296
+ sessionId?: string;
297
+ createNewSession?: boolean;
298
+ correlationId?: string;
299
+ optimizePrompt: boolean;
300
+ optimizeResponse?: boolean;
301
+ maxOutputTokens?: number;
302
+ temperature?: number;
303
+ topP?: number;
304
+ reasoningEffort?: XaiReasoningEffort;
305
+ timeoutMs?: number;
306
+ }
307
+ export declare function handleGrokApiRequest(deps: HandlerDeps, params: GrokApiRequestParams): Promise<ExtendedToolResponse>;
288
308
  export interface GeminiRequestParams {
289
309
  prompt?: string;
290
310
  promptParts?: PromptParts;