llm-cli-gateway 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,67 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.4.0] - 2026-06-08: Direct Grok API provider and provider-owned sessions
8
+
9
+ ### Added
10
+
11
+ - Direct xAI Grok API provider support:
12
+ - new `ProviderType` split so stored sessions, metrics, flight-recorder rows,
13
+ and migrations can represent the non-CLI `grok-api` provider alongside the
14
+ existing CLI providers;
15
+ - `[providers.xai]` config loading with API-key env indirection and provider
16
+ enablement gating;
17
+ - `grok_api_request`, registered only when xAI API config and credentials are
18
+ present, backed by the xAI Responses API;
19
+ - xAI response parsing, retry/circuit-breaker handling, usage/cost metadata,
20
+ and `previous_response_id` session metadata;
21
+ - focused migration, session-manager, provider-config, and Grok API tests.
22
+ - Provider subcommand contract resources and tooling:
23
+ - provider subcommand catalog/detail resource generation;
24
+ - `provider_subcommands_list`, `provider_subcommand_contract`, and
25
+ `provider_subcommand_drift` surfaces exercised through MCP Inspector.
26
+ - Host auto-upgrade operations:
27
+ - `scripts/host-upgrade.sh` for staged, atomic npm-based host upgrades with
28
+ rollback support;
29
+ - user systemd service/timer units for scheduled gateway auto-upgrade checks.
30
+ - Direct Grok API provider design draft documenting the follow-on async-runner
31
+ and capability-table design work.
32
+
33
+ ### Fixed
34
+
35
+ - Provider-owned stored session enforcement now rejects cross-provider reuse for
36
+ all request handlers, including `claude_request`, `codex_request`,
37
+ `gemini_request`, `grok_request`, `mistral_request`, their async variants,
38
+ `codex_fork_session`, and `grok_api_request`.
39
+ - `sessions://all` now reports active sessions across all provider types,
40
+ including `grok-api`.
41
+ - MCP resource URI schemes now use standards-valid hyphenated forms:
42
+ `cache-state://...` and `provider-subcommands://...`. MCP Inspector exposed
43
+ the previous underscore schemes as invalid URL schemes for standard MCP
44
+ clients. Legacy direct `provider_subcommands://...` reads remain accepted for
45
+ internal compatibility tests/callers, but advertised resources now use only
46
+ valid URI schemes.
47
+ - `src/executor.ts` avoids pidless child-process kill attempts.
48
+
49
+ ### Changed
50
+
51
+ - GitHub Actions pins were refreshed to current pinned action SHAs.
52
+ - Provider subcommand support remains CLI-only where appropriate; the direct
53
+ API provider is excluded from spawnable-CLI contract paths.
54
+
55
+ ### Verification
56
+
57
+ - Dirty-tree stack split and release evidence recorded in
58
+ `docs/reviews/dirty-tree-stack-split-verification-2026-06-08.md`.
59
+ - MCP Inspector smoke covered tools/list, resources/list, read-only tool calls,
60
+ session lifecycle, direct xAI API registration through a loopback mock, and
61
+ exhaustive advertised-resource reads.
62
+ - Multi-LLM review completed with Claude, Codex, Gemini, Grok, and Mistral
63
+ approvals for the main stack and the Inspector-discovered URI-scheme fix.
64
+ - Final merged `master` verification before mirror push:
65
+ `npm run check` passed, including build, lint, format check, 67 test files /
66
+ 1124 tests, and the release security audit.
67
+
7
68
  ## [2.3.0] - 2026-06-08: MCP tool annotations and client safety hints
8
69
 
9
70
  ### Added
@@ -1512,7 +1573,7 @@ boundary bypass); all are addressed in the two follow-up fix commits.
1512
1573
  Closes the two telemetry gaps that v1.6.0 explicitly deferred: async-path
1513
1574
  flight-recorder integration and Codex parser support for the actual
1514
1575
  `cached_input_tokens` field the current Codex CLI emits. Both ship
1515
- together because they jointly close out `cache_state://*` completeness
1576
+ together because they jointly close out `cache-state://*` completeness
1516
1577
  for the async tools and the codex CLI.
1517
1578
 
1518
1579
  ### Added — async-path flight recorder writes
@@ -1552,9 +1613,9 @@ stderr, exitCode }> }` so the manager constructor can write FR
1552
1613
  `{ count: 0, orphaned: [] }` (in-process state can't be orphaned).
1553
1614
  Breaking change to the `JobStore` interface; the `PostgresJobStore`
1554
1615
  stub was updated to match (the impl is still not yet shipped).
1555
- - `cache_state://global`, `cache_state://session/{id}`, and
1556
- `cache_state://prefix/{hash}` aggregates now include async-job
1557
- activity. No query changes — `cache_state://*` already didn't filter
1616
+ - `cache-state://global`, `cache-state://session/{id}`, and
1617
+ `cache-state://prefix/{hash}` aggregates now include async-job
1618
+ activity. No query changes — `cache-state://*` already didn't filter
1558
1619
  on `asyncJobId`, so the new rows participate naturally.
1559
1620
 
1560
1621
  ### Fixed — Codex parser accepts current CLI's cache-token field
@@ -1585,7 +1646,7 @@ distinguishing `errorMessage`. The underlying `jobs` table in JobStore
1585
1646
  retains the distinct `"canceled"` / `"orphaned"` statuses for
1586
1647
  `getJobSnapshot` callers. External consumers of `~/.llm-cli-gateway/
1587
1648
  logs.db` that filter `status='failed'` will count cancels and boot-time
1588
- orphans as errors; `cache_state://*` aggregation does not distinguish.
1649
+ orphans as errors; `cache-state://*` aggregation does not distinguish.
1589
1650
 
1590
1651
  ### No config or schema changes
1591
1652
 
@@ -1647,7 +1708,7 @@ Pure documentation release; zero source-code changes since 1.6.0.
1647
1708
  ### Changed — 12 SKILL.md files current with v1.6.0
1648
1709
 
1649
1710
  - All 12 skills (7 under `skills/`, 5 under `.agents/skills/`) extended
1650
- with `promptParts`, `cache_state://` MCP resources, and (where the
1711
+ with `promptParts`, `cache-state://` MCP resources, and (where the
1651
1712
  skill's centre of gravity is session continuity) the
1652
1713
  `cache_ttl_expiring_soon` warning. Depth tiered by skill audience:
1653
1714
  multi-llm-orchestration, model-routing, multi-llm-consensus,
@@ -1754,9 +1815,9 @@ Also includes (beyond cache-awareness):
1754
1815
  `requests`, plus `idx_requests_stable_hash`. Legacy rows keep NULL.
1755
1816
  - **Cache-state MCP resources** (read-only, tokens/hashes/aggregates only —
1756
1817
  never raw prompt text):
1757
- - `cache_state://global` (last 24h aggregates + per-CLI breakdown).
1758
- - `cache_state://session/{sessionId}` (per-session).
1759
- - `cache_state://prefix/{hash}` (per-stable-prefix-hash).
1818
+ - `cache-state://global` (last 24h aggregates + per-CLI breakdown).
1819
+ - `cache-state://session/{sessionId}` (per-session).
1820
+ - `cache-state://prefix/{hash}` (per-stable-prefix-hash).
1760
1821
  - **`session_get.cacheState`** projection: compact hit-rate / hit-count /
1761
1822
  cache-token-totals / estimated-savings-USD block, present only when the
1762
1823
  session has prior requests. Omitted entirely (not null, not empty) for
package/README.md CHANGED
@@ -164,7 +164,7 @@ docker compose -f docker/personal.compose.yml run --rm doctor
164
164
 
165
165
  - **SQLite Flight Recorder**: Every request/response logged to `~/.llm-cli-gateway/logs.db` with correlation IDs, token usage, duration, retry counts, and circuit breaker state. Browse with [Datasette](https://datasette.io/): `datasette ~/.llm-cli-gateway/logs.db`
166
166
  - **Structured Metadata**: Tool responses include machine-readable `structuredContent` (model, cli, correlationId, sessionId, durationMs, token counts)
167
- - **Cache observability resources**: `cache_state://global`, `cache_state://session/{id}`, and `cache_state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
167
+ - **Cache observability resources**: `cache-state://global`, `cache-state://session/{id}`, and `cache-state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
168
168
 
169
169
  ### Cache-aware operation
170
170
 
package/dist/config.d.ts CHANGED
@@ -32,6 +32,7 @@ export interface PersistenceConfigSources {
32
32
  configFile: string | null;
33
33
  envOverrides: string[];
34
34
  }
35
+ export declare function defaultGatewayConfigPath(): string;
35
36
  export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
36
37
  export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
37
38
  export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
@@ -58,3 +59,19 @@ export interface CacheAwarenessConfig {
58
59
  }
59
60
  export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
60
61
  export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;
62
+ export declare const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
63
+ export declare const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
64
+ export declare const DEFAULT_XAI_MODEL = "grok-build-0.1";
65
+ export interface XaiProviderConfig {
66
+ apiKeyEnv: string;
67
+ baseUrl: string;
68
+ defaultModel: string;
69
+ }
70
+ export interface ProvidersConfig {
71
+ xai: XaiProviderConfig | null;
72
+ sources: {
73
+ configFile: string | null;
74
+ };
75
+ }
76
+ export declare function loadProvidersConfig(logger?: Logger): ProvidersConfig;
77
+ export declare function isXaiProviderEnabled(config: ProvidersConfig, env?: NodeJS.ProcessEnv): boolean;
package/dist/config.js CHANGED
@@ -56,6 +56,9 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
56
56
  function defaultPersistenceConfigPath() {
57
57
  return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
58
58
  }
59
+ export function defaultGatewayConfigPath() {
60
+ return defaultPersistenceConfigPath();
61
+ }
59
62
  function readPersistenceFile(configPath, logger) {
60
63
  if (!existsSync(configPath)) {
61
64
  return { raw: undefined, sourcePath: null };
@@ -248,3 +251,84 @@ export function minStableTokensForModel(config, modelName) {
248
251
  return table.haiku;
249
252
  return table.default;
250
253
  }
254
+ export const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
255
+ export const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
256
+ export const DEFAULT_XAI_MODEL = "grok-build-0.1";
257
+ function isHttpsOrLoopbackUrl(value) {
258
+ try {
259
+ const url = new URL(value);
260
+ if (url.protocol === "https:")
261
+ return true;
262
+ if (url.protocol !== "http:")
263
+ return false;
264
+ return ["localhost", "127.0.0.1", "::1", "[::1]"].includes(url.hostname);
265
+ }
266
+ catch {
267
+ return false;
268
+ }
269
+ }
270
+ const XaiProviderSchema = z
271
+ .object({
272
+ api_key_env: z.string().min(1).default(DEFAULT_XAI_API_KEY_ENV),
273
+ base_url: z
274
+ .string()
275
+ .url()
276
+ .refine(isHttpsOrLoopbackUrl, {
277
+ message: "base_url must use https unless it targets localhost/loopback for tests",
278
+ })
279
+ .default(DEFAULT_XAI_BASE_URL),
280
+ default_model: z.string().min(1).default(DEFAULT_XAI_MODEL),
281
+ })
282
+ .strict();
283
+ function readProvidersFile(configPath, logger) {
284
+ if (!existsSync(configPath)) {
285
+ return { raw: undefined, sourcePath: null };
286
+ }
287
+ try {
288
+ const require = createRequire(import.meta.url);
289
+ const TOML = require("smol-toml");
290
+ const text = readFileSync(configPath, "utf-8");
291
+ const parsed = TOML.parse(text);
292
+ return { raw: parsed?.providers, sourcePath: configPath };
293
+ }
294
+ catch (err) {
295
+ logger.error(`Failed to parse gateway config at ${configPath}; using provider defaults`, err);
296
+ return { raw: undefined, sourcePath: null };
297
+ }
298
+ }
299
+ export function loadProvidersConfig(logger = noopLogger) {
300
+ const configPath = defaultGatewayConfigPath();
301
+ const { raw, sourcePath } = readProvidersFile(configPath, logger);
302
+ const providers = raw ?? {};
303
+ const rawXai = providers.xai;
304
+ if (rawXai === undefined) {
305
+ return {
306
+ xai: null,
307
+ sources: { configFile: sourcePath },
308
+ };
309
+ }
310
+ const parsed = XaiProviderSchema.safeParse(rawXai);
311
+ if (!parsed.success) {
312
+ logWarn(logger, "Invalid [providers.xai] config; xAI API provider disabled", {
313
+ error: parsed.error.message,
314
+ });
315
+ return {
316
+ xai: null,
317
+ sources: { configFile: sourcePath },
318
+ };
319
+ }
320
+ return {
321
+ xai: {
322
+ apiKeyEnv: parsed.data.api_key_env,
323
+ baseUrl: parsed.data.base_url,
324
+ defaultModel: parsed.data.default_model,
325
+ },
326
+ sources: { configFile: sourcePath },
327
+ };
328
+ }
329
+ export function isXaiProviderEnabled(config, env = process.env) {
330
+ const keyEnv = config.xai?.apiKeyEnv;
331
+ if (!keyEnv)
332
+ return false;
333
+ return typeof env[keyEnv] === "string" && env[keyEnv].trim().length > 0;
334
+ }
package/dist/executor.js CHANGED
@@ -269,30 +269,26 @@ export function killAllProcessGroups() {
269
269
  });
270
270
  }
271
271
  export function killProcessGroup(proc, signal) {
272
- if (proc.pid) {
273
- if (process.platform === "win32") {
274
- return killWindowsProcessTree(proc.pid);
275
- }
276
- try {
277
- process.kill(-proc.pid, signal);
278
- return true;
279
- }
280
- catch (err) {
281
- if (err.code !== "ESRCH") {
282
- try {
283
- return proc.kill(signal);
284
- }
285
- catch {
286
- return false;
287
- }
288
- }
289
- return false;
290
- }
272
+ const pid = proc.pid;
273
+ if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) {
274
+ return false;
275
+ }
276
+ if (process.platform === "win32") {
277
+ return killWindowsProcessTree(pid);
291
278
  }
292
279
  try {
293
- return proc.kill(signal);
280
+ process.kill(-pid, signal);
281
+ return true;
294
282
  }
295
- catch {
283
+ catch (err) {
284
+ if (err.code !== "ESRCH") {
285
+ try {
286
+ return proc.kill(signal);
287
+ }
288
+ catch {
289
+ return false;
290
+ }
291
+ }
296
292
  return false;
297
293
  }
298
294
  }
@@ -1,6 +1,7 @@
1
+ import type { ProviderType } from "./session-manager.js";
1
2
  export interface FlightLogStart {
2
3
  correlationId: string;
3
- cli: "claude" | "codex" | "gemini" | "grok" | "mistral";
4
+ cli: ProviderType;
4
5
  model: string;
5
6
  prompt: string;
6
7
  system?: string;
package/dist/index.d.ts CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { z } from "zod/v3";
4
- import { ISessionManager } from "./session-manager.js";
4
+ import { ISessionManager, type ProviderType } from "./session-manager.js";
5
5
  import { ResourceProvider } from "./resources.js";
6
6
  import { PerformanceMetrics } from "./metrics.js";
7
- import { type PersistenceConfig, type CacheAwarenessConfig } from "./config.js";
7
+ import { type PersistenceConfig, type CacheAwarenessConfig, type ProvidersConfig } from "./config.js";
8
+ import { type XaiReasoningEffort } from "./xai-api-provider.js";
8
9
  import { DatabaseConnection } from "./db.js";
9
10
  import { AsyncJobManager } from "./async-job-manager.js";
10
11
  import { ApprovalManager, ApprovalRecord } from "./approval-manager.js";
@@ -44,7 +45,7 @@ declare const logger: {
44
45
  debug: (message: string, ...args: any[]) => void;
45
46
  };
46
47
  type GatewayLogger = typeof logger;
47
- export declare function buildServerInstructions(asyncJobsEnabled: boolean): string;
48
+ export declare function buildServerInstructions(asyncJobsEnabled: boolean, grokApiToolsEnabled?: boolean): string;
48
49
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
49
50
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
50
51
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
@@ -58,9 +59,9 @@ export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
58
59
  name?: string | undefined;
59
60
  ref?: string | undefined;
60
61
  }>]>;
61
- export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
62
- export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
63
- export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
62
+ export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral", "grok-api"];
63
+ export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral", "grok-api"]>;
64
+ export type SessionProvider = ProviderType;
64
65
  export interface GatewayServerDeps {
65
66
  sessionManager?: ISessionManager;
66
67
  resourceProvider?: ResourceProvider;
@@ -72,6 +73,7 @@ export interface GatewayServerDeps {
72
73
  logger?: GatewayLogger;
73
74
  persistence?: PersistenceConfig;
74
75
  cacheAwareness?: CacheAwarenessConfig;
76
+ providers?: ProvidersConfig;
75
77
  }
76
78
  export interface GatewayServerRuntime {
77
79
  sessionManager: ISessionManager;
@@ -84,10 +86,12 @@ export interface GatewayServerRuntime {
84
86
  logger: GatewayLogger;
85
87
  persistence: PersistenceConfig;
86
88
  cacheAwareness: CacheAwarenessConfig;
89
+ providers: ProvidersConfig;
87
90
  }
88
91
  export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
89
92
  isolateState?: boolean;
90
93
  }): GatewayServerRuntime;
94
+ export declare function shouldRegisterGrokApiTools(providers: ProvidersConfig): boolean;
91
95
  export interface ResolvedWorktree {
92
96
  cwd?: string;
93
97
  worktreePath?: string;
@@ -285,6 +289,22 @@ export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams,
285
289
  env: Record<string, string>;
286
290
  ignoredDisallowedTools: boolean;
287
291
  };
292
+ export interface GrokApiRequestParams {
293
+ prompt?: string;
294
+ promptParts?: PromptParts;
295
+ model?: string;
296
+ sessionId?: string;
297
+ createNewSession?: boolean;
298
+ correlationId?: string;
299
+ optimizePrompt: boolean;
300
+ optimizeResponse?: boolean;
301
+ maxOutputTokens?: number;
302
+ temperature?: number;
303
+ topP?: number;
304
+ reasoningEffort?: XaiReasoningEffort;
305
+ timeoutMs?: number;
306
+ }
307
+ export declare function handleGrokApiRequest(deps: HandlerDeps, params: GrokApiRequestParams): Promise<ExtendedToolResponse>;
288
308
  export interface GeminiRequestParams {
289
309
  prompt?: string;
290
310
  promptParts?: PromptParts;