npm - llm-cli-gateway - Versions diffs - 2.3.0 → 2.4.0 - Mend

llm-cli-gateway 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/CHANGELOG.md +70 -9
package/README.md +1 -1
package/dist/config.d.ts +17 -0
package/dist/config.js +84 -0
package/dist/executor.js +17 -21
package/dist/flight-recorder.d.ts +2 -1
package/dist/index.d.ts +26 -6
package/dist/index.js +608 -54
package/dist/metrics.d.ts +3 -3
package/dist/metrics.js +8 -8
package/dist/request-helpers.d.ts +8 -8
package/dist/resources.js +56 -7
package/dist/session-manager-pg.d.ts +6 -6
package/dist/session-manager-pg.js +1 -0
package/dist/session-manager.d.ts +16 -12
package/dist/session-manager.js +4 -1
package/dist/upstream-contracts.d.ts +84 -0
package/dist/upstream-contracts.js +698 -6
package/dist/xai-api-provider.d.ts +43 -0
package/dist/xai-api-provider.js +191 -0
package/migrations/001_initial_schema.sql +65 -0
package/migrations/002_session_ids_as_text.sql +26 -0
package/migrations/003_provider_type_sessions.sql +20 -0
package/npm-shrinkwrap.json +2 -2
package/package.json +2 -1

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,67 @@ All notable changes to the llm-cli-gateway project.
 ## Unreleased
+## [2.4.0] - 2026-06-08: Direct Grok API provider and provider-owned sessions
+### Added
+- Direct xAI Grok API provider support:
+  - new `ProviderType` split so stored sessions, metrics, flight-recorder rows,
+    and migrations can represent the non-CLI `grok-api` provider alongside the
+    existing CLI providers;
+  - `[providers.xai]` config loading with API-key env indirection and provider
+    enablement gating;
+  - `grok_api_request`, registered only when xAI API config and credentials are
+    present, backed by the xAI Responses API;
+  - xAI response parsing, retry/circuit-breaker handling, usage/cost metadata,
+    and `previous_response_id` session metadata;
+  - focused migration, session-manager, provider-config, and Grok API tests.
+- Provider subcommand contract resources and tooling:
+  - provider subcommand catalog/detail resource generation;
+  - `provider_subcommands_list`, `provider_subcommand_contract`, and
+    `provider_subcommand_drift` surfaces exercised through MCP Inspector.
+- Host auto-upgrade operations:
+  - `scripts/host-upgrade.sh` for staged, atomic npm-based host upgrades with
+    rollback support;
+  - user systemd service/timer units for scheduled gateway auto-upgrade checks.
+- Direct Grok API provider design draft documenting the follow-on async-runner
+  and capability-table design work.
+### Fixed
+- Provider-owned stored session enforcement now rejects cross-provider reuse for
+  all request handlers, including `claude_request`, `codex_request`,
+  `gemini_request`, `grok_request`, `mistral_request`, their async variants,
+  `codex_fork_session`, and `grok_api_request`.
+- `sessions://all` now reports active sessions across all provider types,
+  including `grok-api`.
+- MCP resource URI schemes now use standards-valid hyphenated forms:
+  `cache-state://...` and `provider-subcommands://...`. MCP Inspector exposed
+  the previous underscore schemes as invalid URL schemes for standard MCP
+  clients. Legacy direct `provider_subcommands://...` reads remain accepted for
+  internal compatibility tests/callers, but advertised resources now use only
+  valid URI schemes.
+- `src/executor.ts` avoids pidless child-process kill attempts.
+### Changed
+- GitHub Actions pins were refreshed to current pinned action SHAs.
+- Provider subcommand support remains CLI-only where appropriate; the direct
+  API provider is excluded from spawnable-CLI contract paths.
+### Verification
+- Dirty-tree stack split and release evidence recorded in
+  `docs/reviews/dirty-tree-stack-split-verification-2026-06-08.md`.
+- MCP Inspector smoke covered tools/list, resources/list, read-only tool calls,
+  session lifecycle, direct xAI API registration through a loopback mock, and
+  exhaustive advertised-resource reads.
+- Multi-LLM review completed with Claude, Codex, Gemini, Grok, and Mistral
+  approvals for the main stack and the Inspector-discovered URI-scheme fix.
+- Final merged `master` verification before mirror push:
+  `npm run check` passed, including build, lint, format check, 67 test files /
+  1124 tests, and the release security audit.
 ## [2.3.0] - 2026-06-08: MCP tool annotations and client safety hints
 ### Added
@@ -1512,7 +1573,7 @@ boundary bypass); all are addressed in the two follow-up fix commits.
 Closes the two telemetry gaps that v1.6.0 explicitly deferred: async-path
 flight-recorder integration and Codex parser support for the actual
 `cached_input_tokens` field the current Codex CLI emits. Both ship
-together because they jointly close out `cache_state://*` completeness
+together because they jointly close out `cache-state://*` completeness
 for the async tools and the codex CLI.
 ### Added — async-path flight recorder writes
@@ -1552,9 +1613,9 @@ stderr, exitCode }> }` so the manager constructor can write FR
   `{ count: 0, orphaned: [] }` (in-process state can't be orphaned).
   Breaking change to the `JobStore` interface; the `PostgresJobStore`
   stub was updated to match (the impl is still not yet shipped).
-- `cache_state://global`, `cache_state://session/{id}`, and
-  `cache_state://prefix/{hash}` aggregates now include async-job
-  activity. No query changes — `cache_state://*` already didn't filter
+- `cache-state://global`, `cache-state://session/{id}`, and
+  `cache-state://prefix/{hash}` aggregates now include async-job
+  activity. No query changes — `cache-state://*` already didn't filter
   on `asyncJobId`, so the new rows participate naturally.
 ### Fixed — Codex parser accepts current CLI's cache-token field
@@ -1585,7 +1646,7 @@ distinguishing `errorMessage`. The underlying `jobs` table in JobStore
 retains the distinct `"canceled"` / `"orphaned"` statuses for
 `getJobSnapshot` callers. External consumers of `~/.llm-cli-gateway/
 logs.db` that filter `status='failed'` will count cancels and boot-time
-orphans as errors; `cache_state://*` aggregation does not distinguish.
+orphans as errors; `cache-state://*` aggregation does not distinguish.
 ### No config or schema changes
@@ -1647,7 +1708,7 @@ Pure documentation release; zero source-code changes since 1.6.0.
 ### Changed — 12 SKILL.md files current with v1.6.0
 - All 12 skills (7 under `skills/`, 5 under `.agents/skills/`) extended
-  with `promptParts`, `cache_state://` MCP resources, and (where the
+  with `promptParts`, `cache-state://` MCP resources, and (where the
   skill's centre of gravity is session continuity) the
   `cache_ttl_expiring_soon` warning. Depth tiered by skill audience:
   multi-llm-orchestration, model-routing, multi-llm-consensus,
@@ -1754,9 +1815,9 @@ Also includes (beyond cache-awareness):
   `requests`, plus `idx_requests_stable_hash`. Legacy rows keep NULL.
 - **Cache-state MCP resources** (read-only, tokens/hashes/aggregates only —
   never raw prompt text):
-  - `cache_state://global` (last 24h aggregates + per-CLI breakdown).
-  - `cache_state://session/{sessionId}` (per-session).
-  - `cache_state://prefix/{hash}` (per-stable-prefix-hash).
+  - `cache-state://global` (last 24h aggregates + per-CLI breakdown).
+  - `cache-state://session/{sessionId}` (per-session).
+  - `cache-state://prefix/{hash}` (per-stable-prefix-hash).
 - **`session_get.cacheState`** projection: compact hit-rate / hit-count /
   cache-token-totals / estimated-savings-USD block, present only when the
   session has prior requests. Omitted entirely (not null, not empty) for

package/README.md CHANGED Viewed

@@ -164,7 +164,7 @@ docker compose -f docker/personal.compose.yml run --rm doctor
 - **SQLite Flight Recorder**: Every request/response logged to `~/.llm-cli-gateway/logs.db` with correlation IDs, token usage, duration, retry counts, and circuit breaker state. Browse with [Datasette](https://datasette.io/): `datasette ~/.llm-cli-gateway/logs.db`
 - **Structured Metadata**: Tool responses include machine-readable `structuredContent` (model, cli, correlationId, sessionId, durationMs, token counts)
-- **Cache observability resources**: `cache_state://global`, `cache_state://session/{id}`, and `cache_state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
+- **Cache observability resources**: `cache-state://global`, `cache-state://session/{id}`, and `cache-state://prefix/{hash}` MCP resources return aggregate cache hit/miss/savings — tokens and hashes only, no prompt text. `session_get` includes a `cacheState` block when the session has prior requests.
 ### Cache-aware operation

package/dist/config.d.ts CHANGED Viewed

@@ -32,6 +32,7 @@ export interface PersistenceConfigSources {
     configFile: string | null;
     envOverrides: string[];
 }
+export declare function defaultGatewayConfigPath(): string;
 export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
 export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
 export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
@@ -58,3 +59,19 @@ export interface CacheAwarenessConfig {
 }
 export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
 export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;
+export declare const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
+export declare const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
+export declare const DEFAULT_XAI_MODEL = "grok-build-0.1";
+export interface XaiProviderConfig {
+    apiKeyEnv: string;
+    baseUrl: string;
+    defaultModel: string;
+}
+export interface ProvidersConfig {
+    xai: XaiProviderConfig | null;
+    sources: {
+        configFile: string | null;
+    };
+}
+export declare function loadProvidersConfig(logger?: Logger): ProvidersConfig;
+export declare function isXaiProviderEnabled(config: ProvidersConfig, env?: NodeJS.ProcessEnv): boolean;

package/dist/config.js CHANGED Viewed

@@ -56,6 +56,9 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
 function defaultPersistenceConfigPath() {
     return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
 }
+export function defaultGatewayConfigPath() {
+    return defaultPersistenceConfigPath();
+}
 function readPersistenceFile(configPath, logger) {
     if (!existsSync(configPath)) {
         return { raw: undefined, sourcePath: null };
@@ -248,3 +251,84 @@ export function minStableTokensForModel(config, modelName) {
         return table.haiku;
     return table.default;
 }
+export const DEFAULT_XAI_API_KEY_ENV = "XAI_API_KEY";
+export const DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1";
+export const DEFAULT_XAI_MODEL = "grok-build-0.1";
+function isHttpsOrLoopbackUrl(value) {
+    try {
+        const url = new URL(value);
+        if (url.protocol === "https:")
+            return true;
+        if (url.protocol !== "http:")
+            return false;
+        return ["localhost", "127.0.0.1", "::1", "[::1]"].includes(url.hostname);
+    }
+    catch {
+        return false;
+    }
+}
+const XaiProviderSchema = z
+    .object({
+    api_key_env: z.string().min(1).default(DEFAULT_XAI_API_KEY_ENV),
+    base_url: z
+        .string()
+        .url()
+        .refine(isHttpsOrLoopbackUrl, {
+        message: "base_url must use https unless it targets localhost/loopback for tests",
+    })
+        .default(DEFAULT_XAI_BASE_URL),
+    default_model: z.string().min(1).default(DEFAULT_XAI_MODEL),
+})
+    .strict();
+function readProvidersFile(configPath, logger) {
+    if (!existsSync(configPath)) {
+        return { raw: undefined, sourcePath: null };
+    }
+    try {
+        const require = createRequire(import.meta.url);
+        const TOML = require("smol-toml");
+        const text = readFileSync(configPath, "utf-8");
+        const parsed = TOML.parse(text);
+        return { raw: parsed?.providers, sourcePath: configPath };
+    }
+    catch (err) {
+        logger.error(`Failed to parse gateway config at ${configPath}; using provider defaults`, err);
+        return { raw: undefined, sourcePath: null };
+    }
+}
+export function loadProvidersConfig(logger = noopLogger) {
+    const configPath = defaultGatewayConfigPath();
+    const { raw, sourcePath } = readProvidersFile(configPath, logger);
+    const providers = raw ?? {};
+    const rawXai = providers.xai;
+    if (rawXai === undefined) {
+        return {
+            xai: null,
+            sources: { configFile: sourcePath },
+        };
+    }
+    const parsed = XaiProviderSchema.safeParse(rawXai);
+    if (!parsed.success) {
+        logWarn(logger, "Invalid [providers.xai] config; xAI API provider disabled", {
+            error: parsed.error.message,
+        });
+        return {
+            xai: null,
+            sources: { configFile: sourcePath },
+        };
+    }
+    return {
+        xai: {
+            apiKeyEnv: parsed.data.api_key_env,
+            baseUrl: parsed.data.base_url,
+            defaultModel: parsed.data.default_model,
+        },
+        sources: { configFile: sourcePath },
+    };
+}
+export function isXaiProviderEnabled(config, env = process.env) {
+    const keyEnv = config.xai?.apiKeyEnv;
+    if (!keyEnv)
+        return false;
+    return typeof env[keyEnv] === "string" && env[keyEnv].trim().length > 0;
+}

package/dist/executor.js CHANGED Viewed

@@ -269,30 +269,26 @@ export function killAllProcessGroups() {
     });
 }
 export function killProcessGroup(proc, signal) {
-    if (proc.pid) {
-        if (process.platform === "win32") {
-            return killWindowsProcessTree(proc.pid);
-        }
-        try {
-            process.kill(-proc.pid, signal);
-            return true;
-        }
-        catch (err) {
-            if (err.code !== "ESRCH") {
-                try {
-                    return proc.kill(signal);
-                }
-                catch {
-                    return false;
-                }
-            }
-            return false;
-        }
+    const pid = proc.pid;
+    if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) {
+        return false;
+    }
+    if (process.platform === "win32") {
+        return killWindowsProcessTree(pid);
     }
     try {
-        return proc.kill(signal);
+        process.kill(-pid, signal);
+        return true;
     }
-    catch {
+    catch (err) {
+        if (err.code !== "ESRCH") {
+            try {
+                return proc.kill(signal);
+            }
+            catch {
+                return false;
+            }
+        }
         return false;
     }
 }

package/dist/flight-recorder.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
+import type { ProviderType } from "./session-manager.js";
 export interface FlightLogStart {
     correlationId: string;
-    cli: "claude" | "codex" | "gemini" | "grok" | "mistral";
+    cli: ProviderType;
     model: string;
     prompt: string;
     system?: string;

package/dist/index.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 #!/usr/bin/env node
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod/v3";
-import { ISessionManager } from "./session-manager.js";
+import { ISessionManager, type ProviderType } from "./session-manager.js";
 import { ResourceProvider } from "./resources.js";
 import { PerformanceMetrics } from "./metrics.js";
-import { type PersistenceConfig, type CacheAwarenessConfig } from "./config.js";
+import { type PersistenceConfig, type CacheAwarenessConfig, type ProvidersConfig } from "./config.js";
+import { type XaiReasoningEffort } from "./xai-api-provider.js";
 import { DatabaseConnection } from "./db.js";
 import { AsyncJobManager } from "./async-job-manager.js";
 import { ApprovalManager, ApprovalRecord } from "./approval-manager.js";
@@ -44,7 +45,7 @@ declare const logger: {
     debug: (message: string, ...args: any[]) => void;
 };
 type GatewayLogger = typeof logger;
-export declare function buildServerInstructions(asyncJobsEnabled: boolean): string;
+export declare function buildServerInstructions(asyncJobsEnabled: boolean, grokApiToolsEnabled?: boolean): string;
 export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
 export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
 export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
@@ -58,9 +59,9 @@ export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
     name?: string | undefined;
     ref?: string | undefined;
 }>]>;
-export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
-export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
-export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
+export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral", "grok-api"];
+export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral", "grok-api"]>;
+export type SessionProvider = ProviderType;
 export interface GatewayServerDeps {
     sessionManager?: ISessionManager;
     resourceProvider?: ResourceProvider;
@@ -72,6 +73,7 @@ export interface GatewayServerDeps {
     logger?: GatewayLogger;
     persistence?: PersistenceConfig;
     cacheAwareness?: CacheAwarenessConfig;
+    providers?: ProvidersConfig;
 }
 export interface GatewayServerRuntime {
     sessionManager: ISessionManager;
@@ -84,10 +86,12 @@ export interface GatewayServerRuntime {
     logger: GatewayLogger;
     persistence: PersistenceConfig;
     cacheAwareness: CacheAwarenessConfig;
+    providers: ProvidersConfig;
 }
 export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
     isolateState?: boolean;
 }): GatewayServerRuntime;
+export declare function shouldRegisterGrokApiTools(providers: ProvidersConfig): boolean;
 export interface ResolvedWorktree {
     cwd?: string;
     worktreePath?: string;
@@ -285,6 +289,22 @@ export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams,
     env: Record<string, string>;
     ignoredDisallowedTools: boolean;
 };
+export interface GrokApiRequestParams {
+    prompt?: string;
+    promptParts?: PromptParts;
+    model?: string;
+    sessionId?: string;
+    createNewSession?: boolean;
+    correlationId?: string;
+    optimizePrompt: boolean;
+    optimizeResponse?: boolean;
+    maxOutputTokens?: number;
+    temperature?: number;
+    topP?: number;
+    reasoningEffort?: XaiReasoningEffort;
+    timeoutMs?: number;
+}
+export declare function handleGrokApiRequest(deps: HandlerDeps, params: GrokApiRequestParams): Promise<ExtendedToolResponse>;
 export interface GeminiRequestParams {
     prompt?: string;
     promptParts?: PromptParts;