npm - llm-cli-gateway - Versions diffs - 1.17.3 → 1.17.5 - Mend

llm-cli-gateway 1.17.3 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

package/CHANGELOG.md +45 -0
package/README.md +1 -1
package/dist/approval-manager.js +0 -8
package/dist/async-job-manager.d.ts +0 -113
package/dist/async-job-manager.js +6 -124
package/dist/cache-stats.d.ts +0 -89
package/dist/cache-stats.js +0 -62
package/dist/claude-mcp-config.js +0 -1
package/dist/cli-updater.d.ts +0 -8
package/dist/cli-updater.js +0 -12
package/dist/codex-json-parser.d.ts +0 -20
package/dist/codex-json-parser.js +0 -21
package/dist/config.d.ts +0 -31
package/dist/config.js +2 -72
package/dist/db.d.ts +0 -18
package/dist/db.js +0 -22
package/dist/doctor.d.ts +0 -49
package/dist/doctor.js +0 -47
package/dist/endpoint-exposure.js +0 -1
package/dist/executor.d.ts +0 -19
package/dist/executor.js +3 -38
package/dist/flight-recorder.d.ts +0 -26
package/dist/flight-recorder.js +1 -70
package/dist/gemini-json-parser.d.ts +0 -25
package/dist/gemini-json-parser.js +0 -28
package/dist/health.d.ts +0 -3
package/dist/health.js +0 -3
package/dist/index.d.ts +12 -208
package/dist/index.js +116 -588
package/dist/job-store.d.ts +0 -74
package/dist/job-store.js +1 -73
package/dist/logger.d.ts +0 -7
package/dist/logger.js +0 -6
package/dist/migrate-sessions.d.ts +0 -3
package/dist/migrate-sessions.js +0 -16
package/dist/migrate.js +1 -18
package/dist/mistral-meta-json-parser.js +0 -67
package/dist/model-registry.js +0 -13
package/dist/pricing.d.ts +0 -46
package/dist/pricing.js +0 -47
package/dist/process-monitor.d.ts +0 -15
package/dist/process-monitor.js +2 -31
package/dist/prompt-parts.d.ts +6 -31
package/dist/prompt-parts.js +0 -11
package/dist/provider-status.d.ts +0 -8
package/dist/provider-status.js +0 -11
package/dist/request-helpers.d.ts +4 -316
package/dist/request-helpers.js +13 -231
package/dist/resources.d.ts +0 -20
package/dist/resources.js +1 -34
package/dist/retry.d.ts +0 -45
package/dist/retry.js +3 -40
package/dist/session-manager-pg.d.ts +0 -32
package/dist/session-manager-pg.js +0 -32
package/dist/session-manager.d.ts +0 -21
package/dist/session-manager.js +1 -15
package/dist/stream-json-parser.d.ts +0 -18
package/dist/stream-json-parser.js +0 -22
package/dist/upstream-contracts.d.ts +0 -55
package/dist/upstream-contracts.js +86 -64
package/dist/validation-orchestrator.js +0 -3
package/dist/worktree-manager.d.ts +0 -9
package/dist/worktree-manager.js +0 -21
package/package.json +1 -1

package/dist/model-registry.js CHANGED Viewed

@@ -14,9 +14,6 @@ const FALLBACK_INFO = {
         modelOrder: ["opus", "sonnet", "haiku"],
     },
     codex: {
-        // U26: gpt-5.5 is the bundled fallback default. Config/env overrides still
-        // win (applyCodexOverrides runs after). Older aliases are retained in the
-        // models map so callers that still pass `gpt-5.3-codex` resolve cleanly.
         description: "OpenAI's Codex CLI - best for code execution in sandboxed environments",
         models: {
             "gpt-5.5": "Latest Codex frontier model. Best for: most Codex tasks (default since U26)",
@@ -36,8 +33,6 @@ const FALLBACK_INFO = {
         },
     },
     grok: {
-        // No hardcoded `defaultModel`. Let Grok CLI pick its own built-in default
-        // unless an explicit value is found via env vars in applyGrokOverrides.
         description: "xAI's Grok Build CLI - best for agentic coding tasks via xAI's Grok models",
         models: {
             "grok-build": "Default Grok model for code/agentic tasks. Best for: most Grok build sessions",
@@ -45,10 +40,6 @@ const FALLBACK_INFO = {
         modelOrder: ["grok-build"],
     },
     mistral: {
-        // Mistral Vibe selects the active model via VIBE_ACTIVE_MODEL; there is no
-        // `--model` flag. Do not set a bundled default here: Vibe's own default and
-        // user config move independently of this gateway. The model list is only a
-        // low-confidence recovery set for stale config/model-not-found failures.
         description: "Mistral AI's Vibe CLI - agentic coding via Mistral models (model selection via VIBE_ACTIVE_MODEL env var)",
         models: {
             "mistral-medium-3.5": "Vibe coding model alias observed in Vibe 2.x defaults. Used only when discovery/config requires an explicit VIBE_ACTIVE_MODEL.",
@@ -99,9 +90,6 @@ export function resolveModelAlias(cli, model, info) {
     const normalized = trimmed.toLowerCase();
     const cliInfo = info[cli];
     if (normalized === "default" || normalized === "latest") {
-        // If no default is configured, return undefined so the CLI picks its own
-        // built-in default. Avoids passing the literal string "default"/"latest"
-        // as a model name to the CLI.
         return cliInfo.defaultModel;
     }
     const alias = resolveConfiguredAlias(cliInfo, normalized);
@@ -393,7 +381,6 @@ function applyMistralOverrides(info) {
     addEnvModels(info, "MISTRAL_MODELS");
     addEnvAliases(info, "mistral", "MISTRAL_MODEL_ALIASES");
     addGlobalEnvAliases(info, "mistral");
-    // Vibe uses VIBE_ACTIVE_MODEL instead of a CLI flag. Explicit env values win.
     const envDefault = process.env.MISTRAL_DEFAULT_MODEL || process.env.VIBE_ACTIVE_MODEL;
     if (envDefault) {
         const source = process.env.MISTRAL_DEFAULT_MODEL

package/dist/pricing.d.ts CHANGED Viewed

@@ -1,54 +1,8 @@
-/**
- * Per-model pricing for cache-savings estimation.
- *
- * `priced_as_of` is the date these numbers were last refreshed. The
- * gateway's doctor surfaces this so operators can see when the table is
- * stale — pricing is an ESTIMATE, not a billing number.
- *
- * Pricing units: USD per 1M tokens.
- *
- * Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
- *   - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
- *   - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
- *   - Opus 4 / 4.1 (deprecated): same as 4.5+.
- *   - Haiku 4.5: $1 input / $5 output.
- *   - Haiku 3.5 (Vertex-only): $0.80 input / $4 output.
- *
- * Cache pricing multipliers (Anthropic):
- *   - cache write 5-min TTL: 1.25× base input.
- *   - cache write 1-hour TTL: 2× base input.
- *   - cache read: 0.10× base input (90% savings).
- *
- * Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
- * does not publish a stable per-CLI table). Cached input ~50% of base.
- *
- * Gemini, Grok, Mistral: pricing varies by model and is not surfaced in
- * gateway today. Returns 0 for unknown.
- */
 export interface PricePerMillion {
     inputUsd: number;
     outputUsd: number;
-    /** Multiplier on inputUsd for a cache HIT (read). Anthropic: 0.10. */
     cacheReadMultiplier: number;
 }
 export declare const PRICING_AS_OF = "2026-05-26";
-/**
- * Look up pricing by (cli, model) name. Best-effort; unknown models return
- * ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
- * rather than throwing OR over-reporting savings on an unpriced model.
- *
- * Recognised model families:
- *   - claude: model name contains "sonnet" | "opus" | "haiku".
- *   - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
- *
- * Anything outside these explicit matches returns ZERO. This is a
- * deliberate conservative choice — we'd rather under-report savings on
- * an unrecognised model than over-report on one whose actual pricing we
- * don't know. Update this table when a new model family ships.
- */
 export declare function getPricing(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string): PricePerMillion;
-/**
- * Estimate USD saved by `cacheReadTokens` being served from cache instead
- * of fresh input. Returns 0 for zero cache reads or unknown pricing.
- */
 export declare function estimateCacheSavingsUsd(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string, cacheReadTokens: number): number;

package/dist/pricing.js CHANGED Viewed

@@ -1,30 +1,3 @@
-/**
- * Per-model pricing for cache-savings estimation.
- *
- * `priced_as_of` is the date these numbers were last refreshed. The
- * gateway's doctor surfaces this so operators can see when the table is
- * stale — pricing is an ESTIMATE, not a billing number.
- *
- * Pricing units: USD per 1M tokens.
- *
- * Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
- *   - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
- *   - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
- *   - Opus 4 / 4.1 (deprecated): same as 4.5+.
- *   - Haiku 4.5: $1 input / $5 output.
- *   - Haiku 3.5 (Vertex-only): $0.80 input / $4 output.
- *
- * Cache pricing multipliers (Anthropic):
- *   - cache write 5-min TTL: 1.25× base input.
- *   - cache write 1-hour TTL: 2× base input.
- *   - cache read: 0.10× base input (90% savings).
- *
- * Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
- * does not publish a stable per-CLI table). Cached input ~50% of base.
- *
- * Gemini, Grok, Mistral: pricing varies by model and is not surfaced in
- * gateway today. Returns 0 for unknown.
- */
 export const PRICING_AS_OF = "2026-05-26";
 const ANTHROPIC_SONNET = {
     inputUsd: 3,
@@ -44,7 +17,6 @@ const ANTHROPIC_HAIKU = {
 const OPENAI_GPT5 = {
     inputUsd: 1.25,
     outputUsd: 10,
-    // OpenAI prompt-caching: cached input tokens billed at 50% of base.
     cacheReadMultiplier: 0.5,
 };
 const ZERO = {
@@ -52,20 +24,6 @@ const ZERO = {
     outputUsd: 0,
     cacheReadMultiplier: 0,
 };
-/**
- * Look up pricing by (cli, model) name. Best-effort; unknown models return
- * ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
- * rather than throwing OR over-reporting savings on an unpriced model.
- *
- * Recognised model families:
- *   - claude: model name contains "sonnet" | "opus" | "haiku".
- *   - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
- *
- * Anything outside these explicit matches returns ZERO. This is a
- * deliberate conservative choice — we'd rather under-report savings on
- * an unrecognised model than over-report on one whose actual pricing we
- * don't know. Update this table when a new model family ships.
- */
 export function getPricing(cli, model) {
     const lower = model.toLowerCase();
     if (cli === "claude") {
@@ -84,17 +42,12 @@ export function getPricing(cli, model) {
     }
     return ZERO;
 }
-/**
- * Estimate USD saved by `cacheReadTokens` being served from cache instead
- * of fresh input. Returns 0 for zero cache reads or unknown pricing.
- */
 export function estimateCacheSavingsUsd(cli, model, cacheReadTokens) {
     if (cacheReadTokens <= 0)
         return 0;
     const p = getPricing(cli, model);
     if (p.inputUsd === 0)
         return 0;
-    // Savings = (fresh-input-cost) - (cache-read-cost) = inputUsd × (1 - mult)
     const savedPerToken = (p.inputUsd * (1 - p.cacheReadMultiplier)) / 1_000_000;
     return cacheReadTokens * savedPerToken;
 }

package/dist/process-monitor.d.ts CHANGED Viewed

@@ -1,7 +1,3 @@
-/**
- * On-demand process health monitoring via /proc (Linux).
- * Gracefully degrades on non-Linux platforms.
- */
 import type { Logger } from "./logger.js";
 export interface ProcessHealth {
     pid: number;
@@ -20,25 +16,15 @@ export interface JobHealth {
     isZombie: boolean;
     runningForMs: number;
 }
-/**
- * Parse /proc/[pid]/stat safely.
- * The `comm` field (field 2) is in parentheses and may contain spaces,
- * so we find the LAST ')' and parse remaining fields from there.
- */
 export declare function parseProcStat(content: string): {
     state: string;
     utime: number;
     stime: number;
 } | null;
-/**
- * Parse VmRSS from /proc/[pid]/status.
- * Returns RSS in kilobytes (already in kB in /proc/[pid]/status).
- */
 export declare function parseVmRss(content: string): number | null;
 export declare class ProcessMonitor {
     private prevSamples;
     constructor(_logger?: Logger);
-    /** Clear all cached CPU samples */
     reset(): void;
     sampleProcess(pid: number): ProcessHealth;
     checkJobHealth(jobs: {
@@ -48,6 +34,5 @@ export declare class ProcessMonitor {
         pid: number | null;
         startedAt: string;
     }[]): JobHealth[];
-    /** Clean up stale samples for PIDs that no longer exist */
     cleanupSamples(activePids: Set<number>): void;
 }

package/dist/process-monitor.js CHANGED Viewed

@@ -1,20 +1,10 @@
-/**
- * On-demand process health monitoring via /proc (Linux).
- * Gracefully degrades on non-Linux platforms.
- */
 import { readFileSync } from "fs";
-/**
- * Parse /proc/[pid]/stat safely.
- * The `comm` field (field 2) is in parentheses and may contain spaces,
- * so we find the LAST ')' and parse remaining fields from there.
- */
 export function parseProcStat(content) {
     const lastParen = content.lastIndexOf(")");
     if (lastParen === -1)
         return null;
-    const afterComm = content.slice(lastParen + 2); // skip ") "
+    const afterComm = content.slice(lastParen + 2);
     const fields = afterComm.split(" ");
-    // fields[0] = state, fields[11] = utime (14-3), fields[12] = stime (15-3)
     if (fields.length < 13)
         return null;
     const utime = parseInt(fields[11], 10);
@@ -27,22 +17,14 @@ export function parseProcStat(content) {
         stime,
     };
 }
-/**
- * Parse VmRSS from /proc/[pid]/status.
- * Returns RSS in kilobytes (already in kB in /proc/[pid]/status).
- */
 export function parseVmRss(content) {
     const match = content.match(/^VmRSS:\s+(\d+)\s+kB$/m);
     return match ? parseInt(match[1], 10) : null;
 }
-/**
- * Read total system CPU jiffies from /proc/stat.
- * Used to normalize per-process CPU into a percentage.
- */
 function getTotalCpuJiffies() {
     try {
         const content = readFileSync("/proc/stat", "utf-8");
-        const cpuLine = content.split("\n")[0]; // "cpu  user nice system idle ..."
+        const cpuLine = content.split("\n")[0];
         const fields = cpuLine.split(/\s+/).slice(1).map(Number);
         return fields.reduce((a, b) => a + b, 0);
     }
@@ -51,16 +33,13 @@ function getTotalCpuJiffies() {
     }
 }
 export class ProcessMonitor {
-    // Previous samples for CPU delta calculation
     prevSamples = new Map();
     constructor(_logger) { }
-    /** Clear all cached CPU samples */
     reset() {
         this.prevSamples.clear();
     }
     sampleProcess(pid) {
         const now = new Date().toISOString();
-        // 1. Existence check
         let alive = false;
         try {
             process.kill(pid, 0);
@@ -77,12 +56,10 @@ export class ProcessMonitor {
                     sampledAt: now,
                 };
             }
-            // EPERM = process exists but we can't signal it
             if (err.code === "EPERM") {
                 alive = true;
             }
         }
-        // 2. Parse /proc/[pid]/stat for state + CPU ticks
         let state = null;
         let cpuPercent = null;
         try {
@@ -90,7 +67,6 @@ export class ProcessMonitor {
             const parsed = parseProcStat(statContent);
             if (parsed) {
                 state = parsed.state;
-                // CPU delta calculation
                 const totalJiffies = getTotalCpuJiffies();
                 const prev = this.prevSamples.get(pid);
                 if (prev && totalJiffies !== null) {
@@ -100,7 +76,6 @@ export class ProcessMonitor {
                         cpuPercent = (processJiffiesDelta / totalJiffiesDelta) * 100;
                     }
                 }
-                // Store for next delta
                 if (totalJiffies !== null) {
                     this.prevSamples.set(pid, {
                         utime: parsed.utime,
@@ -112,16 +87,13 @@ export class ProcessMonitor {
             }
         }
         catch {
-            // /proc not available (non-Linux) — degrade gracefully
         }
-        // 3. Parse /proc/[pid]/status for VmRSS
         let memoryRssKb = null;
         try {
             const statusContent = readFileSync(`/proc/${pid}/status`, "utf-8");
             memoryRssKb = parseVmRss(statusContent);
         }
         catch {
-            // Non-Linux or process exited between checks
         }
         return { pid, alive, state, cpuPercent, memoryRssKb, sampledAt: now };
     }
@@ -151,7 +123,6 @@ export class ProcessMonitor {
             };
         });
     }
-    /** Clean up stale samples for PIDs that no longer exist */
     cleanupSamples(activePids) {
         for (const pid of this.prevSamples.keys()) {
             if (!activePids.has(pid)) {

package/dist/prompt-parts.d.ts CHANGED Viewed

@@ -9,20 +9,6 @@ export interface PromptParts {
     tools?: string;
     context?: string;
     task: string;
-    /**
-     * Slice κ (Claude only): per-block opt-in to Anthropic `cache_control`
-     * breakpoints. Setting `system: true` (or tools/context) marks that
-     * block with `cache_control: {type:"ephemeral", ttl:"1h"}` in the
-     * stream-json payload the gateway pipes to `claude --input-format
-     * stream-json`. The `task` block is NEVER marked (it's the volatile
-     * tail). Empty parts are silently skipped even if their flag is true.
-     *
-     * Constraint: callers MUST also pass `outputFormat:"stream-json"` —
-     * mixing cacheControl with text/json output returns an error response.
-     * `ttl` is hard-coded to `"1h"` because Claude Code injects its own
-     * 1h-marked system blocks ahead of caller content and Anthropic
-     * rejects a 1h block after a 5m block.
-     */
     cacheControl?: PromptPartsCacheControl;
 }
 export declare const PromptPartsSchema: z.ZodObject<{
@@ -35,32 +21,32 @@ export declare const PromptPartsSchema: z.ZodObject<{
         tools: z.ZodOptional<z.ZodBoolean>;
         context: z.ZodOptional<z.ZodBoolean>;
     }, "strict", z.ZodTypeAny, {
-        system?: boolean | undefined;
         tools?: boolean | undefined;
+        system?: boolean | undefined;
         context?: boolean | undefined;
     }, {
-        system?: boolean | undefined;
         tools?: boolean | undefined;
+        system?: boolean | undefined;
         context?: boolean | undefined;
     }>>;
 }, "strip", z.ZodTypeAny, {
     task: string;
-    system?: string | undefined;
     tools?: string | undefined;
+    system?: string | undefined;
     context?: string | undefined;
     cacheControl?: {
-        system?: boolean | undefined;
         tools?: boolean | undefined;
+        system?: boolean | undefined;
         context?: boolean | undefined;
     } | undefined;
 }, {
     task: string;
-    system?: string | undefined;
     tools?: string | undefined;
+    system?: string | undefined;
     context?: string | undefined;
     cacheControl?: {
-        system?: boolean | undefined;
         tools?: boolean | undefined;
+        system?: boolean | undefined;
         context?: boolean | undefined;
     } | undefined;
 }>;
@@ -98,15 +84,4 @@ export interface AssembleClaudeCacheBlocksResult {
     payload: ClaudeStreamJsonUserMessage;
     markedBlockCount: number;
 }
-/**
- * Slice κ: build the Claude `--input-format stream-json` payload from
- * a `PromptParts`. Each non-empty part becomes one content block in
- * `system → tools → context → task` order; parts whose name is `true`
- * in `cacheControl` get `cache_control: {type:"ephemeral", ttl:"1h"}`.
- *
- * Empty parts are skipped (no zero-byte blocks) — a true flag on an
- * empty part is silently a no-op and not counted in `markedBlockCount`.
- * The `task` block is never marked, even if a caller accidentally
- * tries (the schema doesn't expose `task` in `cacheControl`).
- */
 export declare function assembleClaudeCacheBlocks(parts: PromptParts): AssembleClaudeCacheBlocksResult;

package/dist/prompt-parts.js CHANGED Viewed

@@ -48,17 +48,6 @@ export function resolvePromptInput(input) {
         stablePrefixTokens: null,
     };
 }
-/**
- * Slice κ: build the Claude `--input-format stream-json` payload from
- * a `PromptParts`. Each non-empty part becomes one content block in
- * `system → tools → context → task` order; parts whose name is `true`
- * in `cacheControl` get `cache_control: {type:"ephemeral", ttl:"1h"}`.
- *
- * Empty parts are skipped (no zero-byte blocks) — a true flag on an
- * empty part is silently a no-op and not counted in `markedBlockCount`.
- * The `task` block is never marked, even if a caller accidentally
- * tries (the schema doesn't expose `task` in `cacheControl`).
- */
 export function assembleClaudeCacheBlocks(parts) {
     const blocks = [];
     let markedBlockCount = 0;

package/dist/provider-status.d.ts CHANGED Viewed

@@ -30,12 +30,4 @@ export interface GeminiAuthStatus {
     status: "present" | "not_found";
     methods: GeminiAuthMethods;
 }
-/**
- * U27: Detect Gemini auth across all supported methods.
- * Returns "present" if ANY of:
- *   - OAuth credential file present (~/.gemini/oauth_creds.json, etc.)
- *   - GEMINI_API_KEY env var set and non-empty
- *   - GOOGLE_API_KEY env var set and non-empty
- *   - GOOGLE_CLOUD_PROJECT set AND GOOGLE_GENAI_USE_VERTEXAI=true
- */
 export declare function geminiAuthStatus(env?: NodeJS.ProcessEnv, home?: string): GeminiAuthStatus;

package/dist/provider-status.js CHANGED Viewed

@@ -12,8 +12,6 @@ const VERSION_ARGS = {
     grok: ["--version"],
     mistral: ["--version"],
 };
-// Mistral Vibe ships as the `vibe` binary (PyPI package mistral-vibe); the gateway
-// uses `mistral` as the provider key but invokes `vibe` on the shell.
 export const PROVIDER_COMMANDS = {
     claude: "claude",
     codex: "codex",
@@ -130,7 +128,6 @@ function inferLoginStatus(provider, exitCode, output) {
                 return "not_authenticated";
         }
         catch {
-            // Fall through to text heuristics.
         }
     }
     if (/not\s+(logged|signed|authenticated)\s*in|unauthenticated|login required|not authorized/i.test(output)) {
@@ -158,14 +155,6 @@ function loginCheckDetail(provider, status, exitCode) {
         return `${provider} login check exited non-zero without exposing credential material.`;
     return `${provider} login check completed, but the output did not clearly indicate login state.`;
 }
-/**
- * U27: Detect Gemini auth across all supported methods.
- * Returns "present" if ANY of:
- *   - OAuth credential file present (~/.gemini/oauth_creds.json, etc.)
- *   - GEMINI_API_KEY env var set and non-empty
- *   - GOOGLE_API_KEY env var set and non-empty
- *   - GOOGLE_CLOUD_PROJECT set AND GOOGLE_GENAI_USE_VERTEXAI=true
- */
 export function geminiAuthStatus(env = process.env, home = homedir()) {
     const candidates = [
         join(home, ".gemini", "oauth_creds.json"),