npm - llm-cli-gateway - Versions diffs - 1.5.35 → 1.6.1 - Mend

llm-cli-gateway 1.5.35 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +201 -0
package/README.md +35 -4
package/dist/cache-stats.d.ts +112 -0
package/dist/cache-stats.js +225 -0
package/dist/config.d.ts +41 -0
package/dist/config.js +109 -0
package/dist/doctor.d.ts +42 -1
package/dist/doctor.js +121 -2
package/dist/flight-recorder.d.ts +27 -0
package/dist/flight-recorder.js +79 -2
package/dist/index.d.ts +46 -9
package/dist/index.js +395 -67
package/dist/pricing.d.ts +54 -0
package/dist/pricing.js +100 -0
package/dist/prompt-parts.d.ts +38 -0
package/dist/prompt-parts.js +42 -0
package/dist/resources.d.ts +32 -1
package/dist/resources.js +52 -1
package/package.json +2 -1
package/setup/status.schema.json +39 -0
package/socket.yml +29 -0

package/dist/pricing.d.ts ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * Per-model pricing for cache-savings estimation.
+ *
+ * `priced_as_of` is the date these numbers were last refreshed. The
+ * gateway's doctor surfaces this so operators can see when the table is
+ * stale — pricing is an ESTIMATE, not a billing number.
+ *
+ * Pricing units: USD per 1M tokens.
+ *
+ * Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
+ *   - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
+ *   - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
+ *   - Opus 4 / 4.1 (deprecated): same as 4.5+.
+ *   - Haiku 4.5: $1 input / $5 output.
+ *   - Haiku 3.5 (Vertex-only): $0.80 input / $4 output (not modelled separately; every "haiku" model is priced at the Haiku 4.5 rate).
+ *
+ * Cache pricing multipliers (Anthropic):
+ *   - cache write 5-min TTL: 1.25× base input.
+ *   - cache write 1-hour TTL: 2× base input.
+ *   - cache read: 0.10× base input (90% savings).
+ *
+ * Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
+ * does not publish a stable per-CLI table). Cached input ~50% of base.
+ *
+ * Gemini, Grok, Mistral: pricing varies by model and is not surfaced in the
+ * gateway today; getPricing returns zeroed pricing for these CLIs.
+ */
+export interface PricePerMillion {
+    inputUsd: number;
+    outputUsd: number;
+    /** Multiplier on inputUsd for a cache HIT (read). Anthropic: 0.10. */
+    cacheReadMultiplier: number;
+}
+export declare const PRICING_AS_OF = "2026-05-26";
+/**
+ * Look up pricing by (cli, model) name. Best-effort; unknown models return
+ * ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
+ * rather than throwing OR over-reporting savings on an unpriced model.
+ *
+ * Recognised model families:
+ *   - claude: model name contains "sonnet" | "opus" | "haiku".
+ *   - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
+ *
+ * Anything outside these explicit matches returns ZERO. This is a
+ * deliberate conservative choice — we'd rather under-report savings on
+ * an unrecognised model than over-report on one whose actual pricing we
+ * don't know. Update this table when a new model family ships.
+ */
+export declare function getPricing(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string): PricePerMillion;
+/**
+ * Estimate USD saved by `cacheReadTokens` being served from cache instead
+ * of fresh input. Returns 0 when cacheReadTokens <= 0 or pricing is unknown.
+ */
+export declare function estimateCacheSavingsUsd(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string, cacheReadTokens: number): number;

package/dist/pricing.js ADDED Viewed

@@ -0,0 +1,100 @@
+/**
+ * Per-model pricing for cache-savings estimation.
+ *
+ * `priced_as_of` is the date these numbers were last refreshed. The
+ * gateway's doctor surfaces this so operators can see when the table is
+ * stale — pricing is an ESTIMATE, not a billing number.
+ *
+ * Pricing units: USD per 1M tokens.
+ *
+ * Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
+ *   - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
+ *   - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
+ *   - Opus 4 / 4.1 (deprecated): same as 4.5+.
+ *   - Haiku 4.5: $1 input / $5 output.
+ *   - Haiku 3.5 (Vertex-only): $0.80 input / $4 output (not modelled separately; every "haiku" model is priced at the Haiku 4.5 rate).
+ *
+ * Cache pricing multipliers (Anthropic):
+ *   - cache write 5-min TTL: 1.25× base input.
+ *   - cache write 1-hour TTL: 2× base input.
+ *   - cache read: 0.10× base input (90% savings).
+ *
+ * Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
+ * does not publish a stable per-CLI table). Cached input ~50% of base.
+ *
+ * Gemini, Grok, Mistral: pricing varies by model and is not surfaced in the
+ * gateway today; getPricing returns zeroed pricing for these CLIs.
+ */
+export const PRICING_AS_OF = "2026-05-26";
+const ANTHROPIC_SONNET = {
+    inputUsd: 3,
+    outputUsd: 15,
+    cacheReadMultiplier: 0.1,
+};
+const ANTHROPIC_OPUS = {
+    inputUsd: 15,
+    outputUsd: 75,
+    cacheReadMultiplier: 0.1,
+};
+const ANTHROPIC_HAIKU = {
+    inputUsd: 1,
+    outputUsd: 5,
+    cacheReadMultiplier: 0.1,
+};
+const OPENAI_GPT5 = {
+    inputUsd: 1.25,
+    outputUsd: 10,
+    // OpenAI prompt-caching: cached input tokens billed at 50% of base.
+    cacheReadMultiplier: 0.5,
+};
+const ZERO = {
+    inputUsd: 0,
+    outputUsd: 0,
+    cacheReadMultiplier: 0,
+};
+/**
+ * Look up pricing by (cli, model) name. Best-effort; unknown models return
+ * ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
+ * rather than throwing OR over-reporting savings on an unpriced model.
+ *
+ * Recognised model families:
+ *   - claude: model name contains "sonnet" | "opus" | "haiku".
+ *   - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
+ *
+ * Anything outside these explicit matches returns ZERO. This is a
+ * deliberate conservative choice — we'd rather under-report savings on
+ * an unrecognised model than over-report on one whose actual pricing we
+ * don't know. Update this table when a new model family ships.
+ */
+export function getPricing(cli, model) {
+    const lower = model.toLowerCase();
+    if (cli === "claude") {
+        if (lower.includes("sonnet"))
+            return ANTHROPIC_SONNET;
+        if (lower.includes("opus"))
+            return ANTHROPIC_OPUS;
+        if (lower.includes("haiku"))
+            return ANTHROPIC_HAIKU;
+        return ZERO;
+    }
+    if (cli === "codex") {
+        if (lower.includes("gpt-5") || lower.includes("o3"))
+            return OPENAI_GPT5;
+        return ZERO;
+    }
+    return ZERO;
+}
+/**
+ * Estimate USD saved by `cacheReadTokens` being served from cache instead
+ * of fresh input. Returns 0 when cacheReadTokens <= 0 or pricing is unknown.
+ */
+export function estimateCacheSavingsUsd(cli, model, cacheReadTokens) {
+    if (cacheReadTokens <= 0)
+        return 0;
+    const p = getPricing(cli, model);
+    if (p.inputUsd === 0)
+        return 0;
+    // Savings per 1M tokens = fresh-input cost - cache-read cost = inputUsd × (1 - mult); divided by 1e6 to get a per-token figure.
+    const savedPerToken = (p.inputUsd * (1 - p.cacheReadMultiplier)) / 1_000_000;
+    return cacheReadTokens * savedPerToken;
+}

package/dist/prompt-parts.d.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import { z } from "zod";
+export interface PromptParts {
+    system?: string;
+    tools?: string;
+    context?: string;
+    task: string;
+}
+export declare const PromptPartsSchema: z.ZodObject<{
+    system: z.ZodOptional<z.ZodString>;
+    tools: z.ZodOptional<z.ZodString>;
+    context: z.ZodOptional<z.ZodString>;
+    task: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    task: string;
+    system?: string | undefined;
+    tools?: string | undefined;
+    context?: string | undefined;
+}, {
+    task: string;
+    system?: string | undefined;
+    tools?: string | undefined;
+    context?: string | undefined;
+}>;
+export interface AssembleResult {
+    text: string;
+    stableByteEnd: number;
+}
+export declare function assemble(parts: PromptParts): AssembleResult;
+export interface ResolvedPromptInput {
+    assembledPrompt: string;
+    stablePrefixHash: string | null;
+    stablePrefixTokens: number | null;
+}
+export interface ResolvePromptInputArgs {
+    prompt?: string;
+    promptParts?: PromptParts;
+}
+export declare function resolvePromptInput(input: ResolvePromptInputArgs): ResolvedPromptInput;

package/dist/prompt-parts.js ADDED Viewed

@@ -0,0 +1,42 @@
+import { createHash } from "crypto";
+import { z } from "zod";
+export const PromptPartsSchema = z.object({
+    system: z.string().optional(),
+    tools: z.string().optional(),
+    context: z.string().optional(),
+    task: z.string().min(1),
+});
+const SEPARATOR = "\n\n";
+export function assemble(parts) {
+    const stableSegments = [];
+    if (parts.system && parts.system.length > 0)
+        stableSegments.push(parts.system);
+    if (parts.tools && parts.tools.length > 0)
+        stableSegments.push(parts.tools);
+    if (parts.context && parts.context.length > 0)
+        stableSegments.push(parts.context);
+    const stableText = stableSegments.join(SEPARATOR);
+    const stableByteEnd = Buffer.byteLength(stableText, "utf8");
+    const text = stableText.length > 0 ? `${stableText}${SEPARATOR}${parts.task}` : parts.task;
+    return { text, stableByteEnd };
+}
+export function resolvePromptInput(input) {
+    if (input.promptParts !== undefined) {
+        const assembled = assemble(input.promptParts);
+        const stableBytes = Buffer.from(assembled.text, "utf8").subarray(0, assembled.stableByteEnd);
+        const hash = assembled.stableByteEnd > 0
+            ? createHash("sha256").update(stableBytes).digest("hex")
+            : createHash("sha256").update("").digest("hex");
+        const tokens = Math.ceil(assembled.stableByteEnd / 4);
+        return {
+            assembledPrompt: assembled.text,
+            stablePrefixHash: hash,
+            stablePrefixTokens: tokens,
+        };
+    }
+    return {
+        assembledPrompt: input.prompt ?? "",
+        stablePrefixHash: null,
+        stablePrefixTokens: null,
+    };
+}

package/dist/resources.d.ts CHANGED Viewed

@@ -1,5 +1,8 @@
 import { ISessionManager } from "./session-manager.js";
 import { PerformanceMetrics } from "./metrics.js";
+import { FlightRecorderQuery } from "./flight-recorder.js";
+import { type GlobalCacheStats, type PrefixCacheStats, type SessionCacheStats } from "./cache-stats.js";
+import type { CacheAwarenessConfig } from "./config.js";
 export interface ResourceDefinition {
     uri: string;
     name: string;
@@ -20,7 +23,35 @@ export interface ResourceContents {
 export declare class ResourceProvider {
     private sessionManager;
     private performanceMetrics;
-    constructor(sessionManager: ISessionManager, performanceMetrics: PerformanceMetrics);
+    private flightRecorder;
+    private cacheAwareness;
+    constructor(sessionManager: ISessionManager, performanceMetrics: PerformanceMetrics, flightRecorder?: FlightRecorderQuery, cacheAwareness?: CacheAwarenessConfig | null);
+    /** Read-only flight-recorder accessor for cache-state resource readers. */
+    getFlightRecorderQuery(): FlightRecorderQuery;
+    /**
+     * cache_state://global — aggregates across the entire flight recorder.
+     * Optionally restrict to a recent window via `lastNHours`. Returns
+     * tokens/hashes/aggregates ONLY — no prompt text fields. The redaction is
+     * structural: the response shape (GlobalCacheStats) has no `prompt`,
+     * `response`, `system`, or `task` field by construction.
+     */
+    readCacheStateGlobal(opts?: {
+        lastNHours?: number;
+    }): GlobalCacheStats;
+    /**
+     * cache_state://session/{sessionId} — per-session aggregates. Returns
+     * empty defaults when the session has no rows. Token/hash fields only.
+     *
+     * Slice 3: populates `ttlRemainingMs` by applying the configured TTL
+     * policy. Null for non-claude sessions. When the gateway has no
+     * cache-awareness config loaded, the default 5-min (300 s) TTL applies.
+     */
+    readCacheStateSession(sessionId: string): SessionCacheStats;
+    /**
+     * cache_state://prefix/{hash} — per-stable-prefix-hash aggregates.
+     * Returns empty defaults for unknown hashes. Token/hash fields only.
+     */
+    readCacheStateForPrefix(stablePrefixHash: string): PrefixCacheStats;
     listResources(): ResourceDefinition[];
     readResource(uri: string): Promise<ResourceContents | null>;
 }

package/dist/resources.js CHANGED Viewed

@@ -1,10 +1,61 @@
 import { getAvailableCliInfo } from "./model-registry.js";
+import { computeGlobalCacheStats, computePrefixCacheStats, computeSessionCacheStats, computeTtlRemaining, } from "./cache-stats.js";
 export class ResourceProvider {
     sessionManager;
     performanceMetrics;
-    constructor(sessionManager, performanceMetrics) {
+    flightRecorder;
+    cacheAwareness;
+    constructor(sessionManager, performanceMetrics,
+    // Optional read access to the flight recorder. Used by cache-state
+    // resources (slice 2). Falls back to a stub returning [] when not
+    // injected so existing call sites continue to work without changes.
+    flightRecorder = { queryRequests: () => [] },
+    // Slice 3: optional cache-awareness config. When present, drives the
+    // TTL policy applied to ttlRemainingMs on session-scoped reads.
+    // When absent, the default Anthropic 5-min TTL applies (matches the
+    // 1.x default of `[cache_awareness].anthropic_ttl_seconds = 300`).
+    cacheAwareness = null) {
         this.sessionManager = sessionManager;
         this.performanceMetrics = performanceMetrics;
+        this.flightRecorder = flightRecorder;
+        this.cacheAwareness = cacheAwareness;
+    }
+    /** Read-only flight-recorder accessor for cache-state resource readers. */
+    getFlightRecorderQuery() {
+        return this.flightRecorder;
+    }
+    /**
+     * cache_state://global — aggregates across the entire flight recorder.
+     * Optionally restrict to a recent window via `lastNHours`. Returns
+     * tokens/hashes/aggregates ONLY — no prompt text fields. The redaction is
+     * structural: the response shape (GlobalCacheStats) has no `prompt`,
+     * `response`, `system`, or `task` field by construction.
+     */
+    readCacheStateGlobal(opts = {}) {
+        return computeGlobalCacheStats(this.flightRecorder, opts);
+    }
+    /**
+     * cache_state://session/{sessionId} — per-session aggregates. Returns
+     * empty defaults when the session has no rows. Token/hash fields only.
+     *
+     * Slice 3: populates `ttlRemainingMs` by applying the configured TTL
+     * policy. Null for non-claude sessions. When the gateway has no
+     * cache-awareness config loaded, the default 5-min (300 s) TTL applies.
+     */
+    readCacheStateSession(sessionId) {
+        const stats = computeSessionCacheStats(this.flightRecorder, sessionId);
+        const ttlSeconds = this.cacheAwareness?.anthropicTtlSeconds ?? 300;
+        stats.ttlRemainingMs = computeTtlRemaining(stats, stats.cli, {
+            anthropicTtlSeconds: ttlSeconds,
+        });
+        return stats;
+    }
+    /**
+     * cache_state://prefix/{hash} — per-stable-prefix-hash aggregates.
+     * Returns empty defaults for unknown hashes. Token/hash fields only.
+     */
+    readCacheStateForPrefix(stablePrefixHash) {
+        return computePrefixCacheStats(this.flightRecorder, stablePrefixHash);
     }
     // List all available resources
     listResources() {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-cli-gateway",
-  "version": "1.5.35",
+  "version": "1.6.1",
   "mcpName": "io.github.verivus-oss/llm-cli-gateway",
   "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
   "license": "MIT",
@@ -109,6 +109,7 @@
     "@vitest/coverage-v8": "^4.1.2",
     "eslint": "^8.57.1",
     "eslint-config-prettier": "^9.0.0",
+    "eslint-plugin-security": "^3.0.1",
     "ioredis": "5.9.2",
     "pg": "^8.12.0",
     "prettier": "^3.0.0",

package/setup/status.schema.json CHANGED Viewed

@@ -14,6 +14,7 @@
     "providers",
     "endpoint_exposure",
     "client_config",
+    "cache_awareness",
     "next_actions"
   ],
   "properties": {
@@ -263,6 +264,44 @@
       },
       "additionalProperties": false
     },
+    "cache_awareness": {
+      "type": "object",
+      "required": ["enabled_features", "last_24h", "per_cli"],
+      "properties": {
+        "enabled_features": {
+          "type": "array",
+          "items": {
+            "type": "string",
+            "enum": ["anthropic_cache_control", "ttl_warnings"]
+          }
+        },
+        "last_24h": {
+          "type": "object",
+          "required": ["hit_rate", "total_hits", "total_requests", "estimated_savings_usd"],
+          "properties": {
+            "hit_rate": { "type": "number" },
+            "total_hits": { "type": "integer" },
+            "total_requests": { "type": "integer" },
+            "estimated_savings_usd": { "type": "number" }
+          },
+          "additionalProperties": false
+        },
+        "per_cli": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "object",
+            "required": ["hit_rate", "total_hits", "total_cache_read_tokens"],
+            "properties": {
+              "hit_rate": { "type": "number" },
+              "total_hits": { "type": "integer" },
+              "total_cache_read_tokens": { "type": "integer" }
+            },
+            "additionalProperties": false
+          }
+        }
+      },
+      "additionalProperties": false
+    },
     "next_actions": {
       "type": "array",
       "items": { "type": "string" }

package/socket.yml CHANGED Viewed

@@ -14,6 +14,25 @@ version: 2
 #     src/endpoint-exposure.ts also issues a HEAD probe when verifying
 #     tunnel reachability — opt-in via the start:http entry point only.
 #
+#     Additionally, Socket may flag `dist/index.js` and `dist/job-store.js`
+#     against the `globalThis["fetch"]` rule. This is a substring-match
+#     false positive (verified for v1.6.0 by sub-agent investigation on
+#     2026-05-26; same matches exist in v1.5.35). Neither file contains
+#     any `fetch(`, `globalThis.fetch`, polyfill import, or any other
+#     network-call construct. The matches are:
+#       - dist/index.js — the English word "fetch" inside an async-defer
+#         error message ("Poll with llm_job_status, fetch with
+#         llm_job_result.") AND the JSON field name `fetchWith:
+#         "llm_job_result"` (part of the deferred-job response contract).
+#       - dist/job-store.js — the word "fetch" inside a code comment on
+#         markOrphanedOnStartup() describing how callers retrieve partial
+#         output from SQLite.
+#     Verify with: `grep -rEn "\bfetch\(|globalThis\.fetch|globalThis\[" dist/`
+#     — returns empty. Production code does not import undici / node-fetch
+#     / axios / got. The cache-awareness slice (v1.6.0) introduced zero
+#     new network surfaces; all I/O is filesystem (SQLite, sessions.json)
+#     or in-process.
+#
 #   shellAccess
 #     src/executor.ts uses child_process.spawn(cmd, args, { ... }) with a
 #     fixed allow-list of CLI binaries (claude / codex / gemini / grok /
@@ -33,6 +52,16 @@ version: 2
 #     gateway does not call db.pragma() from production code; SQLite setup
 #     uses fixed literal db.exec("PRAGMA ...") statements, and the release
 #     security audit fails future production `.pragma()` calls.
+#
+#   ioredis obfuscated code / base64 strings
+#     Socket may flag ioredis@5.10.1 built/constants/TLSProfiles.js because it
+#     contains base64-looking strings. This is a reviewed false positive: the
+#     strings are PEM-encoded Redis Cloud TLS CA certificates. The file exports
+#     static TLS profile data only; it contains no decoder loop, dynamic eval,
+#     network call, or hidden execution path. The same file is byte-for-byte
+#     identical in ioredis@5.9.2. ioredis is not installed by the default
+#     production dependency tree; it is an optional peer for PostgreSQL/Redis
+#     session storage and a pinned dev dependency for tests.
 issueRules:
   # Defaults from Socket. Listed explicitly so future contributors see what