npm - llm-cli-gateway - Versions diffs - 1.17.4 → 1.17.5 - Mend

llm-cli-gateway 1.17.4 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/CHANGELOG.md +15 -0
package/README.md +1 -1
package/dist/approval-manager.js +0 -8
package/dist/async-job-manager.d.ts +0 -113
package/dist/async-job-manager.js +6 -124
package/dist/cache-stats.d.ts +0 -89
package/dist/cache-stats.js +0 -62
package/dist/claude-mcp-config.js +0 -1
package/dist/cli-updater.d.ts +0 -8
package/dist/cli-updater.js +0 -12
package/dist/codex-json-parser.d.ts +0 -20
package/dist/codex-json-parser.js +0 -21
package/dist/config.d.ts +0 -31
package/dist/config.js +2 -72
package/dist/db.d.ts +0 -18
package/dist/db.js +0 -22
package/dist/doctor.d.ts +0 -49
package/dist/doctor.js +0 -47
package/dist/endpoint-exposure.js +0 -1
package/dist/executor.d.ts +0 -19
package/dist/executor.js +3 -38
package/dist/flight-recorder.d.ts +0 -26
package/dist/flight-recorder.js +1 -70
package/dist/gemini-json-parser.d.ts +0 -25
package/dist/gemini-json-parser.js +0 -28
package/dist/health.d.ts +0 -3
package/dist/health.js +0 -3
package/dist/index.d.ts +1 -221
package/dist/index.js +14 -563
package/dist/job-store.d.ts +0 -74
package/dist/job-store.js +1 -73
package/dist/logger.d.ts +0 -7
package/dist/logger.js +0 -6
package/dist/migrate-sessions.d.ts +0 -3
package/dist/migrate-sessions.js +0 -16
package/dist/migrate.js +1 -18
package/dist/mistral-meta-json-parser.js +0 -67
package/dist/model-registry.js +0 -13
package/dist/pricing.d.ts +0 -46
package/dist/pricing.js +0 -47
package/dist/process-monitor.d.ts +0 -15
package/dist/process-monitor.js +2 -31
package/dist/prompt-parts.d.ts +0 -25
package/dist/prompt-parts.js +0 -11
package/dist/provider-status.d.ts +0 -8
package/dist/provider-status.js +0 -11
package/dist/request-helpers.d.ts +0 -334
package/dist/request-helpers.js +1 -229
package/dist/resources.d.ts +0 -20
package/dist/resources.js +1 -34
package/dist/retry.d.ts +0 -45
package/dist/retry.js +3 -40
package/dist/session-manager-pg.d.ts +0 -32
package/dist/session-manager-pg.js +0 -32
package/dist/session-manager.d.ts +0 -21
package/dist/session-manager.js +1 -15
package/dist/stream-json-parser.d.ts +0 -18
package/dist/stream-json-parser.js +0 -22
package/dist/upstream-contracts.d.ts +0 -55
package/dist/upstream-contracts.js +0 -77
package/dist/validation-orchestrator.js +0 -3
package/dist/worktree-manager.d.ts +0 -9
package/dist/worktree-manager.js +0 -21
package/package.json +1 -1

package/dist/cache-stats.d.ts CHANGED Viewed

@@ -1,44 +1,16 @@
-/**
- * Cache observability aggregates.
- *
- * Pure read-only aggregation over the FlightRecorder's `requests` table.
- * No new storage — every value is computed at query time from existing
- * columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
- * `datetime_utc`, etc.).
- *
- * COALESCE / NULL handling: rows from before the v3 migration have NULL
- * for stable_prefix_*. Rows from CLIs whose parser does not surface cache
- * tokens (gemini, grok, mistral, and codex until its parser is fixed)
- * have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
- * tolerate NULL via COALESCE(col, 0) — never divides by zero.
- */
 import type { FlightRecorderQuery } from "./flight-recorder.js";
 export type CacheStatsCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
 export interface SessionCacheStats {
     sessionId: string;
     cli: CacheStatsCli | null;
-    /** Total cache_read_tokens across all rows in this session. */
     totalCacheReadTokens: number;
-    /** Total cache_creation_tokens across all rows in this session. */
     totalCacheCreationTokens: number;
-    /** Number of rows in this session. */
     requestCount: number;
-    /** Number of rows where cache_read_tokens > 0. */
     hitCount: number;
-    /** hitCount / requestCount (0 when requestCount = 0). */
     hitRate: number;
-    /** Distinct stable_prefix_hash values seen in this session. */
     distinctPrefixCount: number;
-    /** Last time any row in this session was written (datetime_utc max). ISO string or null. */
     lastRequestAt: string | null;
-    /** Estimated USD saved by cache reads in this session (best-effort). */
     estimatedSavingsUsd: number;
-    /**
-     * Slice 3: best-effort remaining TTL on the Anthropic cache breakpoint
-     * established at lastRequestAt. Null for non-claude CLIs (we have no
-     * read on their cache state) and null when lastRequestAt is null.
-     * Computed by computeTtlRemaining(); see ttlPolicy parameter.
-     */
     ttlRemainingMs: number | null;
 }
 export interface PrefixCacheStats {
@@ -48,7 +20,6 @@ export interface PrefixCacheStats {
     hitRate: number;
     totalCacheReadTokens: number;
     totalCacheCreationTokens: number;
-    /** Distinct CLI x model combos that hashed to this prefix. */
     cliBreakdown: Array<{
         cli: CacheStatsCli;
         model: string;
@@ -59,7 +30,6 @@ export interface PrefixCacheStats {
     estimatedSavingsUsd: number;
 }
 export interface GlobalCacheStats {
-    /** Optional window: rows since (now - lastNHours * 3600s). */
     windowHours: number | null;
     totalRequests: number;
     totalHits: number;
@@ -76,27 +46,6 @@ export interface GlobalCacheStats {
         estimatedSavingsUsd: number;
     }>;
     estimatedSavingsUsd: number;
-    /**
-     * Rec #3 (slice κ): derived metrics that distinguish gateway-driven
-     * κ-explicit `cache_control` breakpoints from Claude Code's
-     * own baseline cache reads.
-     *
-     * - explicitCacheControlRows: rows where the gateway emitted at
-     *   least one `cache_control` marker (`cache_control_blocks > 0`).
-     * - explicitCacheControlHits: those rows whose `cache_read_tokens
-     *   > 0` — closest signal we have to "the caller's marked block
-     *   actually hit Anthropic's cache" (still includes Claude Code's
-     *   baseline cache reads on top, which is unavoidable without
-     *   per-block token accounting from Anthropic).
-     * - explicitCacheControlHitRate: ratio explicit hits / explicit rows.
-     * - stablePrefixReuseCount: distinct `stable_prefix_hash` values
-     *   that appear in >1 row in-window (i.e. real reuse opportunities).
-     * - avgCacheCreationAfterFirstCall: averaged across stable-prefix
-     *   reuse groups, the cache_creation_tokens on rows AFTER the
-     *   first-by-datetime in each group. Drops sharply when caller
-     *   blocks are reused; stays high when Claude Code's session-wrap
-     *   floor dominates.
-     */
     explicitCacheControlRows: number;
     explicitCacheControlHits: number;
     explicitCacheControlHitRate: number;
@@ -105,38 +54,15 @@ export interface GlobalCacheStats {
 }
 export declare function computeSessionCacheStats(db: FlightRecorderQuery, sessionId: string): SessionCacheStats;
 export interface TtlPolicy {
-    /**
-     * Seconds: how long Anthropic holds a cache entry after the last
-     * write. Default 300 (5 minutes). Set to 3600 when the operator has
-     * opted into Anthropic's 1-hour cache TTL via
-     * `[cache_awareness].anthropic_ttl_seconds = 3600`.
-     */
     anthropicTtlSeconds: 300 | 3600;
-    /** Defaults to `() => Date.now()`. Overridable for deterministic tests. */
     now?: () => number;
 }
-/**
- * Slice 3: compute the best-effort milliseconds remaining on the cache
- * breakpoint established at `stats.lastRequestAt`.
- *
- * - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
- *   as max(0, ttl - (now - lastWriteAt)).
- * - Other CLIs: returns null. We do not observe the provider's actual
- *   cache state, so any number we'd return would be a guess. session_get
- *   and cache_state resources should report null for these.
- *
- * Note: this is "best effort". A cache eviction inside Anthropic's
- * window will NOT be visible to us — the warning may be optimistic
- * (see risks section in dag.toml).
- */
 export declare function computeTtlRemaining(stats: SessionCacheStats, cli: CacheStatsCli | null, ttlPolicy: TtlPolicy): number | null;
 export declare function computePrefixCacheStats(db: FlightRecorderQuery, stablePrefixHash: string): PrefixCacheStats;
 export interface GlobalCacheStatsOpts {
-    /** If set, restrict to rows whose datetime_utc is within the last N hours. */
     lastNHours?: number;
 }
 export declare function computeGlobalCacheStats(db: FlightRecorderQuery, opts?: GlobalCacheStatsOpts): GlobalCacheStats;
-/** Default response truncation budget, matching llm_job_result's maxChars. */
 export declare const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200000;
 export interface PersistedRequestRecord {
     correlationId: string;
@@ -151,35 +77,20 @@ export interface PersistedRequestRecord {
     retryCount: number | null;
     circuitBreakerState: string | null;
     costUsd: number | null;
-    /** NULL for sync requests; the async job UUID for *_request_async rows. */
     asyncJobId: string | null;
     inputTokens: number | null;
     outputTokens: number | null;
     cacheReadTokens: number | null;
     cacheCreationTokens: number | null;
-    /** Full character length of the persisted prompt (always reported). */
     promptChars: number;
-    /** Full character length of the persisted response (pre-truncation). */
     responseChars: number;
-    /** True when `response` was clipped to `maxChars`. */
     responseTruncated: boolean;
-    /** Persisted response text, truncated to maxChars. NULL if the row never completed. */
     response: string | null;
-    /** Only present when includePrompt = true. */
     prompt?: string;
-    /** Parsed thinking blocks (claude), or null. */
     thinkingBlocks: string[] | null;
 }
 export interface ReadPersistedRequestOptions {
-    /** Truncate the returned response to this many characters. Default 200000. */
     maxChars?: number;
-    /** Include the full persisted prompt text in the result. Default false. */
     includePrompt?: boolean;
 }
-/**
- * Fetch a single persisted request by correlation id from the flight recorder.
- * Returns null when no row matches (including a NoopFlightRecorder, which
- * yields no rows — i.e. flight recording disabled). The response is truncated
- * to `maxChars`; the full pre-truncation length is reported via responseChars.
- */
 export declare function readPersistedRequest(db: FlightRecorderQuery, correlationId: string, opts?: ReadPersistedRequestOptions): PersistedRequestRecord | null;

package/dist/cache-stats.js CHANGED Viewed

@@ -1,17 +1,3 @@
-/**
- * Cache observability aggregates.
- *
- * Pure read-only aggregation over the FlightRecorder's `requests` table.
- * No new storage — every value is computed at query time from existing
- * columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
- * `datetime_utc`, etc.).
- *
- * COALESCE / NULL handling: rows from before the v3 migration have NULL
- * for stable_prefix_*. Rows from CLIs whose parser does not surface cache
- * tokens (gemini, grok, mistral, and codex until its parser is fixed)
- * have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
- * tolerate NULL via COALESCE(col, 0) — never divides by zero.
- */
 import { estimateCacheSavingsUsd } from "./pricing.js";
 function safeNum(n) {
     return typeof n === "number" && Number.isFinite(n) ? n : 0;
@@ -64,27 +50,9 @@ export function computeSessionCacheStats(db, sessionId) {
         distinctPrefixCount: prefixSet.size,
         lastRequestAt: lastAt,
         estimatedSavingsUsd,
-        // ttlRemainingMs is populated by computeTtlRemaining() — the field
-        // exists on the type so the resource shape is uniform, but its value
-        // is left null here. Callers (session_get / cache_state resources)
-        // apply the configured TTL policy and set the field.
         ttlRemainingMs: null,
     };
 }
-/**
- * Slice 3: compute the best-effort milliseconds remaining on the cache
- * breakpoint established at `stats.lastRequestAt`.
- *
- * - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
- *   as max(0, ttl - (now - lastWriteAt)).
- * - Other CLIs: returns null. We do not observe the provider's actual
- *   cache state, so any number we'd return would be a guess. session_get
- *   and cache_state resources should report null for these.
- *
- * Note: this is "best effort". A cache eviction inside Anthropic's
- * window will NOT be visible to us — the warning may be optimistic
- * (see risks section in dag.toml).
- */
 export function computeTtlRemaining(stats, cli, ttlPolicy) {
     if (cli !== "claude")
         return null;
@@ -177,16 +145,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
     let totalRead = 0;
     let totalCreation = 0;
     let totalSavings = 0;
-    // Rec #3: κ-explicit metrics. A row is "κ-explicit" iff it has
-    // `cache_control_blocks > 0` — i.e. the gateway emitted at least one
-    // caller-supplied `cache_control` marker. Rows with NULL or 0 are
-    // either pre-v4 or non-κ Claude / non-Claude requests.
     let explicitRows = 0;
     let explicitHits = 0;
-    // Per-prefix reuse tracking: collect cache_creation_tokens for every
-    // row keyed by stable_prefix_hash, ordered ascending by datetime_utc.
-    // For each group with >1 row, drop the first (the cache-write call)
-    // and average the rest (the cache-read calls).
     const perPrefix = new Map();
     for (const row of rows) {
         totalRequests += 1;
@@ -235,8 +195,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
             continue;
         stablePrefixReuseCount += 1;
         arr.sort((a, b) => a.datetime_utc < b.datetime_utc ? -1 : a.datetime_utc > b.datetime_utc ? 1 : 0);
-        // Every row after the first-by-time in this prefix group (the reuse
-        // calls). Iterate the tail directly rather than index-walking `arr`.
         const [, ...afterFirst] = arr;
         for (const entry of afterFirst) {
             creationAfterFirstSum += entry.cache_creation_tokens;
@@ -269,20 +227,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
         avgCacheCreationAfterFirstCall,
     };
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Read-back of a single persisted request by correlation id.
-//
-// The flight recorder already persists every request's `response` column on
-// logComplete (flight-recorder.ts), regardless of sync vs async. But the only
-// MCP read-back surface — llm_job_result — is keyed on an async job id and
-// reads the AsyncJobManager, not the recorder. So a *sync* response (which has
-// async_job_id = NULL and is handed back inline exactly once) has no retrieval
-// path after the fact. This helper closes that gap: given the correlationId
-// that every sync/async response echoes in `structuredContent.correlationId`,
-// it returns the persisted row from the recorder. Pure read-only — uses the
-// same FlightRecorderQuery surface as the cache aggregates above.
-//──────────────────────────────────────────────────────────────────────────────
-/** Default response truncation budget, matching llm_job_result's maxChars. */
 export const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200_000;
 function parseThinkingBlocks(raw) {
     if (!raw)
@@ -295,12 +239,6 @@ function parseThinkingBlocks(raw) {
         return null;
     }
 }
-/**
- * Fetch a single persisted request by correlation id from the flight recorder.
- * Returns null when no row matches (including a NoopFlightRecorder, which
- * yields no rows — i.e. flight recording disabled). The response is truncated
- * to `maxChars`; the full pre-truncation length is reported via responseChars.
- */
 export function readPersistedRequest(db, correlationId, opts = {}) {
     const maxChars = opts.maxChars ?? PERSISTED_REQUEST_DEFAULT_MAX_CHARS;
     const rows = db.queryRequests(`SELECT r.id, r.cli, r.model, r.prompt, r.response, r.session_id,

package/dist/claude-mcp-config.js CHANGED Viewed

@@ -113,7 +113,6 @@ function toClaudeServerDef(server) {
     if (server === "ref_tools" && process.env.REF_API_KEY) {
         env.REF_API_KEY = process.env.REF_API_KEY;
     }
-    // sqry should always be usable without env, but exa/ref_tools typically need credentials.
     if ((server === "exa" && !env.EXA_API_KEY) || (server === "ref_tools" && !env.REF_API_KEY)) {
         return null;
     }

package/dist/cli-updater.d.ts CHANGED Viewed

@@ -24,14 +24,6 @@ export interface CliUpgradePlan {
     note?: string;
 }
 export type MistralInstallMethod = "pip" | "uv" | "brew" | "unknown";
-/**
- * Detect how Vibe was installed on this machine. Vibe does not self-update, so
- * cli_upgrade has to dispatch to the package manager that owns the binary.
- *
- * Probe order: pip → uv → brew. The first one that returns a positive signal
- * wins; if none do, callers should surface an actionable error rather than
- * blindly running `vibe update` (a command that does not exist).
- */
 export declare function detectMistralInstallMethod(exec?: (cmd: string, args: string[]) => {
     exitCode: number | null;
     stdout: string;

package/dist/cli-updater.js CHANGED Viewed

@@ -3,14 +3,6 @@ import { executeCli } from "./executor.js";
 import { getProviderRuntimeStatus } from "./provider-status.js";
 const MISTRAL_VIBE_PACKAGE = "mistral-vibe";
 const LEGACY_VIBE_PACKAGE = "vibe-cli";
-/**
- * Detect how Vibe was installed on this machine. Vibe does not self-update, so
- * cli_upgrade has to dispatch to the package manager that owns the binary.
- *
- * Probe order: pip → uv → brew. The first one that returns a positive signal
- * wins; if none do, callers should surface an actionable error rather than
- * blindly running `vibe update` (a command that does not exist).
- */
 export function detectMistralInstallMethod(exec = (cmd, args) => {
     const result = spawnSync(cmd, args, { encoding: "utf8", timeout: 5_000, windowsHide: true });
     return {
@@ -155,10 +147,6 @@ export async function getCliVersions(cli) {
 }
 function buildMistralUpgradePlan(normalizedTarget, detectMistral) {
     const method = detectMistral();
-    // Vibe ships no self-update command. cli_upgrade dispatches to the installer
-    // it detects; if none can be detected the caller gets an actionable error
-    // (we surface it as a no-op plan with `command: ""` so runCliUpgrade can
-    // throw before spawning anything).
     if (method === "pip") {
         const pkg = normalizedTarget === "latest"
             ? MISTRAL_VIBE_PACKAGE

package/dist/codex-json-parser.d.ts CHANGED Viewed

@@ -1,23 +1,3 @@
-/**
- * Parser for Codex CLI `--json` JSONL event stream.
- *
- * Codex emits one JSON object per line, e.g.:
- *   {"type":"thread.started","thread_id":"t-abc"}
- *   {"type":"turn.started","turn_id":"u-001"}
- *   {"type":"item.started","item":{...}}
- *   {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
- *   {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
- *   {"type":"turn.failed","error":{...}}
- *   {"type":"error","message":"..."}
- *
- * This parser is lenient: malformed lines are skipped, partial streams are
- * tolerated (usage is `undefined` if no turn.completed event arrived), and
- * error events are surfaced.
- *
- * Cost is intentionally NOT computed here — Codex does not price client-side
- * and U23 only plumbs tokens. A future unit can compute cost from the model
- * registry.
- */
 export interface CodexUsage {
     input_tokens: number;
     output_tokens: number;

package/dist/codex-json-parser.js CHANGED Viewed

@@ -1,23 +1,3 @@
-/**
- * Parser for Codex CLI `--json` JSONL event stream.
- *
- * Codex emits one JSON object per line, e.g.:
- *   {"type":"thread.started","thread_id":"t-abc"}
- *   {"type":"turn.started","turn_id":"u-001"}
- *   {"type":"item.started","item":{...}}
- *   {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
- *   {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
- *   {"type":"turn.failed","error":{...}}
- *   {"type":"error","message":"..."}
- *
- * This parser is lenient: malformed lines are skipped, partial streams are
- * tolerated (usage is `undefined` if no turn.completed event arrived), and
- * error events are surfaced.
- *
- * Cost is intentionally NOT computed here — Codex does not price client-side
- * and U23 only plumbs tokens. A future unit can compute cost from the model
- * registry.
- */
 export function parseCodexJsonStream(stdout) {
     const lines = stdout.split("\n").filter(line => line.trim().length > 0);
     const result = {};
@@ -28,7 +8,6 @@ export function parseCodexJsonStream(stdout) {
             parsed = JSON.parse(line);
         }
         catch {
-            // Skip preamble/garbage lines that aren't valid JSON.
             continue;
         }
         if (!parsed || typeof parsed !== "object") {

package/dist/config.d.ts CHANGED Viewed

@@ -13,11 +13,6 @@ export interface Config {
     database?: DatabaseConfig;
     sessionTtl: number;
 }
-/**
- * Load configuration from environment variables.
- * Always returns a Config object with base fields.
- * Database fields are populated when DATABASE_URL is set.
- */
 export declare function loadConfig(): Config;
 export declare const PERSISTENCE_BACKENDS: readonly ["sqlite", "postgres", "memory", "none"];
 export type PersistenceBackend = (typeof PERSISTENCE_BACKENDS)[number];
@@ -30,32 +25,16 @@ export interface PersistenceConfig {
     retentionDays: number;
     dedupWindowMs: number;
     acknowledgeEphemeral: boolean;
-    /** True iff async-job tools should be registered on the MCP server. */
     asyncJobsEnabled: boolean;
-    /** Audit trail: which inputs (file, env vars) contributed to the resolved config. */
     sources: PersistenceConfigSources;
 }
 export interface PersistenceConfigSources {
     configFile: string | null;
     envOverrides: string[];
 }
-/**
- * Load and validate the persistence config from (in order, last-write-wins):
- *   1. Built-in defaults (backend=sqlite, default retention/dedup).
- *   2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
- *   3. Legacy env vars (with deprecation warning).
- *
- * Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
- */
 export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
 export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
 export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
-/**
- * Per-Anthropic-model-family minimum cacheable tokens. Sourced from
- * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
- * 2026-05-26). Models below the threshold cannot be cached even with
- * cache_control set — Anthropic silently returns un-cached.
- */
 export declare const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL: {
     readonly sonnet: 1024;
     readonly opus: 4096;
@@ -73,19 +52,9 @@ export interface CacheAwarenessConfig {
         haiku: number;
         default: number;
     };
-    /** Audit trail: file the config was loaded from (or null if defaults). */
     sources: {
         configFile: string | null;
     };
 }
-/**
- * Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
- * behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
- */
 export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
-/**
- * Look up the per-model-family threshold. `modelName` is the user-facing model
- * string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
- * when the family is unrecognised.
- */
 export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;

package/dist/config.js CHANGED Viewed

@@ -4,30 +4,22 @@ import path from "path";
 import { createRequire } from "module";
 import { z } from "zod/v3";
 import { logWarn, noopLogger } from "./logger.js";
-// Zod schemas for configuration validation
 const DatabaseUrlSchema = z
     .string()
     .url()
     .refine(url => url.startsWith("postgresql://") || url.startsWith("postgres://"), {
     message: "Database URL must start with postgresql:// or postgres://",
 });
-export const DEFAULT_SESSION_TTL_SECONDS = 2592000; // 30 days
-/**
- * Load configuration from environment variables.
- * Always returns a Config object with base fields.
- * Database fields are populated when DATABASE_URL is set.
- */
+export const DEFAULT_SESSION_TTL_SECONDS = 2592000;
 export function loadConfig() {
     const databaseUrl = process.env.DATABASE_URL;
     const rawSessionTtl = parseInt(process.env.SESSION_TTL || String(DEFAULT_SESSION_TTL_SECONDS), 10);
     const sessionTtl = Number.isFinite(rawSessionTtl) && rawSessionTtl > 0
         ? rawSessionTtl
         : DEFAULT_SESSION_TTL_SECONDS;
-    // If no database config, return base config (file-based storage)
     if (!databaseUrl) {
         return { sessionTtl };
     }
-    // Validate URL
     try {
         DatabaseUrlSchema.parse(databaseUrl);
     }
@@ -47,25 +39,9 @@ export function loadConfig() {
         sessionTtl,
     };
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Persistence configuration
-//
-// The async job store is now driven by a typed config (TOML file +
-// validated env-var overrides) instead of a single LLM_GATEWAY_LOGS_DB env
-// var. The structural invariant: `*_request_async` tools are only registered
-// when a real durable store is attached, so silent in-memory loss after the
-// 1h TTL becomes impossible.
-//
-// Backends:
-//   - "sqlite":   durable on disk (default).
-//   - "postgres": durable in Postgres (interface only — impl not yet shipped).
-//   - "memory":   in-process MemoryJobStore. Process-lifetime durability only.
-//                 Requires acknowledgeEphemeral=true to register async tools.
-//   - "none":     no store. Async tools are NOT registered.
-//──────────────────────────────────────────────────────────────────────────────
 export const PERSISTENCE_BACKENDS = ["sqlite", "postgres", "memory", "none"];
 export const DEFAULT_JOB_RETENTION_DAYS = 30;
-export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000; // 1 hour
+export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000;
 const PersistenceSchema = z
     .object({
     backend: z.enum(PERSISTENCE_BACKENDS).default("sqlite"),
@@ -80,10 +56,6 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
 function defaultPersistenceConfigPath() {
     return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
 }
-/**
- * Read and parse the optional TOML config file. Returns the raw `[persistence]`
- * table (if present) and the file path. Missing file is fine — defaults apply.
- */
 function readPersistenceFile(configPath, logger) {
     if (!existsSync(configPath)) {
         return { raw: undefined, sourcePath: null };
@@ -100,18 +72,10 @@ function readPersistenceFile(configPath, logger) {
         return { raw: undefined, sourcePath: null };
     }
 }
-/**
- * Apply legacy env-var overrides on top of the file/defaults. Each application
- * appends a string to `sources.envOverrides` and emits a one-time deprecation
- * warning so operators can migrate to the config file.
- */
 function applyEnvOverrides(base, logger, sources) {
     const out = { ...base };
     const jobsDbEnv = process.env.LLM_GATEWAY_JOBS_DB;
     const logsDbEnv = process.env.LLM_GATEWAY_LOGS_DB;
-    // Empty string is treated as "not set" — only an explicitly non-empty value
-    // (or the literal "none") overrides the file/defaults. This avoids the
-    // old footgun where `LLM_GATEWAY_LOGS_DB=` silently disabled persistence.
     const dbEnvRaw = jobsDbEnv && jobsDbEnv.length > 0
         ? jobsDbEnv
         : logsDbEnv && logsDbEnv.length > 0
@@ -160,14 +124,6 @@ function applyEnvOverrides(base, logger, sources) {
 function expandHome(p) {
     return p.startsWith("~/") ? path.join(os.homedir(), p.slice(2)) : p;
 }
-/**
- * Load and validate the persistence config from (in order, last-write-wins):
- *   1. Built-in defaults (backend=sqlite, default retention/dedup).
- *   2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
- *   3. Legacy env vars (with deprecation warning).
- *
- * Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
- */
 export function loadPersistenceConfig(logger = noopLogger) {
     const configPath = defaultPersistenceConfigPath();
     const { raw, sourcePath } = readPersistenceFile(configPath, logger);
@@ -209,24 +165,7 @@ export function loadPersistenceConfig(logger = noopLogger) {
         sources,
     };
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Cache-awareness configuration
-//
-// Reads the [cache_awareness] block from the same ~/.llm-cli-gateway/config.toml
-// file as [persistence], but uses a SEPARATE loader and schema. Keeping the two
-// independent means a malformed [cache_awareness] never breaks persistence
-// loading and vice versa. No env-var overrides — purely TOML.
-//
-// All defaults are "off"; behavioural changes (slice 1 cache_control, slice 3
-// TTL warnings) ship dormant until operators opt in.
-//──────────────────────────────────────────────────────────────────────────────
 export const ANTHROPIC_TTL_SECONDS_VALUES = [300, 3600];
-/**
- * Per-Anthropic-model-family minimum cacheable tokens. Sourced from
- * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
- * 2026-05-26). Models below the threshold cannot be cached even with
- * cache_control set — Anthropic silently returns un-cached.
- */
 export const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL = {
     sonnet: 1024,
     opus: 4096,
@@ -275,10 +214,6 @@ function readCacheAwarenessFile(configPath, logger) {
         return { raw: undefined, sourcePath: null };
     }
 }
-/**
- * Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
- * behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
- */
 export function loadCacheAwarenessConfig(logger = noopLogger) {
     const configPath = defaultPersistenceConfigPath();
     const { raw, sourcePath } = readCacheAwarenessFile(configPath, logger);
@@ -302,11 +237,6 @@ export function loadCacheAwarenessConfig(logger = noopLogger) {
         sources: { configFile: sourcePath },
     };
 }
-/**
- * Look up the per-model-family threshold. `modelName` is the user-facing model
- * string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
- * when the family is unrecognised.
- */
 export function minStableTokensForModel(config, modelName) {
     const lower = modelName.toLowerCase();
     const table = config.minStableTokensForCacheControl;

package/dist/db.d.ts CHANGED Viewed

@@ -7,32 +7,14 @@ export interface HealthCheckResult {
         latency: number;
     };
 }
-/**
- * Database connection manager for PostgreSQL-backed sessions.
- */
 export declare class DatabaseConnection {
     private logger;
     private pool;
     private config;
     constructor(config: Config, logger?: Logger);
-    /**
-     * Initialize connection to PostgreSQL.
-     */
     connect(): Promise<void>;
-    /**
-     * Graceful shutdown - close all connections
-     */
     disconnect(): Promise<void>;
-    /**
-     * Health check for PostgreSQL.
-     */
     healthCheck(): Promise<HealthCheckResult>;
-    /**
-     * Get PostgreSQL pool
-     */
     getPool(): Pool;
 }
-/**
- * Factory function to create and connect DatabaseConnection
- */
 export declare function createDatabaseConnection(config: Config, logger?: Logger): Promise<DatabaseConnection>;