llm-cli-gateway 1.17.4 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +1 -221
  29. package/dist/index.js +14 -563
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +0 -25
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +0 -334
  48. package/dist/request-helpers.js +1 -229
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +0 -77
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
@@ -1,44 +1,16 @@
1
- /**
2
- * Cache observability aggregates.
3
- *
4
- * Pure read-only aggregation over the FlightRecorder's `requests` table.
5
- * No new storage — every value is computed at query time from existing
6
- * columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
7
- * `datetime_utc`, etc.).
8
- *
9
- * COALESCE / NULL handling: rows from before the v3 migration have NULL
10
- * for stable_prefix_*. Rows from CLIs whose parser does not surface cache
11
- * tokens (gemini, grok, mistral, and codex until its parser is fixed)
12
- * have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
13
- * tolerate NULL via COALESCE(col, 0) — never divides by zero.
14
- */
15
1
  import type { FlightRecorderQuery } from "./flight-recorder.js";
16
2
  export type CacheStatsCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
17
3
  export interface SessionCacheStats {
18
4
  sessionId: string;
19
5
  cli: CacheStatsCli | null;
20
- /** Total cache_read_tokens across all rows in this session. */
21
6
  totalCacheReadTokens: number;
22
- /** Total cache_creation_tokens across all rows in this session. */
23
7
  totalCacheCreationTokens: number;
24
- /** Number of rows in this session. */
25
8
  requestCount: number;
26
- /** Number of rows where cache_read_tokens > 0. */
27
9
  hitCount: number;
28
- /** hitCount / requestCount (0 when requestCount = 0). */
29
10
  hitRate: number;
30
- /** Distinct stable_prefix_hash values seen in this session. */
31
11
  distinctPrefixCount: number;
32
- /** Last time any row in this session was written (datetime_utc max). ISO string or null. */
33
12
  lastRequestAt: string | null;
34
- /** Estimated USD saved by cache reads in this session (best-effort). */
35
13
  estimatedSavingsUsd: number;
36
- /**
37
- * Slice 3: best-effort remaining TTL on the Anthropic cache breakpoint
38
- * established at lastRequestAt. Null for non-claude CLIs (we have no
39
- * read on their cache state) and null when lastRequestAt is null.
40
- * Computed by computeTtlRemaining(); see ttlPolicy parameter.
41
- */
42
14
  ttlRemainingMs: number | null;
43
15
  }
44
16
  export interface PrefixCacheStats {
@@ -48,7 +20,6 @@ export interface PrefixCacheStats {
48
20
  hitRate: number;
49
21
  totalCacheReadTokens: number;
50
22
  totalCacheCreationTokens: number;
51
- /** Distinct CLI x model combos that hashed to this prefix. */
52
23
  cliBreakdown: Array<{
53
24
  cli: CacheStatsCli;
54
25
  model: string;
@@ -59,7 +30,6 @@ export interface PrefixCacheStats {
59
30
  estimatedSavingsUsd: number;
60
31
  }
61
32
  export interface GlobalCacheStats {
62
- /** Optional window: rows since (now - lastNHours * 3600s). */
63
33
  windowHours: number | null;
64
34
  totalRequests: number;
65
35
  totalHits: number;
@@ -76,27 +46,6 @@ export interface GlobalCacheStats {
76
46
  estimatedSavingsUsd: number;
77
47
  }>;
78
48
  estimatedSavingsUsd: number;
79
- /**
80
- * Rec #3 (slice κ): derived metrics that distinguish gateway-driven
81
- * κ-explicit `cache_control` breakpoints from Claude Code's
82
- * own baseline cache reads.
83
- *
84
- * - explicitCacheControlRows: rows where the gateway emitted at
85
- * least one `cache_control` marker (`cache_control_blocks > 0`).
86
- * - explicitCacheControlHits: those rows whose `cache_read_tokens
87
- * > 0` — closest signal we have to "the caller's marked block
88
- * actually hit Anthropic's cache" (still includes Claude Code's
89
- * baseline cache reads on top, which is unavoidable without
90
- * per-block token accounting from Anthropic).
91
- * - explicitCacheControlHitRate: ratio explicit hits / explicit rows.
92
- * - stablePrefixReuseCount: distinct `stable_prefix_hash` values
93
- * that appear in >1 row in-window (i.e. real reuse opportunities).
94
- * - avgCacheCreationAfterFirstCall: averaged across stable-prefix
95
- * reuse groups, the cache_creation_tokens on rows AFTER the
96
- * first-by-datetime in each group. Drops sharply when caller
97
- * blocks are reused; stays high when Claude Code's session-wrap
98
- * floor dominates.
99
- */
100
49
  explicitCacheControlRows: number;
101
50
  explicitCacheControlHits: number;
102
51
  explicitCacheControlHitRate: number;
@@ -105,38 +54,15 @@ export interface GlobalCacheStats {
105
54
  }
106
55
  export declare function computeSessionCacheStats(db: FlightRecorderQuery, sessionId: string): SessionCacheStats;
107
56
  export interface TtlPolicy {
108
- /**
109
- * Seconds: how long Anthropic holds a cache entry after the last
110
- * write. Default 300 (5 minutes). Set to 3600 when the operator has
111
- * opted into Anthropic's 1-hour cache TTL via
112
- * `[cache_awareness].anthropic_ttl_seconds = 3600`.
113
- */
114
57
  anthropicTtlSeconds: 300 | 3600;
115
- /** Defaults to `() => Date.now()`. Overridable for deterministic tests. */
116
58
  now?: () => number;
117
59
  }
118
- /**
119
- * Slice 3: compute the best-effort milliseconds remaining on the cache
120
- * breakpoint established at `stats.lastRequestAt`.
121
- *
122
- * - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
123
- * as max(0, ttl - (now - lastWriteAt)).
124
- * - Other CLIs: returns null. We do not observe the provider's actual
125
- * cache state, so any number we'd return would be a guess. session_get
126
- * and cache_state resources should report null for these.
127
- *
128
- * Note: this is "best effort". A cache eviction inside Anthropic's
129
- * window will NOT be visible to us — the warning may be optimistic
130
- * (see risks section in dag.toml).
131
- */
132
60
  export declare function computeTtlRemaining(stats: SessionCacheStats, cli: CacheStatsCli | null, ttlPolicy: TtlPolicy): number | null;
133
61
  export declare function computePrefixCacheStats(db: FlightRecorderQuery, stablePrefixHash: string): PrefixCacheStats;
134
62
  export interface GlobalCacheStatsOpts {
135
- /** If set, restrict to rows whose datetime_utc is within the last N hours. */
136
63
  lastNHours?: number;
137
64
  }
138
65
  export declare function computeGlobalCacheStats(db: FlightRecorderQuery, opts?: GlobalCacheStatsOpts): GlobalCacheStats;
139
- /** Default response truncation budget, matching llm_job_result's maxChars. */
140
66
  export declare const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200000;
141
67
  export interface PersistedRequestRecord {
142
68
  correlationId: string;
@@ -151,35 +77,20 @@ export interface PersistedRequestRecord {
151
77
  retryCount: number | null;
152
78
  circuitBreakerState: string | null;
153
79
  costUsd: number | null;
154
- /** NULL for sync requests; the async job UUID for *_request_async rows. */
155
80
  asyncJobId: string | null;
156
81
  inputTokens: number | null;
157
82
  outputTokens: number | null;
158
83
  cacheReadTokens: number | null;
159
84
  cacheCreationTokens: number | null;
160
- /** Full character length of the persisted prompt (always reported). */
161
85
  promptChars: number;
162
- /** Full character length of the persisted response (pre-truncation). */
163
86
  responseChars: number;
164
- /** True when `response` was clipped to `maxChars`. */
165
87
  responseTruncated: boolean;
166
- /** Persisted response text, truncated to maxChars. NULL if the row never completed. */
167
88
  response: string | null;
168
- /** Only present when includePrompt = true. */
169
89
  prompt?: string;
170
- /** Parsed thinking blocks (claude), or null. */
171
90
  thinkingBlocks: string[] | null;
172
91
  }
173
92
  export interface ReadPersistedRequestOptions {
174
- /** Truncate the returned response to this many characters. Default 200000. */
175
93
  maxChars?: number;
176
- /** Include the full persisted prompt text in the result. Default false. */
177
94
  includePrompt?: boolean;
178
95
  }
179
- /**
180
- * Fetch a single persisted request by correlation id from the flight recorder.
181
- * Returns null when no row matches (including a NoopFlightRecorder, which
182
- * yields no rows — i.e. flight recording disabled). The response is truncated
183
- * to `maxChars`; the full pre-truncation length is reported via responseChars.
184
- */
185
96
  export declare function readPersistedRequest(db: FlightRecorderQuery, correlationId: string, opts?: ReadPersistedRequestOptions): PersistedRequestRecord | null;
@@ -1,17 +1,3 @@
1
- /**
2
- * Cache observability aggregates.
3
- *
4
- * Pure read-only aggregation over the FlightRecorder's `requests` table.
5
- * No new storage — every value is computed at query time from existing
6
- * columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
7
- * `datetime_utc`, etc.).
8
- *
9
- * COALESCE / NULL handling: rows from before the v3 migration have NULL
10
- * for stable_prefix_*. Rows from CLIs whose parser does not surface cache
11
- * tokens (gemini, grok, mistral, and codex until its parser is fixed)
12
- * have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
13
- * tolerate NULL via COALESCE(col, 0) — never divides by zero.
14
- */
15
1
  import { estimateCacheSavingsUsd } from "./pricing.js";
16
2
  function safeNum(n) {
17
3
  return typeof n === "number" && Number.isFinite(n) ? n : 0;
@@ -64,27 +50,9 @@ export function computeSessionCacheStats(db, sessionId) {
64
50
  distinctPrefixCount: prefixSet.size,
65
51
  lastRequestAt: lastAt,
66
52
  estimatedSavingsUsd,
67
- // ttlRemainingMs is populated by computeTtlRemaining() — the field
68
- // exists on the type so the resource shape is uniform, but its value
69
- // is left null here. Callers (session_get / cache_state resources)
70
- // apply the configured TTL policy and set the field.
71
53
  ttlRemainingMs: null,
72
54
  };
73
55
  }
74
- /**
75
- * Slice 3: compute the best-effort milliseconds remaining on the cache
76
- * breakpoint established at `stats.lastRequestAt`.
77
- *
78
- * - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
79
- * as max(0, ttl - (now - lastWriteAt)).
80
- * - Other CLIs: returns null. We do not observe the provider's actual
81
- * cache state, so any number we'd return would be a guess. session_get
82
- * and cache_state resources should report null for these.
83
- *
84
- * Note: this is "best effort". A cache eviction inside Anthropic's
85
- * window will NOT be visible to us — the warning may be optimistic
86
- * (see risks section in dag.toml).
87
- */
88
56
  export function computeTtlRemaining(stats, cli, ttlPolicy) {
89
57
  if (cli !== "claude")
90
58
  return null;
@@ -177,16 +145,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
177
145
  let totalRead = 0;
178
146
  let totalCreation = 0;
179
147
  let totalSavings = 0;
180
- // Rec #3: κ-explicit metrics. A row is "κ-explicit" iff it has
181
- // `cache_control_blocks > 0` — i.e. the gateway emitted at least one
182
- // caller-supplied `cache_control` marker. Rows with NULL or 0 are
183
- // either pre-v4 or non-κ Claude / non-Claude requests.
184
148
  let explicitRows = 0;
185
149
  let explicitHits = 0;
186
- // Per-prefix reuse tracking: collect cache_creation_tokens for every
187
- // row keyed by stable_prefix_hash, ordered ascending by datetime_utc.
188
- // For each group with >1 row, drop the first (the cache-write call)
189
- // and average the rest (the cache-read calls).
190
150
  const perPrefix = new Map();
191
151
  for (const row of rows) {
192
152
  totalRequests += 1;
@@ -235,8 +195,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
235
195
  continue;
236
196
  stablePrefixReuseCount += 1;
237
197
  arr.sort((a, b) => a.datetime_utc < b.datetime_utc ? -1 : a.datetime_utc > b.datetime_utc ? 1 : 0);
238
- // Every row after the first-by-time in this prefix group (the reuse
239
- // calls). Iterate the tail directly rather than index-walking `arr`.
240
198
  const [, ...afterFirst] = arr;
241
199
  for (const entry of afterFirst) {
242
200
  creationAfterFirstSum += entry.cache_creation_tokens;
@@ -269,20 +227,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
269
227
  avgCacheCreationAfterFirstCall,
270
228
  };
271
229
  }
272
- //──────────────────────────────────────────────────────────────────────────────
273
- // Read-back of a single persisted request by correlation id.
274
- //
275
- // The flight recorder already persists every request's `response` column on
276
- // logComplete (flight-recorder.ts), regardless of sync vs async. But the only
277
- // MCP read-back surface — llm_job_result — is keyed on an async job id and
278
- // reads the AsyncJobManager, not the recorder. So a *sync* response (which has
279
- // async_job_id = NULL and is handed back inline exactly once) has no retrieval
280
- // path after the fact. This helper closes that gap: given the correlationId
281
- // that every sync/async response echoes in `structuredContent.correlationId`,
282
- // it returns the persisted row from the recorder. Pure read-only — uses the
283
- // same FlightRecorderQuery surface as the cache aggregates above.
284
- //──────────────────────────────────────────────────────────────────────────────
285
- /** Default response truncation budget, matching llm_job_result's maxChars. */
286
230
  export const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200_000;
287
231
  function parseThinkingBlocks(raw) {
288
232
  if (!raw)
@@ -295,12 +239,6 @@ function parseThinkingBlocks(raw) {
295
239
  return null;
296
240
  }
297
241
  }
298
- /**
299
- * Fetch a single persisted request by correlation id from the flight recorder.
300
- * Returns null when no row matches (including a NoopFlightRecorder, which
301
- * yields no rows — i.e. flight recording disabled). The response is truncated
302
- * to `maxChars`; the full pre-truncation length is reported via responseChars.
303
- */
304
242
  export function readPersistedRequest(db, correlationId, opts = {}) {
305
243
  const maxChars = opts.maxChars ?? PERSISTED_REQUEST_DEFAULT_MAX_CHARS;
306
244
  const rows = db.queryRequests(`SELECT r.id, r.cli, r.model, r.prompt, r.response, r.session_id,
@@ -113,7 +113,6 @@ function toClaudeServerDef(server) {
113
113
  if (server === "ref_tools" && process.env.REF_API_KEY) {
114
114
  env.REF_API_KEY = process.env.REF_API_KEY;
115
115
  }
116
- // sqry should always be usable without env, but exa/ref_tools typically need credentials.
117
116
  if ((server === "exa" && !env.EXA_API_KEY) || (server === "ref_tools" && !env.REF_API_KEY)) {
118
117
  return null;
119
118
  }
@@ -24,14 +24,6 @@ export interface CliUpgradePlan {
24
24
  note?: string;
25
25
  }
26
26
  export type MistralInstallMethod = "pip" | "uv" | "brew" | "unknown";
27
- /**
28
- * Detect how Vibe was installed on this machine. Vibe does not self-update, so
29
- * cli_upgrade has to dispatch to the package manager that owns the binary.
30
- *
31
- * Probe order: pip → uv → brew. The first one that returns a positive signal
32
- * wins; if none do, callers should surface an actionable error rather than
33
- * blindly running `vibe update` (a command that does not exist).
34
- */
35
27
  export declare function detectMistralInstallMethod(exec?: (cmd: string, args: string[]) => {
36
28
  exitCode: number | null;
37
29
  stdout: string;
@@ -3,14 +3,6 @@ import { executeCli } from "./executor.js";
3
3
  import { getProviderRuntimeStatus } from "./provider-status.js";
4
4
  const MISTRAL_VIBE_PACKAGE = "mistral-vibe";
5
5
  const LEGACY_VIBE_PACKAGE = "vibe-cli";
6
- /**
7
- * Detect how Vibe was installed on this machine. Vibe does not self-update, so
8
- * cli_upgrade has to dispatch to the package manager that owns the binary.
9
- *
10
- * Probe order: pip → uv → brew. The first one that returns a positive signal
11
- * wins; if none do, callers should surface an actionable error rather than
12
- * blindly running `vibe update` (a command that does not exist).
13
- */
14
6
  export function detectMistralInstallMethod(exec = (cmd, args) => {
15
7
  const result = spawnSync(cmd, args, { encoding: "utf8", timeout: 5_000, windowsHide: true });
16
8
  return {
@@ -155,10 +147,6 @@ export async function getCliVersions(cli) {
155
147
  }
156
148
  function buildMistralUpgradePlan(normalizedTarget, detectMistral) {
157
149
  const method = detectMistral();
158
- // Vibe ships no self-update command. cli_upgrade dispatches to the installer
159
- // it detects; if none can be detected the caller gets an actionable error
160
- // (we surface it as a no-op plan with `command: ""` so runCliUpgrade can
161
- // throw before spawning anything).
162
150
  if (method === "pip") {
163
151
  const pkg = normalizedTarget === "latest"
164
152
  ? MISTRAL_VIBE_PACKAGE
@@ -1,23 +1,3 @@
1
- /**
2
- * Parser for Codex CLI `--json` JSONL event stream.
3
- *
4
- * Codex emits one JSON object per line, e.g.:
5
- * {"type":"thread.started","thread_id":"t-abc"}
6
- * {"type":"turn.started","turn_id":"u-001"}
7
- * {"type":"item.started","item":{...}}
8
- * {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
9
- * {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
10
- * {"type":"turn.failed","error":{...}}
11
- * {"type":"error","message":"..."}
12
- *
13
- * This parser is lenient: malformed lines are skipped, partial streams are
14
- * tolerated (usage is `undefined` if no turn.completed event arrived), and
15
- * error events are surfaced.
16
- *
17
- * Cost is intentionally NOT computed here — Codex does not price client-side
18
- * and U23 only plumbs tokens. A future unit can compute cost from the model
19
- * registry.
20
- */
21
1
  export interface CodexUsage {
22
2
  input_tokens: number;
23
3
  output_tokens: number;
@@ -1,23 +1,3 @@
1
- /**
2
- * Parser for Codex CLI `--json` JSONL event stream.
3
- *
4
- * Codex emits one JSON object per line, e.g.:
5
- * {"type":"thread.started","thread_id":"t-abc"}
6
- * {"type":"turn.started","turn_id":"u-001"}
7
- * {"type":"item.started","item":{...}}
8
- * {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
9
- * {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
10
- * {"type":"turn.failed","error":{...}}
11
- * {"type":"error","message":"..."}
12
- *
13
- * This parser is lenient: malformed lines are skipped, partial streams are
14
- * tolerated (usage is `undefined` if no turn.completed event arrived), and
15
- * error events are surfaced.
16
- *
17
- * Cost is intentionally NOT computed here — Codex does not price client-side
18
- * and U23 only plumbs tokens. A future unit can compute cost from the model
19
- * registry.
20
- */
21
1
  export function parseCodexJsonStream(stdout) {
22
2
  const lines = stdout.split("\n").filter(line => line.trim().length > 0);
23
3
  const result = {};
@@ -28,7 +8,6 @@ export function parseCodexJsonStream(stdout) {
28
8
  parsed = JSON.parse(line);
29
9
  }
30
10
  catch {
31
- // Skip preamble/garbage lines that aren't valid JSON.
32
11
  continue;
33
12
  }
34
13
  if (!parsed || typeof parsed !== "object") {
package/dist/config.d.ts CHANGED
@@ -13,11 +13,6 @@ export interface Config {
13
13
  database?: DatabaseConfig;
14
14
  sessionTtl: number;
15
15
  }
16
- /**
17
- * Load configuration from environment variables.
18
- * Always returns a Config object with base fields.
19
- * Database fields are populated when DATABASE_URL is set.
20
- */
21
16
  export declare function loadConfig(): Config;
22
17
  export declare const PERSISTENCE_BACKENDS: readonly ["sqlite", "postgres", "memory", "none"];
23
18
  export type PersistenceBackend = (typeof PERSISTENCE_BACKENDS)[number];
@@ -30,32 +25,16 @@ export interface PersistenceConfig {
30
25
  retentionDays: number;
31
26
  dedupWindowMs: number;
32
27
  acknowledgeEphemeral: boolean;
33
- /** True iff async-job tools should be registered on the MCP server. */
34
28
  asyncJobsEnabled: boolean;
35
- /** Audit trail: which inputs (file, env vars) contributed to the resolved config. */
36
29
  sources: PersistenceConfigSources;
37
30
  }
38
31
  export interface PersistenceConfigSources {
39
32
  configFile: string | null;
40
33
  envOverrides: string[];
41
34
  }
42
- /**
43
- * Load and validate the persistence config from (in order, last-write-wins):
44
- * 1. Built-in defaults (backend=sqlite, default retention/dedup).
45
- * 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
46
- * 3. Legacy env vars (with deprecation warning).
47
- *
48
- * Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
49
- */
50
35
  export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
51
36
  export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
52
37
  export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
53
- /**
54
- * Per-Anthropic-model-family minimum cacheable tokens. Sourced from
55
- * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
56
- * 2026-05-26). Models below the threshold cannot be cached even with
57
- * cache_control set — Anthropic silently returns un-cached.
58
- */
59
38
  export declare const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL: {
60
39
  readonly sonnet: 1024;
61
40
  readonly opus: 4096;
@@ -73,19 +52,9 @@ export interface CacheAwarenessConfig {
73
52
  haiku: number;
74
53
  default: number;
75
54
  };
76
- /** Audit trail: file the config was loaded from (or null if defaults). */
77
55
  sources: {
78
56
  configFile: string | null;
79
57
  };
80
58
  }
81
- /**
82
- * Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
83
- * behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
84
- */
85
59
  export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
86
- /**
87
- * Look up the per-model-family threshold. `modelName` is the user-facing model
88
- * string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
89
- * when the family is unrecognised.
90
- */
91
60
  export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;
package/dist/config.js CHANGED
@@ -4,30 +4,22 @@ import path from "path";
4
4
  import { createRequire } from "module";
5
5
  import { z } from "zod/v3";
6
6
  import { logWarn, noopLogger } from "./logger.js";
7
- // Zod schemas for configuration validation
8
7
  const DatabaseUrlSchema = z
9
8
  .string()
10
9
  .url()
11
10
  .refine(url => url.startsWith("postgresql://") || url.startsWith("postgres://"), {
12
11
  message: "Database URL must start with postgresql:// or postgres://",
13
12
  });
14
- export const DEFAULT_SESSION_TTL_SECONDS = 2592000; // 30 days
15
- /**
16
- * Load configuration from environment variables.
17
- * Always returns a Config object with base fields.
18
- * Database fields are populated when DATABASE_URL is set.
19
- */
13
+ export const DEFAULT_SESSION_TTL_SECONDS = 2592000;
20
14
  export function loadConfig() {
21
15
  const databaseUrl = process.env.DATABASE_URL;
22
16
  const rawSessionTtl = parseInt(process.env.SESSION_TTL || String(DEFAULT_SESSION_TTL_SECONDS), 10);
23
17
  const sessionTtl = Number.isFinite(rawSessionTtl) && rawSessionTtl > 0
24
18
  ? rawSessionTtl
25
19
  : DEFAULT_SESSION_TTL_SECONDS;
26
- // If no database config, return base config (file-based storage)
27
20
  if (!databaseUrl) {
28
21
  return { sessionTtl };
29
22
  }
30
- // Validate URL
31
23
  try {
32
24
  DatabaseUrlSchema.parse(databaseUrl);
33
25
  }
@@ -47,25 +39,9 @@ export function loadConfig() {
47
39
  sessionTtl,
48
40
  };
49
41
  }
50
- //──────────────────────────────────────────────────────────────────────────────
51
- // Persistence configuration
52
- //
53
- // The async job store is now driven by a typed config (TOML file +
54
- // validated env-var overrides) instead of a single LLM_GATEWAY_LOGS_DB env
55
- // var. The structural invariant: `*_request_async` tools are only registered
56
- // when a real durable store is attached, so silent in-memory loss after the
57
- // 1h TTL becomes impossible.
58
- //
59
- // Backends:
60
- // - "sqlite": durable on disk (default).
61
- // - "postgres": durable in Postgres (interface only — impl not yet shipped).
62
- // - "memory": in-process MemoryJobStore. Process-lifetime durability only.
63
- // Requires acknowledgeEphemeral=true to register async tools.
64
- // - "none": no store. Async tools are NOT registered.
65
- //──────────────────────────────────────────────────────────────────────────────
66
42
  export const PERSISTENCE_BACKENDS = ["sqlite", "postgres", "memory", "none"];
67
43
  export const DEFAULT_JOB_RETENTION_DAYS = 30;
68
- export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000; // 1 hour
44
+ export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000;
69
45
  const PersistenceSchema = z
70
46
  .object({
71
47
  backend: z.enum(PERSISTENCE_BACKENDS).default("sqlite"),
@@ -80,10 +56,6 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
80
56
  function defaultPersistenceConfigPath() {
81
57
  return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
82
58
  }
83
- /**
84
- * Read and parse the optional TOML config file. Returns the raw `[persistence]`
85
- * table (if present) and the file path. Missing file is fine — defaults apply.
86
- */
87
59
  function readPersistenceFile(configPath, logger) {
88
60
  if (!existsSync(configPath)) {
89
61
  return { raw: undefined, sourcePath: null };
@@ -100,18 +72,10 @@ function readPersistenceFile(configPath, logger) {
100
72
  return { raw: undefined, sourcePath: null };
101
73
  }
102
74
  }
103
- /**
104
- * Apply legacy env-var overrides on top of the file/defaults. Each application
105
- * appends a string to `sources.envOverrides` and emits a one-time deprecation
106
- * warning so operators can migrate to the config file.
107
- */
108
75
  function applyEnvOverrides(base, logger, sources) {
109
76
  const out = { ...base };
110
77
  const jobsDbEnv = process.env.LLM_GATEWAY_JOBS_DB;
111
78
  const logsDbEnv = process.env.LLM_GATEWAY_LOGS_DB;
112
- // Empty string is treated as "not set" — only an explicitly non-empty value
113
- // (or the literal "none") overrides the file/defaults. This avoids the
114
- // old footgun where `LLM_GATEWAY_LOGS_DB=` silently disabled persistence.
115
79
  const dbEnvRaw = jobsDbEnv && jobsDbEnv.length > 0
116
80
  ? jobsDbEnv
117
81
  : logsDbEnv && logsDbEnv.length > 0
@@ -160,14 +124,6 @@ function applyEnvOverrides(base, logger, sources) {
160
124
  function expandHome(p) {
161
125
  return p.startsWith("~/") ? path.join(os.homedir(), p.slice(2)) : p;
162
126
  }
163
- /**
164
- * Load and validate the persistence config from (in order, last-write-wins):
165
- * 1. Built-in defaults (backend=sqlite, default retention/dedup).
166
- * 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
167
- * 3. Legacy env vars (with deprecation warning).
168
- *
169
- * Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
170
- */
171
127
  export function loadPersistenceConfig(logger = noopLogger) {
172
128
  const configPath = defaultPersistenceConfigPath();
173
129
  const { raw, sourcePath } = readPersistenceFile(configPath, logger);
@@ -209,24 +165,7 @@ export function loadPersistenceConfig(logger = noopLogger) {
209
165
  sources,
210
166
  };
211
167
  }
212
- //──────────────────────────────────────────────────────────────────────────────
213
- // Cache-awareness configuration
214
- //
215
- // Reads the [cache_awareness] block from the same ~/.llm-cli-gateway/config.toml
216
- // file as [persistence], but uses a SEPARATE loader and schema. Keeping the two
217
- // independent means a malformed [cache_awareness] never breaks persistence
218
- // loading and vice versa. No env-var overrides — purely TOML.
219
- //
220
- // All defaults are "off"; behavioural changes (slice 1 cache_control, slice 3
221
- // TTL warnings) ship dormant until operators opt in.
222
- //──────────────────────────────────────────────────────────────────────────────
223
168
  export const ANTHROPIC_TTL_SECONDS_VALUES = [300, 3600];
224
- /**
225
- * Per-Anthropic-model-family minimum cacheable tokens. Sourced from
226
- * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
227
- * 2026-05-26). Models below the threshold cannot be cached even with
228
- * cache_control set — Anthropic silently returns un-cached.
229
- */
230
169
  export const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL = {
231
170
  sonnet: 1024,
232
171
  opus: 4096,
@@ -275,10 +214,6 @@ function readCacheAwarenessFile(configPath, logger) {
275
214
  return { raw: undefined, sourcePath: null };
276
215
  }
277
216
  }
278
- /**
279
- * Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
280
- * behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
281
- */
282
217
  export function loadCacheAwarenessConfig(logger = noopLogger) {
283
218
  const configPath = defaultPersistenceConfigPath();
284
219
  const { raw, sourcePath } = readCacheAwarenessFile(configPath, logger);
@@ -302,11 +237,6 @@ export function loadCacheAwarenessConfig(logger = noopLogger) {
302
237
  sources: { configFile: sourcePath },
303
238
  };
304
239
  }
305
- /**
306
- * Look up the per-model-family threshold. `modelName` is the user-facing model
307
- * string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
308
- * when the family is unrecognised.
309
- */
310
240
  export function minStableTokensForModel(config, modelName) {
311
241
  const lower = modelName.toLowerCase();
312
242
  const table = config.minStableTokensForCacheControl;
package/dist/db.d.ts CHANGED
@@ -7,32 +7,14 @@ export interface HealthCheckResult {
7
7
  latency: number;
8
8
  };
9
9
  }
10
- /**
11
- * Database connection manager for PostgreSQL-backed sessions.
12
- */
13
10
  export declare class DatabaseConnection {
14
11
  private logger;
15
12
  private pool;
16
13
  private config;
17
14
  constructor(config: Config, logger?: Logger);
18
- /**
19
- * Initialize connection to PostgreSQL.
20
- */
21
15
  connect(): Promise<void>;
22
- /**
23
- * Graceful shutdown - close all connections
24
- */
25
16
  disconnect(): Promise<void>;
26
- /**
27
- * Health check for PostgreSQL.
28
- */
29
17
  healthCheck(): Promise<HealthCheckResult>;
30
- /**
31
- * Get PostgreSQL pool
32
- */
33
18
  getPool(): Pool;
34
19
  }
35
- /**
36
- * Factory function to create and connect DatabaseConnection
37
- */
38
20
  export declare function createDatabaseConnection(config: Config, logger?: Logger): Promise<DatabaseConnection>;