llm-cli-gateway 1.17.4 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +1 -1
- package/dist/approval-manager.js +0 -8
- package/dist/async-job-manager.d.ts +0 -113
- package/dist/async-job-manager.js +6 -124
- package/dist/cache-stats.d.ts +0 -89
- package/dist/cache-stats.js +0 -62
- package/dist/claude-mcp-config.js +0 -1
- package/dist/cli-updater.d.ts +0 -8
- package/dist/cli-updater.js +0 -12
- package/dist/codex-json-parser.d.ts +0 -20
- package/dist/codex-json-parser.js +0 -21
- package/dist/config.d.ts +0 -31
- package/dist/config.js +2 -72
- package/dist/db.d.ts +0 -18
- package/dist/db.js +0 -22
- package/dist/doctor.d.ts +0 -49
- package/dist/doctor.js +0 -47
- package/dist/endpoint-exposure.js +0 -1
- package/dist/executor.d.ts +0 -19
- package/dist/executor.js +3 -38
- package/dist/flight-recorder.d.ts +0 -26
- package/dist/flight-recorder.js +1 -70
- package/dist/gemini-json-parser.d.ts +0 -25
- package/dist/gemini-json-parser.js +0 -28
- package/dist/health.d.ts +0 -3
- package/dist/health.js +0 -3
- package/dist/index.d.ts +1 -221
- package/dist/index.js +14 -563
- package/dist/job-store.d.ts +0 -74
- package/dist/job-store.js +1 -73
- package/dist/logger.d.ts +0 -7
- package/dist/logger.js +0 -6
- package/dist/migrate-sessions.d.ts +0 -3
- package/dist/migrate-sessions.js +0 -16
- package/dist/migrate.js +1 -18
- package/dist/mistral-meta-json-parser.js +0 -67
- package/dist/model-registry.js +0 -13
- package/dist/pricing.d.ts +0 -46
- package/dist/pricing.js +0 -47
- package/dist/process-monitor.d.ts +0 -15
- package/dist/process-monitor.js +2 -31
- package/dist/prompt-parts.d.ts +0 -25
- package/dist/prompt-parts.js +0 -11
- package/dist/provider-status.d.ts +0 -8
- package/dist/provider-status.js +0 -11
- package/dist/request-helpers.d.ts +0 -334
- package/dist/request-helpers.js +1 -229
- package/dist/resources.d.ts +0 -20
- package/dist/resources.js +1 -34
- package/dist/retry.d.ts +0 -45
- package/dist/retry.js +3 -40
- package/dist/session-manager-pg.d.ts +0 -32
- package/dist/session-manager-pg.js +0 -32
- package/dist/session-manager.d.ts +0 -21
- package/dist/session-manager.js +1 -15
- package/dist/stream-json-parser.d.ts +0 -18
- package/dist/stream-json-parser.js +0 -22
- package/dist/upstream-contracts.d.ts +0 -55
- package/dist/upstream-contracts.js +0 -77
- package/dist/validation-orchestrator.js +0 -3
- package/dist/worktree-manager.d.ts +0 -9
- package/dist/worktree-manager.js +0 -21
- package/package.json +1 -1
package/dist/cache-stats.d.ts
CHANGED
|
@@ -1,44 +1,16 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Cache observability aggregates.
|
|
3
|
-
*
|
|
4
|
-
* Pure read-only aggregation over the FlightRecorder's `requests` table.
|
|
5
|
-
* No new storage — every value is computed at query time from existing
|
|
6
|
-
* columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
|
|
7
|
-
* `datetime_utc`, etc.).
|
|
8
|
-
*
|
|
9
|
-
* COALESCE / NULL handling: rows from before the v3 migration have NULL
|
|
10
|
-
* for stable_prefix_*. Rows from CLIs whose parser does not surface cache
|
|
11
|
-
* tokens (gemini, grok, mistral, and codex until its parser is fixed)
|
|
12
|
-
* have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
|
|
13
|
-
* tolerate NULL via COALESCE(col, 0) — never divides by zero.
|
|
14
|
-
*/
|
|
15
1
|
import type { FlightRecorderQuery } from "./flight-recorder.js";
|
|
16
2
|
export type CacheStatsCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
|
|
17
3
|
export interface SessionCacheStats {
|
|
18
4
|
sessionId: string;
|
|
19
5
|
cli: CacheStatsCli | null;
|
|
20
|
-
/** Total cache_read_tokens across all rows in this session. */
|
|
21
6
|
totalCacheReadTokens: number;
|
|
22
|
-
/** Total cache_creation_tokens across all rows in this session. */
|
|
23
7
|
totalCacheCreationTokens: number;
|
|
24
|
-
/** Number of rows in this session. */
|
|
25
8
|
requestCount: number;
|
|
26
|
-
/** Number of rows where cache_read_tokens > 0. */
|
|
27
9
|
hitCount: number;
|
|
28
|
-
/** hitCount / requestCount (0 when requestCount = 0). */
|
|
29
10
|
hitRate: number;
|
|
30
|
-
/** Distinct stable_prefix_hash values seen in this session. */
|
|
31
11
|
distinctPrefixCount: number;
|
|
32
|
-
/** Last time any row in this session was written (datetime_utc max). ISO string or null. */
|
|
33
12
|
lastRequestAt: string | null;
|
|
34
|
-
/** Estimated USD saved by cache reads in this session (best-effort). */
|
|
35
13
|
estimatedSavingsUsd: number;
|
|
36
|
-
/**
|
|
37
|
-
* Slice 3: best-effort remaining TTL on the Anthropic cache breakpoint
|
|
38
|
-
* established at lastRequestAt. Null for non-claude CLIs (we have no
|
|
39
|
-
* read on their cache state) and null when lastRequestAt is null.
|
|
40
|
-
* Computed by computeTtlRemaining(); see ttlPolicy parameter.
|
|
41
|
-
*/
|
|
42
14
|
ttlRemainingMs: number | null;
|
|
43
15
|
}
|
|
44
16
|
export interface PrefixCacheStats {
|
|
@@ -48,7 +20,6 @@ export interface PrefixCacheStats {
|
|
|
48
20
|
hitRate: number;
|
|
49
21
|
totalCacheReadTokens: number;
|
|
50
22
|
totalCacheCreationTokens: number;
|
|
51
|
-
/** Distinct CLI x model combos that hashed to this prefix. */
|
|
52
23
|
cliBreakdown: Array<{
|
|
53
24
|
cli: CacheStatsCli;
|
|
54
25
|
model: string;
|
|
@@ -59,7 +30,6 @@ export interface PrefixCacheStats {
|
|
|
59
30
|
estimatedSavingsUsd: number;
|
|
60
31
|
}
|
|
61
32
|
export interface GlobalCacheStats {
|
|
62
|
-
/** Optional window: rows since (now - lastNHours * 3600s). */
|
|
63
33
|
windowHours: number | null;
|
|
64
34
|
totalRequests: number;
|
|
65
35
|
totalHits: number;
|
|
@@ -76,27 +46,6 @@ export interface GlobalCacheStats {
|
|
|
76
46
|
estimatedSavingsUsd: number;
|
|
77
47
|
}>;
|
|
78
48
|
estimatedSavingsUsd: number;
|
|
79
|
-
/**
|
|
80
|
-
* Rec #3 (slice κ): derived metrics that distinguish gateway-driven
|
|
81
|
-
* κ-explicit `cache_control` breakpoints from Claude Code's
|
|
82
|
-
* own baseline cache reads.
|
|
83
|
-
*
|
|
84
|
-
* - explicitCacheControlRows: rows where the gateway emitted at
|
|
85
|
-
* least one `cache_control` marker (`cache_control_blocks > 0`).
|
|
86
|
-
* - explicitCacheControlHits: those rows whose `cache_read_tokens
|
|
87
|
-
* > 0` — closest signal we have to "the caller's marked block
|
|
88
|
-
* actually hit Anthropic's cache" (still includes Claude Code's
|
|
89
|
-
* baseline cache reads on top, which is unavoidable without
|
|
90
|
-
* per-block token accounting from Anthropic).
|
|
91
|
-
* - explicitCacheControlHitRate: ratio explicit hits / explicit rows.
|
|
92
|
-
* - stablePrefixReuseCount: distinct `stable_prefix_hash` values
|
|
93
|
-
* that appear in >1 row in-window (i.e. real reuse opportunities).
|
|
94
|
-
* - avgCacheCreationAfterFirstCall: averaged across stable-prefix
|
|
95
|
-
* reuse groups, the cache_creation_tokens on rows AFTER the
|
|
96
|
-
* first-by-datetime in each group. Drops sharply when caller
|
|
97
|
-
* blocks are reused; stays high when Claude Code's session-wrap
|
|
98
|
-
* floor dominates.
|
|
99
|
-
*/
|
|
100
49
|
explicitCacheControlRows: number;
|
|
101
50
|
explicitCacheControlHits: number;
|
|
102
51
|
explicitCacheControlHitRate: number;
|
|
@@ -105,38 +54,15 @@ export interface GlobalCacheStats {
|
|
|
105
54
|
}
|
|
106
55
|
export declare function computeSessionCacheStats(db: FlightRecorderQuery, sessionId: string): SessionCacheStats;
|
|
107
56
|
export interface TtlPolicy {
|
|
108
|
-
/**
|
|
109
|
-
* Seconds: how long Anthropic holds a cache entry after the last
|
|
110
|
-
* write. Default 300 (5 minutes). Set to 3600 when the operator has
|
|
111
|
-
* opted into Anthropic's 1-hour cache TTL via
|
|
112
|
-
* `[cache_awareness].anthropic_ttl_seconds = 3600`.
|
|
113
|
-
*/
|
|
114
57
|
anthropicTtlSeconds: 300 | 3600;
|
|
115
|
-
/** Defaults to `() => Date.now()`. Overridable for deterministic tests. */
|
|
116
58
|
now?: () => number;
|
|
117
59
|
}
|
|
118
|
-
/**
|
|
119
|
-
* Slice 3: compute the best-effort milliseconds remaining on the cache
|
|
120
|
-
* breakpoint established at `stats.lastRequestAt`.
|
|
121
|
-
*
|
|
122
|
-
* - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
|
|
123
|
-
* as max(0, ttl - (now - lastWriteAt)).
|
|
124
|
-
* - Other CLIs: returns null. We do not observe the provider's actual
|
|
125
|
-
* cache state, so any number we'd return would be a guess. session_get
|
|
126
|
-
* and cache_state resources should report null for these.
|
|
127
|
-
*
|
|
128
|
-
* Note: this is "best effort". A cache eviction inside Anthropic's
|
|
129
|
-
* window will NOT be visible to us — the warning may be optimistic
|
|
130
|
-
* (see risks section in dag.toml).
|
|
131
|
-
*/
|
|
132
60
|
export declare function computeTtlRemaining(stats: SessionCacheStats, cli: CacheStatsCli | null, ttlPolicy: TtlPolicy): number | null;
|
|
133
61
|
export declare function computePrefixCacheStats(db: FlightRecorderQuery, stablePrefixHash: string): PrefixCacheStats;
|
|
134
62
|
export interface GlobalCacheStatsOpts {
|
|
135
|
-
/** If set, restrict to rows whose datetime_utc is within the last N hours. */
|
|
136
63
|
lastNHours?: number;
|
|
137
64
|
}
|
|
138
65
|
export declare function computeGlobalCacheStats(db: FlightRecorderQuery, opts?: GlobalCacheStatsOpts): GlobalCacheStats;
|
|
139
|
-
/** Default response truncation budget, matching llm_job_result's maxChars. */
|
|
140
66
|
export declare const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200000;
|
|
141
67
|
export interface PersistedRequestRecord {
|
|
142
68
|
correlationId: string;
|
|
@@ -151,35 +77,20 @@ export interface PersistedRequestRecord {
|
|
|
151
77
|
retryCount: number | null;
|
|
152
78
|
circuitBreakerState: string | null;
|
|
153
79
|
costUsd: number | null;
|
|
154
|
-
/** NULL for sync requests; the async job UUID for *_request_async rows. */
|
|
155
80
|
asyncJobId: string | null;
|
|
156
81
|
inputTokens: number | null;
|
|
157
82
|
outputTokens: number | null;
|
|
158
83
|
cacheReadTokens: number | null;
|
|
159
84
|
cacheCreationTokens: number | null;
|
|
160
|
-
/** Full character length of the persisted prompt (always reported). */
|
|
161
85
|
promptChars: number;
|
|
162
|
-
/** Full character length of the persisted response (pre-truncation). */
|
|
163
86
|
responseChars: number;
|
|
164
|
-
/** True when `response` was clipped to `maxChars`. */
|
|
165
87
|
responseTruncated: boolean;
|
|
166
|
-
/** Persisted response text, truncated to maxChars. NULL if the row never completed. */
|
|
167
88
|
response: string | null;
|
|
168
|
-
/** Only present when includePrompt = true. */
|
|
169
89
|
prompt?: string;
|
|
170
|
-
/** Parsed thinking blocks (claude), or null. */
|
|
171
90
|
thinkingBlocks: string[] | null;
|
|
172
91
|
}
|
|
173
92
|
export interface ReadPersistedRequestOptions {
|
|
174
|
-
/** Truncate the returned response to this many characters. Default 200000. */
|
|
175
93
|
maxChars?: number;
|
|
176
|
-
/** Include the full persisted prompt text in the result. Default false. */
|
|
177
94
|
includePrompt?: boolean;
|
|
178
95
|
}
|
|
179
|
-
/**
|
|
180
|
-
* Fetch a single persisted request by correlation id from the flight recorder.
|
|
181
|
-
* Returns null when no row matches (including a NoopFlightRecorder, which
|
|
182
|
-
* yields no rows — i.e. flight recording disabled). The response is truncated
|
|
183
|
-
* to `maxChars`; the full pre-truncation length is reported via responseChars.
|
|
184
|
-
*/
|
|
185
96
|
export declare function readPersistedRequest(db: FlightRecorderQuery, correlationId: string, opts?: ReadPersistedRequestOptions): PersistedRequestRecord | null;
|
package/dist/cache-stats.js
CHANGED
|
@@ -1,17 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Cache observability aggregates.
|
|
3
|
-
*
|
|
4
|
-
* Pure read-only aggregation over the FlightRecorder's `requests` table.
|
|
5
|
-
* No new storage — every value is computed at query time from existing
|
|
6
|
-
* columns (`cache_read_tokens`, `cache_creation_tokens`, `stable_prefix_*`,
|
|
7
|
-
* `datetime_utc`, etc.).
|
|
8
|
-
*
|
|
9
|
-
* COALESCE / NULL handling: rows from before the v3 migration have NULL
|
|
10
|
-
* for stable_prefix_*. Rows from CLIs whose parser does not surface cache
|
|
11
|
-
* tokens (gemini, grok, mistral, and codex until its parser is fixed)
|
|
12
|
-
* have NULL for cache_read_tokens / cache_creation_tokens. All aggregates
|
|
13
|
-
* tolerate NULL via COALESCE(col, 0) — never divides by zero.
|
|
14
|
-
*/
|
|
15
1
|
import { estimateCacheSavingsUsd } from "./pricing.js";
|
|
16
2
|
function safeNum(n) {
|
|
17
3
|
return typeof n === "number" && Number.isFinite(n) ? n : 0;
|
|
@@ -64,27 +50,9 @@ export function computeSessionCacheStats(db, sessionId) {
|
|
|
64
50
|
distinctPrefixCount: prefixSet.size,
|
|
65
51
|
lastRequestAt: lastAt,
|
|
66
52
|
estimatedSavingsUsd,
|
|
67
|
-
// ttlRemainingMs is populated by computeTtlRemaining() — the field
|
|
68
|
-
// exists on the type so the resource shape is uniform, but its value
|
|
69
|
-
// is left null here. Callers (session_get / cache_state resources)
|
|
70
|
-
// apply the configured TTL policy and set the field.
|
|
71
53
|
ttlRemainingMs: null,
|
|
72
54
|
};
|
|
73
55
|
}
|
|
74
|
-
/**
|
|
75
|
-
* Slice 3: compute the best-effort milliseconds remaining on the cache
|
|
76
|
-
* breakpoint established at `stats.lastRequestAt`.
|
|
77
|
-
*
|
|
78
|
-
* - Claude: Anthropic's documented TTL (5min default, 1h beta). Computed
|
|
79
|
-
* as max(0, ttl - (now - lastWriteAt)).
|
|
80
|
-
* - Other CLIs: returns null. We do not observe the provider's actual
|
|
81
|
-
* cache state, so any number we'd return would be a guess. session_get
|
|
82
|
-
* and cache_state resources should report null for these.
|
|
83
|
-
*
|
|
84
|
-
* Note: this is "best effort". A cache eviction inside Anthropic's
|
|
85
|
-
* window will NOT be visible to us — the warning may be optimistic
|
|
86
|
-
* (see risks section in dag.toml).
|
|
87
|
-
*/
|
|
88
56
|
export function computeTtlRemaining(stats, cli, ttlPolicy) {
|
|
89
57
|
if (cli !== "claude")
|
|
90
58
|
return null;
|
|
@@ -177,16 +145,8 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
177
145
|
let totalRead = 0;
|
|
178
146
|
let totalCreation = 0;
|
|
179
147
|
let totalSavings = 0;
|
|
180
|
-
// Rec #3: κ-explicit metrics. A row is "κ-explicit" iff it has
|
|
181
|
-
// `cache_control_blocks > 0` — i.e. the gateway emitted at least one
|
|
182
|
-
// caller-supplied `cache_control` marker. Rows with NULL or 0 are
|
|
183
|
-
// either pre-v4 or non-κ Claude / non-Claude requests.
|
|
184
148
|
let explicitRows = 0;
|
|
185
149
|
let explicitHits = 0;
|
|
186
|
-
// Per-prefix reuse tracking: collect cache_creation_tokens for every
|
|
187
|
-
// row keyed by stable_prefix_hash, ordered ascending by datetime_utc.
|
|
188
|
-
// For each group with >1 row, drop the first (the cache-write call)
|
|
189
|
-
// and average the rest (the cache-read calls).
|
|
190
150
|
const perPrefix = new Map();
|
|
191
151
|
for (const row of rows) {
|
|
192
152
|
totalRequests += 1;
|
|
@@ -235,8 +195,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
235
195
|
continue;
|
|
236
196
|
stablePrefixReuseCount += 1;
|
|
237
197
|
arr.sort((a, b) => a.datetime_utc < b.datetime_utc ? -1 : a.datetime_utc > b.datetime_utc ? 1 : 0);
|
|
238
|
-
// Every row after the first-by-time in this prefix group (the reuse
|
|
239
|
-
// calls). Iterate the tail directly rather than index-walking `arr`.
|
|
240
198
|
const [, ...afterFirst] = arr;
|
|
241
199
|
for (const entry of afterFirst) {
|
|
242
200
|
creationAfterFirstSum += entry.cache_creation_tokens;
|
|
@@ -269,20 +227,6 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
269
227
|
avgCacheCreationAfterFirstCall,
|
|
270
228
|
};
|
|
271
229
|
}
|
|
272
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
273
|
-
// Read-back of a single persisted request by correlation id.
|
|
274
|
-
//
|
|
275
|
-
// The flight recorder already persists every request's `response` column on
|
|
276
|
-
// logComplete (flight-recorder.ts), regardless of sync vs async. But the only
|
|
277
|
-
// MCP read-back surface — llm_job_result — is keyed on an async job id and
|
|
278
|
-
// reads the AsyncJobManager, not the recorder. So a *sync* response (which has
|
|
279
|
-
// async_job_id = NULL and is handed back inline exactly once) has no retrieval
|
|
280
|
-
// path after the fact. This helper closes that gap: given the correlationId
|
|
281
|
-
// that every sync/async response echoes in `structuredContent.correlationId`,
|
|
282
|
-
// it returns the persisted row from the recorder. Pure read-only — uses the
|
|
283
|
-
// same FlightRecorderQuery surface as the cache aggregates above.
|
|
284
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
285
|
-
/** Default response truncation budget, matching llm_job_result's maxChars. */
|
|
286
230
|
export const PERSISTED_REQUEST_DEFAULT_MAX_CHARS = 200_000;
|
|
287
231
|
function parseThinkingBlocks(raw) {
|
|
288
232
|
if (!raw)
|
|
@@ -295,12 +239,6 @@ function parseThinkingBlocks(raw) {
|
|
|
295
239
|
return null;
|
|
296
240
|
}
|
|
297
241
|
}
|
|
298
|
-
/**
|
|
299
|
-
* Fetch a single persisted request by correlation id from the flight recorder.
|
|
300
|
-
* Returns null when no row matches (including a NoopFlightRecorder, which
|
|
301
|
-
* yields no rows — i.e. flight recording disabled). The response is truncated
|
|
302
|
-
* to `maxChars`; the full pre-truncation length is reported via responseChars.
|
|
303
|
-
*/
|
|
304
242
|
export function readPersistedRequest(db, correlationId, opts = {}) {
|
|
305
243
|
const maxChars = opts.maxChars ?? PERSISTED_REQUEST_DEFAULT_MAX_CHARS;
|
|
306
244
|
const rows = db.queryRequests(`SELECT r.id, r.cli, r.model, r.prompt, r.response, r.session_id,
|
package/dist/claude-mcp-config.js
CHANGED
|
@@ -113,7 +113,6 @@ function toClaudeServerDef(server) {
|
|
|
113
113
|
if (server === "ref_tools" && process.env.REF_API_KEY) {
|
|
114
114
|
env.REF_API_KEY = process.env.REF_API_KEY;
|
|
115
115
|
}
|
|
116
|
-
// sqry should always be usable without env, but exa/ref_tools typically need credentials.
|
|
117
116
|
if ((server === "exa" && !env.EXA_API_KEY) || (server === "ref_tools" && !env.REF_API_KEY)) {
|
|
118
117
|
return null;
|
|
119
118
|
}
|
package/dist/cli-updater.d.ts
CHANGED
|
@@ -24,14 +24,6 @@ export interface CliUpgradePlan {
|
|
|
24
24
|
note?: string;
|
|
25
25
|
}
|
|
26
26
|
export type MistralInstallMethod = "pip" | "uv" | "brew" | "unknown";
|
|
27
|
-
/**
|
|
28
|
-
* Detect how Vibe was installed on this machine. Vibe does not self-update, so
|
|
29
|
-
* cli_upgrade has to dispatch to the package manager that owns the binary.
|
|
30
|
-
*
|
|
31
|
-
* Probe order: pip → uv → brew. The first one that returns a positive signal
|
|
32
|
-
* wins; if none do, callers should surface an actionable error rather than
|
|
33
|
-
* blindly running `vibe update` (a command that does not exist).
|
|
34
|
-
*/
|
|
35
27
|
export declare function detectMistralInstallMethod(exec?: (cmd: string, args: string[]) => {
|
|
36
28
|
exitCode: number | null;
|
|
37
29
|
stdout: string;
|
package/dist/cli-updater.js
CHANGED
|
@@ -3,14 +3,6 @@ import { executeCli } from "./executor.js";
|
|
|
3
3
|
import { getProviderRuntimeStatus } from "./provider-status.js";
|
|
4
4
|
const MISTRAL_VIBE_PACKAGE = "mistral-vibe";
|
|
5
5
|
const LEGACY_VIBE_PACKAGE = "vibe-cli";
|
|
6
|
-
/**
|
|
7
|
-
* Detect how Vibe was installed on this machine. Vibe does not self-update, so
|
|
8
|
-
* cli_upgrade has to dispatch to the package manager that owns the binary.
|
|
9
|
-
*
|
|
10
|
-
* Probe order: pip → uv → brew. The first one that returns a positive signal
|
|
11
|
-
* wins; if none do, callers should surface an actionable error rather than
|
|
12
|
-
* blindly running `vibe update` (a command that does not exist).
|
|
13
|
-
*/
|
|
14
6
|
export function detectMistralInstallMethod(exec = (cmd, args) => {
|
|
15
7
|
const result = spawnSync(cmd, args, { encoding: "utf8", timeout: 5_000, windowsHide: true });
|
|
16
8
|
return {
|
|
@@ -155,10 +147,6 @@ export async function getCliVersions(cli) {
|
|
|
155
147
|
}
|
|
156
148
|
function buildMistralUpgradePlan(normalizedTarget, detectMistral) {
|
|
157
149
|
const method = detectMistral();
|
|
158
|
-
// Vibe ships no self-update command. cli_upgrade dispatches to the installer
|
|
159
|
-
// it detects; if none can be detected the caller gets an actionable error
|
|
160
|
-
// (we surface it as a no-op plan with `command: ""` so runCliUpgrade can
|
|
161
|
-
// throw before spawning anything).
|
|
162
150
|
if (method === "pip") {
|
|
163
151
|
const pkg = normalizedTarget === "latest"
|
|
164
152
|
? MISTRAL_VIBE_PACKAGE
|
package/dist/codex-json-parser.d.ts
CHANGED
|
@@ -1,23 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Parser for Codex CLI `--json` JSONL event stream.
|
|
3
|
-
*
|
|
4
|
-
* Codex emits one JSON object per line, e.g.:
|
|
5
|
-
* {"type":"thread.started","thread_id":"t-abc"}
|
|
6
|
-
* {"type":"turn.started","turn_id":"u-001"}
|
|
7
|
-
* {"type":"item.started","item":{...}}
|
|
8
|
-
* {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
|
|
9
|
-
* {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
|
|
10
|
-
* {"type":"turn.failed","error":{...}}
|
|
11
|
-
* {"type":"error","message":"..."}
|
|
12
|
-
*
|
|
13
|
-
* This parser is lenient: malformed lines are skipped, partial streams are
|
|
14
|
-
* tolerated (usage is `undefined` if no turn.completed event arrived), and
|
|
15
|
-
* error events are surfaced.
|
|
16
|
-
*
|
|
17
|
-
* Cost is intentionally NOT computed here — Codex does not price client-side
|
|
18
|
-
* and U23 only plumbs tokens. A future unit can compute cost from the model
|
|
19
|
-
* registry.
|
|
20
|
-
*/
|
|
21
1
|
export interface CodexUsage {
|
|
22
2
|
input_tokens: number;
|
|
23
3
|
output_tokens: number;
|
package/dist/codex-json-parser.js
CHANGED
|
@@ -1,23 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Parser for Codex CLI `--json` JSONL event stream.
|
|
3
|
-
*
|
|
4
|
-
* Codex emits one JSON object per line, e.g.:
|
|
5
|
-
* {"type":"thread.started","thread_id":"t-abc"}
|
|
6
|
-
* {"type":"turn.started","turn_id":"u-001"}
|
|
7
|
-
* {"type":"item.started","item":{...}}
|
|
8
|
-
* {"type":"item.completed","item":{"type":"agent_message","text":"..."}}
|
|
9
|
-
* {"type":"turn.completed","usage":{"input_tokens":...,"output_tokens":...,...}}
|
|
10
|
-
* {"type":"turn.failed","error":{...}}
|
|
11
|
-
* {"type":"error","message":"..."}
|
|
12
|
-
*
|
|
13
|
-
* This parser is lenient: malformed lines are skipped, partial streams are
|
|
14
|
-
* tolerated (usage is `undefined` if no turn.completed event arrived), and
|
|
15
|
-
* error events are surfaced.
|
|
16
|
-
*
|
|
17
|
-
* Cost is intentionally NOT computed here — Codex does not price client-side
|
|
18
|
-
* and U23 only plumbs tokens. A future unit can compute cost from the model
|
|
19
|
-
* registry.
|
|
20
|
-
*/
|
|
21
1
|
export function parseCodexJsonStream(stdout) {
|
|
22
2
|
const lines = stdout.split("\n").filter(line => line.trim().length > 0);
|
|
23
3
|
const result = {};
|
|
@@ -28,7 +8,6 @@ export function parseCodexJsonStream(stdout) {
|
|
|
28
8
|
parsed = JSON.parse(line);
|
|
29
9
|
}
|
|
30
10
|
catch {
|
|
31
|
-
// Skip preamble/garbage lines that aren't valid JSON.
|
|
32
11
|
continue;
|
|
33
12
|
}
|
|
34
13
|
if (!parsed || typeof parsed !== "object") {
|
package/dist/config.d.ts
CHANGED
|
@@ -13,11 +13,6 @@ export interface Config {
|
|
|
13
13
|
database?: DatabaseConfig;
|
|
14
14
|
sessionTtl: number;
|
|
15
15
|
}
|
|
16
|
-
/**
|
|
17
|
-
* Load configuration from environment variables.
|
|
18
|
-
* Always returns a Config object with base fields.
|
|
19
|
-
* Database fields are populated when DATABASE_URL is set.
|
|
20
|
-
*/
|
|
21
16
|
export declare function loadConfig(): Config;
|
|
22
17
|
export declare const PERSISTENCE_BACKENDS: readonly ["sqlite", "postgres", "memory", "none"];
|
|
23
18
|
export type PersistenceBackend = (typeof PERSISTENCE_BACKENDS)[number];
|
|
@@ -30,32 +25,16 @@ export interface PersistenceConfig {
|
|
|
30
25
|
retentionDays: number;
|
|
31
26
|
dedupWindowMs: number;
|
|
32
27
|
acknowledgeEphemeral: boolean;
|
|
33
|
-
/** True iff async-job tools should be registered on the MCP server. */
|
|
34
28
|
asyncJobsEnabled: boolean;
|
|
35
|
-
/** Audit trail: which inputs (file, env vars) contributed to the resolved config. */
|
|
36
29
|
sources: PersistenceConfigSources;
|
|
37
30
|
}
|
|
38
31
|
export interface PersistenceConfigSources {
|
|
39
32
|
configFile: string | null;
|
|
40
33
|
envOverrides: string[];
|
|
41
34
|
}
|
|
42
|
-
/**
|
|
43
|
-
* Load and validate the persistence config from (in order, last-write-wins):
|
|
44
|
-
* 1. Built-in defaults (backend=sqlite, default retention/dedup).
|
|
45
|
-
* 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
|
|
46
|
-
* 3. Legacy env vars (with deprecation warning).
|
|
47
|
-
*
|
|
48
|
-
* Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
|
|
49
|
-
*/
|
|
50
35
|
export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
|
|
51
36
|
export declare const ANTHROPIC_TTL_SECONDS_VALUES: readonly [300, 3600];
|
|
52
37
|
export type AnthropicTtlSeconds = (typeof ANTHROPIC_TTL_SECONDS_VALUES)[number];
|
|
53
|
-
/**
|
|
54
|
-
* Per-Anthropic-model-family minimum cacheable tokens. Sourced from
|
|
55
|
-
* docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
|
|
56
|
-
* 2026-05-26). Models below the threshold cannot be cached even with
|
|
57
|
-
* cache_control set — Anthropic silently returns un-cached.
|
|
58
|
-
*/
|
|
59
38
|
export declare const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL: {
|
|
60
39
|
readonly sonnet: 1024;
|
|
61
40
|
readonly opus: 4096;
|
|
@@ -73,19 +52,9 @@ export interface CacheAwarenessConfig {
|
|
|
73
52
|
haiku: number;
|
|
74
53
|
default: number;
|
|
75
54
|
};
|
|
76
|
-
/** Audit trail: file the config was loaded from (or null if defaults). */
|
|
77
55
|
sources: {
|
|
78
56
|
configFile: string | null;
|
|
79
57
|
};
|
|
80
58
|
}
|
|
81
|
-
/**
|
|
82
|
-
* Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
|
|
83
|
-
* behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
|
|
84
|
-
*/
|
|
85
59
|
export declare function loadCacheAwarenessConfig(logger?: Logger): CacheAwarenessConfig;
|
|
86
|
-
/**
|
|
87
|
-
* Look up the per-model-family threshold. `modelName` is the user-facing model
|
|
88
|
-
* string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
|
|
89
|
-
* when the family is unrecognised.
|
|
90
|
-
*/
|
|
91
60
|
export declare function minStableTokensForModel(config: CacheAwarenessConfig, modelName: string): number;
|
package/dist/config.js
CHANGED
|
@@ -4,30 +4,22 @@ import path from "path";
|
|
|
4
4
|
import { createRequire } from "module";
|
|
5
5
|
import { z } from "zod/v3";
|
|
6
6
|
import { logWarn, noopLogger } from "./logger.js";
|
|
7
|
-
// Zod schemas for configuration validation
|
|
8
7
|
const DatabaseUrlSchema = z
|
|
9
8
|
.string()
|
|
10
9
|
.url()
|
|
11
10
|
.refine(url => url.startsWith("postgresql://") || url.startsWith("postgres://"), {
|
|
12
11
|
message: "Database URL must start with postgresql:// or postgres://",
|
|
13
12
|
});
|
|
14
|
-
export const DEFAULT_SESSION_TTL_SECONDS = 2592000;
|
|
15
|
-
/**
|
|
16
|
-
* Load configuration from environment variables.
|
|
17
|
-
* Always returns a Config object with base fields.
|
|
18
|
-
* Database fields are populated when DATABASE_URL is set.
|
|
19
|
-
*/
|
|
13
|
+
export const DEFAULT_SESSION_TTL_SECONDS = 2592000;
|
|
20
14
|
export function loadConfig() {
|
|
21
15
|
const databaseUrl = process.env.DATABASE_URL;
|
|
22
16
|
const rawSessionTtl = parseInt(process.env.SESSION_TTL || String(DEFAULT_SESSION_TTL_SECONDS), 10);
|
|
23
17
|
const sessionTtl = Number.isFinite(rawSessionTtl) && rawSessionTtl > 0
|
|
24
18
|
? rawSessionTtl
|
|
25
19
|
: DEFAULT_SESSION_TTL_SECONDS;
|
|
26
|
-
// If no database config, return base config (file-based storage)
|
|
27
20
|
if (!databaseUrl) {
|
|
28
21
|
return { sessionTtl };
|
|
29
22
|
}
|
|
30
|
-
// Validate URL
|
|
31
23
|
try {
|
|
32
24
|
DatabaseUrlSchema.parse(databaseUrl);
|
|
33
25
|
}
|
|
@@ -47,25 +39,9 @@ export function loadConfig() {
|
|
|
47
39
|
sessionTtl,
|
|
48
40
|
};
|
|
49
41
|
}
|
|
50
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
51
|
-
// Persistence configuration
|
|
52
|
-
//
|
|
53
|
-
// The async job store is now driven by a typed config (TOML file +
|
|
54
|
-
// validated env-var overrides) instead of a single LLM_GATEWAY_LOGS_DB env
|
|
55
|
-
// var. The structural invariant: `*_request_async` tools are only registered
|
|
56
|
-
// when a real durable store is attached, so silent in-memory loss after the
|
|
57
|
-
// 1h TTL becomes impossible.
|
|
58
|
-
//
|
|
59
|
-
// Backends:
|
|
60
|
-
// - "sqlite": durable on disk (default).
|
|
61
|
-
// - "postgres": durable in Postgres (interface only — impl not yet shipped).
|
|
62
|
-
// - "memory": in-process MemoryJobStore. Process-lifetime durability only.
|
|
63
|
-
// Requires acknowledgeEphemeral=true to register async tools.
|
|
64
|
-
// - "none": no store. Async tools are NOT registered.
|
|
65
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
66
42
|
export const PERSISTENCE_BACKENDS = ["sqlite", "postgres", "memory", "none"];
|
|
67
43
|
export const DEFAULT_JOB_RETENTION_DAYS = 30;
|
|
68
|
-
export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000;
|
|
44
|
+
export const DEFAULT_DEDUP_WINDOW_MS = 60 * 60 * 1000;
|
|
69
45
|
const PersistenceSchema = z
|
|
70
46
|
.object({
|
|
71
47
|
backend: z.enum(PERSISTENCE_BACKENDS).default("sqlite"),
|
|
@@ -80,10 +56,6 @@ const DEFAULT_SQLITE_PATH = path.join(os.homedir(), ".llm-cli-gateway", "logs.db
|
|
|
80
56
|
function defaultPersistenceConfigPath() {
|
|
81
57
|
return (process.env.LLM_GATEWAY_CONFIG ?? path.join(os.homedir(), ".llm-cli-gateway", "config.toml"));
|
|
82
58
|
}
|
|
83
|
-
/**
|
|
84
|
-
* Read and parse the optional TOML config file. Returns the raw `[persistence]`
|
|
85
|
-
* table (if present) and the file path. Missing file is fine — defaults apply.
|
|
86
|
-
*/
|
|
87
59
|
function readPersistenceFile(configPath, logger) {
|
|
88
60
|
if (!existsSync(configPath)) {
|
|
89
61
|
return { raw: undefined, sourcePath: null };
|
|
@@ -100,18 +72,10 @@ function readPersistenceFile(configPath, logger) {
|
|
|
100
72
|
return { raw: undefined, sourcePath: null };
|
|
101
73
|
}
|
|
102
74
|
}
|
|
103
|
-
/**
|
|
104
|
-
* Apply legacy env-var overrides on top of the file/defaults. Each application
|
|
105
|
-
* appends a string to `sources.envOverrides` and emits a one-time deprecation
|
|
106
|
-
* warning so operators can migrate to the config file.
|
|
107
|
-
*/
|
|
108
75
|
function applyEnvOverrides(base, logger, sources) {
|
|
109
76
|
const out = { ...base };
|
|
110
77
|
const jobsDbEnv = process.env.LLM_GATEWAY_JOBS_DB;
|
|
111
78
|
const logsDbEnv = process.env.LLM_GATEWAY_LOGS_DB;
|
|
112
|
-
// Empty string is treated as "not set" — only an explicitly non-empty value
|
|
113
|
-
// (or the literal "none") overrides the file/defaults. This avoids the
|
|
114
|
-
// old footgun where `LLM_GATEWAY_LOGS_DB=` silently disabled persistence.
|
|
115
79
|
const dbEnvRaw = jobsDbEnv && jobsDbEnv.length > 0
|
|
116
80
|
? jobsDbEnv
|
|
117
81
|
: logsDbEnv && logsDbEnv.length > 0
|
|
@@ -160,14 +124,6 @@ function applyEnvOverrides(base, logger, sources) {
|
|
|
160
124
|
function expandHome(p) {
|
|
161
125
|
return p.startsWith("~/") ? path.join(os.homedir(), p.slice(2)) : p;
|
|
162
126
|
}
|
|
163
|
-
/**
|
|
164
|
-
* Load and validate the persistence config from (in order, last-write-wins):
|
|
165
|
-
* 1. Built-in defaults (backend=sqlite, default retention/dedup).
|
|
166
|
-
* 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
|
|
167
|
-
* 3. Legacy env vars (with deprecation warning).
|
|
168
|
-
*
|
|
169
|
-
* Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
|
|
170
|
-
*/
|
|
171
127
|
export function loadPersistenceConfig(logger = noopLogger) {
|
|
172
128
|
const configPath = defaultPersistenceConfigPath();
|
|
173
129
|
const { raw, sourcePath } = readPersistenceFile(configPath, logger);
|
|
@@ -209,24 +165,7 @@ export function loadPersistenceConfig(logger = noopLogger) {
|
|
|
209
165
|
sources,
|
|
210
166
|
};
|
|
211
167
|
}
|
|
212
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
213
|
-
// Cache-awareness configuration
|
|
214
|
-
//
|
|
215
|
-
// Reads the [cache_awareness] block from the same ~/.llm-cli-gateway/config.toml
|
|
216
|
-
// file as [persistence], but uses a SEPARATE loader and schema. Keeping the two
|
|
217
|
-
// independent means a malformed [cache_awareness] never breaks persistence
|
|
218
|
-
// loading and vice versa. No env-var overrides — purely TOML.
|
|
219
|
-
//
|
|
220
|
-
// All defaults are "off"; behavioural changes (slice 1 cache_control, slice 3
|
|
221
|
-
// TTL warnings) ship dormant until operators opt in.
|
|
222
|
-
//──────────────────────────────────────────────────────────────────────────────
|
|
223
168
|
export const ANTHROPIC_TTL_SECONDS_VALUES = [300, 3600];
|
|
224
|
-
/**
|
|
225
|
-
* Per-Anthropic-model-family minimum cacheable tokens. Sourced from
|
|
226
|
-
* docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
|
|
227
|
-
* 2026-05-26). Models below the threshold cannot be cached even with
|
|
228
|
-
* cache_control set — Anthropic silently returns un-cached.
|
|
229
|
-
*/
|
|
230
169
|
export const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL = {
|
|
231
170
|
sonnet: 1024,
|
|
232
171
|
opus: 4096,
|
|
@@ -275,10 +214,6 @@ function readCacheAwarenessFile(configPath, logger) {
|
|
|
275
214
|
return { raw: undefined, sourcePath: null };
|
|
276
215
|
}
|
|
277
216
|
}
|
|
278
|
-
/**
|
|
279
|
-
* Load [cache_awareness] from ~/.llm-cli-gateway/config.toml. Defaults: all
|
|
280
|
-
* behaviour off, per-model min-token thresholds from PROVIDER_CACHE_SURFACES.md.
|
|
281
|
-
*/
|
|
282
217
|
export function loadCacheAwarenessConfig(logger = noopLogger) {
|
|
283
218
|
const configPath = defaultPersistenceConfigPath();
|
|
284
219
|
const { raw, sourcePath } = readCacheAwarenessFile(configPath, logger);
|
|
@@ -302,11 +237,6 @@ export function loadCacheAwarenessConfig(logger = noopLogger) {
|
|
|
302
237
|
sources: { configFile: sourcePath },
|
|
303
238
|
};
|
|
304
239
|
}
|
|
305
|
-
/**
|
|
306
|
-
* Look up the per-model-family threshold. `modelName` is the user-facing model
|
|
307
|
-
* string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"). Falls back to `default`
|
|
308
|
-
* when the family is unrecognised.
|
|
309
|
-
*/
|
|
310
240
|
export function minStableTokensForModel(config, modelName) {
|
|
311
241
|
const lower = modelName.toLowerCase();
|
|
312
242
|
const table = config.minStableTokensForCacheControl;
|
package/dist/db.d.ts
CHANGED
|
@@ -7,32 +7,14 @@ export interface HealthCheckResult {
|
|
|
7
7
|
latency: number;
|
|
8
8
|
};
|
|
9
9
|
}
|
|
10
|
-
/**
|
|
11
|
-
* Database connection manager for PostgreSQL-backed sessions.
|
|
12
|
-
*/
|
|
13
10
|
export declare class DatabaseConnection {
|
|
14
11
|
private logger;
|
|
15
12
|
private pool;
|
|
16
13
|
private config;
|
|
17
14
|
constructor(config: Config, logger?: Logger);
|
|
18
|
-
/**
|
|
19
|
-
* Initialize connection to PostgreSQL.
|
|
20
|
-
*/
|
|
21
15
|
connect(): Promise<void>;
|
|
22
|
-
/**
|
|
23
|
-
* Graceful shutdown - close all connections
|
|
24
|
-
*/
|
|
25
16
|
disconnect(): Promise<void>;
|
|
26
|
-
/**
|
|
27
|
-
* Health check for PostgreSQL.
|
|
28
|
-
*/
|
|
29
17
|
healthCheck(): Promise<HealthCheckResult>;
|
|
30
|
-
/**
|
|
31
|
-
* Get PostgreSQL pool
|
|
32
|
-
*/
|
|
33
18
|
getPool(): Pool;
|
|
34
19
|
}
|
|
35
|
-
/**
|
|
36
|
-
* Factory function to create and connect DatabaseConnection
|
|
37
|
-
*/
|
|
38
20
|
export declare function createDatabaseConnection(config: Config, logger?: Logger): Promise<DatabaseConnection>;
|