llm-cli-gateway 1.5.35 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +201 -0
- package/README.md +35 -4
- package/dist/cache-stats.d.ts +112 -0
- package/dist/cache-stats.js +225 -0
- package/dist/config.d.ts +41 -0
- package/dist/config.js +109 -0
- package/dist/doctor.d.ts +42 -1
- package/dist/doctor.js +121 -2
- package/dist/flight-recorder.d.ts +27 -0
- package/dist/flight-recorder.js +79 -2
- package/dist/index.d.ts +46 -9
- package/dist/index.js +395 -67
- package/dist/pricing.d.ts +54 -0
- package/dist/pricing.js +100 -0
- package/dist/prompt-parts.d.ts +38 -0
- package/dist/prompt-parts.js +42 -0
- package/dist/resources.d.ts +32 -1
- package/dist/resources.js +52 -1
- package/package.json +2 -1
- package/setup/status.schema.json +39 -0
- package/socket.yml +29 -0
package/dist/config.js
CHANGED
|
@@ -227,3 +227,112 @@ export function loadPersistenceConfig(logger = noopLogger) {
|
|
|
227
227
|
sources,
|
|
228
228
|
};
|
|
229
229
|
}
|
|
230
|
+
//──────────────────────────────────────────────────────────────────────────────
|
|
231
|
+
// Cache-awareness configuration
|
|
232
|
+
//
|
|
233
|
+
// Reads the [cache_awareness] block from the same ~/.llm-cli-gateway/config.toml
|
|
234
|
+
// file as [persistence], but uses a SEPARATE loader and schema. Keeping the two
|
|
235
|
+
// independent means a malformed [cache_awareness] never breaks persistence
|
|
236
|
+
// loading and vice versa. No env-var overrides — purely TOML.
|
|
237
|
+
//
|
|
238
|
+
// All defaults are "off"; behavioural changes (slice 1 cache_control, slice 3
|
|
239
|
+
// TTL warnings) ship dormant until operators opt in.
|
|
240
|
+
//──────────────────────────────────────────────────────────────────────────────
|
|
241
|
+
/** TTL values, in seconds, that `anthropic_ttl_seconds` accepts in the schema below. */
export const ANTHROPIC_TTL_SECONDS_VALUES = [300, 3600];
/**
 * Default minimum cacheable token counts, keyed by Anthropic model family
 * (`default` is the fallback for families not listed). Values come from
 * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
 * 2026-05-26). Per that source, content under the threshold is not cached
 * even with cache_control set — Anthropic silently returns un-cached.
 */
export const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL = {
    sonnet: 1024,
    opus: 4096,
    haiku: 4096,
    default: 4096,
};
// One threshold field: a positive integer that falls back to the shipped default.
const thresholdField = (fallback) => z.number().int().positive().default(fallback);
// Shape of [cache_awareness.min_stable_tokens_for_cache_control]. Unknown keys
// are rejected (strict); omitting the whole table yields the shipped defaults.
const MinStableTokensSchema = z
    .object({
        sonnet: thresholdField(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.sonnet),
        opus: thresholdField(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.opus),
        haiku: thresholdField(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.haiku),
        default: thresholdField(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.default),
    })
    .strict()
    .default({ ...DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL });
// Shape of the [cache_awareness] block. Both boolean switches default to false
// and unknown keys are rejected (strict).
const CacheAwarenessSchema = z
    .object({
        emit_anthropic_cache_control: z.boolean().default(false),
        // Only the two literal TTLs are accepted; 300 is the default.
        anthropic_ttl_seconds: z.union([z.literal(300), z.literal(3600)]).default(300),
        warn_on_ttl_expiry: z.boolean().default(false),
        min_stable_tokens_for_cache_control: MinStableTokensSchema,
    })
    .strict();
|
|
280
|
+
/**
 * Read the raw `cache_awareness` table from the TOML file at `configPath`.
 *
 * @param configPath Path of the gateway config file.
 * @param logger Logger whose `error` method receives parse/read failures.
 * @returns `{ raw, sourcePath }`. `raw` is the unvalidated `cache_awareness`
 *   value (undefined when the file lacks one). When the file does not exist,
 *   or reading/parsing it fails, the result is `{ raw: undefined,
 *   sourcePath: null }` — failures are logged, never thrown.
 */
function readCacheAwarenessFile(configPath, logger) {
    const nothingLoaded = { raw: undefined, sourcePath: null };
    if (existsSync(configPath) === false) {
        return nothingLoaded;
    }
    try {
        // smol-toml is loaded through createRequire at call time.
        const tomlParser = createRequire(import.meta.url)("smol-toml");
        const document = tomlParser.parse(readFileSync(configPath, "utf-8"));
        return { raw: document?.cache_awareness, sourcePath: configPath };
    }
    catch (err) {
        logger.error(`Failed to parse gateway config at ${configPath}; using cache_awareness defaults`, err);
        return nothingLoaded;
    }
}
|
|
296
|
+
/**
 * Load and validate the [cache_awareness] block of the gateway config file
 * (the path comes from `defaultPersistenceConfigPath()`).
 *
 * A missing file or missing block validates as `{}`, so every schema default
 * applies. A block that is present but fails validation throws.
 *
 * @param logger Logger handed to the file reader; defaults to `noopLogger`.
 * @returns The camelCase configuration plus `sources.configFile`, which is the
 *   path that was read, or null when no file was read.
 * @throws Error prefixed with "Invalid [cache_awareness] config:" when the
 *   block does not match the schema.
 */
export function loadCacheAwarenessConfig(logger = noopLogger) {
    const { raw, sourcePath } = readCacheAwarenessFile(defaultPersistenceConfigPath(), logger);
    let settings;
    try {
        settings = CacheAwarenessSchema.parse(raw ?? {});
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid [cache_awareness] config: ${reason}`);
    }
    const { sonnet, opus, haiku, default: fallback } = settings.min_stable_tokens_for_cache_control;
    return {
        emitAnthropicCacheControl: settings.emit_anthropic_cache_control,
        anthropicTtlSeconds: settings.anthropic_ttl_seconds,
        warnOnTtlExpiry: settings.warn_on_ttl_expiry,
        minStableTokensForCacheControl: { sonnet, opus, haiku, default: fallback },
        sources: { configFile: sourcePath },
    };
}
|
|
323
|
+
/**
 * Look up the minimum-stable-token threshold for a model.
 *
 * The family is detected by a case-insensitive substring match on the
 * user-facing model string (e.g. "claude-sonnet-4-6", "claude-opus-4-7"),
 * checked in the order sonnet, opus, haiku.
 *
 * @param config Object carrying `minStableTokensForCacheControl`.
 * @param modelName Model string; may be undefined/null when the request did
 *   not name a model.
 * @returns The family's threshold, or `default` when the family is
 *   unrecognised or no model name was supplied.
 */
export function minStableTokensForModel(config, modelName) {
    const table = config.minStableTokensForCacheControl;
    // Request params declare `model` as optional, so a missing name must fall
    // back to the default instead of throwing on `.toLowerCase()`.
    if (typeof modelName !== "string") {
        return table.default;
    }
    const lower = modelName.toLowerCase();
    for (const family of ["sonnet", "opus", "haiku"]) {
        if (lower.includes(family)) {
            return table[family];
        }
    }
    return table.default;
}
|
package/dist/doctor.d.ts
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
import { type EndpointExposureReport } from "./endpoint-exposure.js";
|
|
2
2
|
import { type ProviderLoginStatus } from "./provider-status.js";
|
|
3
|
+
import type { FlightRecorderQuery } from "./flight-recorder.js";
|
|
4
|
+
import { type CacheAwarenessConfig } from "./config.js";
|
|
5
|
+
/** CLI identifiers; used as the keys of `CacheAwarenessReport.per_cli`. */
export type CliType = "claude" | "codex" | "gemini" | "grok" | "mistral";
/**
 * Slice 3 cross-cutting: the doctor report's summary of the gateway's
 * cache-awareness posture. The block is always PRESENT in the report; its
 * numbers are zeroed when the flight recorder has no rows for the last 24h.
 *
 * When every flag is off, `enabled_features` is an empty array rather than
 * being omitted, so callers can tell "configured but dormant" apart from
 * "cache_awareness block missing".
 */
export interface CacheAwarenessReport {
    /** Names of the cache-awareness behaviours currently switched on. */
    enabled_features: ("anthropic_cache_control" | "ttl_warnings")[];
    /** Aggregates over the last 24 hours. */
    last_24h: {
        hit_rate: number;
        total_hits: number;
        total_requests: number;
        estimated_savings_usd: number;
    };
    /** Per-CLI aggregates; a CLI may be absent. */
    per_cli: {
        [Cli in CliType]?: {
            hit_rate: number;
            total_hits: number;
            total_cache_read_tokens: number;
        };
    };
}
|
|
3
29
|
export interface VibeSessionLoggingStatus {
|
|
4
30
|
config_path: string;
|
|
5
31
|
config_present: boolean;
|
|
@@ -105,7 +131,22 @@ export interface DoctorReport {
|
|
|
105
131
|
gemini_config: GeminiConfigStatus;
|
|
106
132
|
vibe_session_logging: VibeSessionLoggingStatus;
|
|
107
133
|
};
|
|
134
|
+
cache_awareness: CacheAwarenessReport;
|
|
108
135
|
next_actions: string[];
|
|
109
136
|
}
|
|
110
|
-
export
|
|
137
|
+
/** Options-object form of the `createDoctorReport` argument. */
export interface CreateDoctorReportOptions {
    /**
     * Optional CacheAwarenessConfig. Determines `enabled_features`; when it
     * is absent `enabled_features` is empty (all behaviour considered off).
     */
    cacheAwareness?: CacheAwarenessConfig;
    /**
     * Optional read access to the flight recorder. Feeds the
     * cache_awareness.last_24h and per_cli aggregates; when it is absent
     * those blocks are still PRESENT in the report but carry zeroed values.
     */
    flightRecorder?: FlightRecorderQuery;
    /** Environment to inspect. */
    env?: NodeJS.ProcessEnv;
}
/**
 * Build the doctor report from either an options object or a bare
 * environment object.
 */
export declare function createDoctorReport(envOrOptions?: CreateDoctorReportOptions | NodeJS.ProcessEnv): DoctorReport;
|
|
111
152
|
export declare function printDoctorJson(): void;
|
package/dist/doctor.js
CHANGED
|
@@ -6,6 +6,9 @@ import { loadAuthConfig } from "./auth.js";
|
|
|
6
6
|
import { createEndpointExposureReport, redactDiagnosticUrl, } from "./endpoint-exposure.js";
|
|
7
7
|
import { listProviderRuntimeStatuses, } from "./provider-status.js";
|
|
8
8
|
import { CLAUDE_MCP_SERVER_NAMES } from "./claude-mcp-config.js";
|
|
9
|
+
import { loadCacheAwarenessConfig } from "./config.js";
|
|
10
|
+
import { computeGlobalCacheStats } from "./cache-stats.js";
|
|
11
|
+
import { FlightRecorder, resolveFlightRecorderDbPath } from "./flight-recorder.js";
|
|
9
12
|
/**
|
|
10
13
|
* Probe ~/.vibe/config.toml to see whether session_logging is enabled. Vibe
|
|
11
14
|
* persists session logs (which sessionId/--continue depends on) only when
|
|
@@ -190,7 +193,71 @@ function chatGPTConnectorUrl(env, rawPublicUrl) {
|
|
|
190
193
|
return null;
|
|
191
194
|
}
|
|
192
195
|
}
|
|
193
|
-
|
|
196
|
+
/**
 * Build the cache_awareness block of the doctor report. The block is ALWAYS
 * returned; its aggregates are zeroed when no flight recorder was supplied or
 * when computing the statistics throws.
 *
 * @param opts Options object; reads `opts.cacheAwareness` (for
 *   `enabled_features`) and `opts.flightRecorder` (for the aggregates).
 * @returns `{ enabled_features, last_24h, per_cli }`.
 */
function buildCacheAwarenessReport(opts) {
    const enabled = [];
    if (opts.cacheAwareness?.emitAnthropicCacheControl) {
        enabled.push("anthropic_cache_control");
    }
    if (opts.cacheAwareness?.warnOnTtlExpiry) {
        enabled.push("ttl_warnings");
    }
    // Single source for the zeroed fallback so the "no recorder" and
    // "stats failed" paths cannot drift apart.
    const zeroedReport = () => ({
        enabled_features: enabled,
        last_24h: {
            hit_rate: 0,
            total_hits: 0,
            total_requests: 0,
            estimated_savings_usd: 0,
        },
        per_cli: {},
    });
    if (!opts.flightRecorder) {
        return zeroedReport();
    }
    let stats;
    try {
        stats = computeGlobalCacheStats(opts.flightRecorder, { lastNHours: 24 });
    }
    catch {
        // Best effort: a failing stats query degrades to the zeroed block
        // instead of failing the whole doctor report.
        return zeroedReport();
    }
    const perCli = {};
    for (const entry of stats.perCli) {
        perCli[entry.cli] = {
            hit_rate: entry.hitRate,
            total_hits: entry.hitCount,
            total_cache_read_tokens: entry.totalCacheReadTokens,
        };
    }
    return {
        enabled_features: enabled,
        last_24h: {
            hit_rate: stats.hitRate,
            total_hits: stats.totalHits,
            total_requests: stats.totalRequests,
            estimated_savings_usd: stats.estimatedSavingsUsd,
        },
        per_cli: perCli,
    };
}
|
|
255
|
+
export function createDoctorReport(envOrOptions = process.env) {
|
|
256
|
+
// Preserve back-compat: previous signature accepted a bare `env` object.
|
|
257
|
+
const opts = isCreateDoctorReportOptions(envOrOptions)
|
|
258
|
+
? envOrOptions
|
|
259
|
+
: { env: envOrOptions };
|
|
260
|
+
const env = opts.env ?? process.env;
|
|
194
261
|
const auth = loadAuthConfig(env);
|
|
195
262
|
const transport = defaultTransport(env);
|
|
196
263
|
const rawPublicUrl = env.LLM_GATEWAY_PUBLIC_URL || null;
|
|
@@ -237,6 +304,7 @@ export function createDoctorReport(env = process.env) {
|
|
|
237
304
|
},
|
|
238
305
|
endpoint_exposure: endpointExposure,
|
|
239
306
|
client_config: clientConfigStatus(),
|
|
307
|
+
cache_awareness: buildCacheAwarenessReport(opts),
|
|
240
308
|
next_actions: [],
|
|
241
309
|
};
|
|
242
310
|
if (transport === "http" && auth.required && !auth.tokenConfigured) {
|
|
@@ -271,7 +339,58 @@ export function createDoctorReport(env = process.env) {
|
|
|
271
339
|
return report;
|
|
272
340
|
}
|
|
273
341
|
/**
 * Print the doctor report to stdout as pretty-printed JSON followed by a
 * newline.
 *
 * Loading the cache-awareness config and opening the flight recorder are both
 * best-effort: if either fails, the report is still produced and
 * buildCacheAwarenessReport falls back to its zeroed block. A recorder that
 * was opened is always closed, even if building or writing the report throws.
 */
export function printDoctorJson() {
    let cacheAwareness;
    let flightRecorder;
    try {
        cacheAwareness = loadCacheAwarenessConfig();
    }
    catch {
        // Best effort: the report is built without cache-awareness config.
    }
    try {
        const dbPath = resolveFlightRecorderDbPath();
        if (dbPath)
            flightRecorder = new FlightRecorder(dbPath);
    }
    catch {
        // Best effort: the report is built without flight-recorder data.
    }
    try {
        const report = createDoctorReport({
            env: process.env,
            cacheAwareness,
            flightRecorder,
        });
        process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    }
    finally {
        // Release the recorder on success and on failure alike.
        if (flightRecorder) {
            try {
                flightRecorder.close();
            }
            catch {
                // best effort
            }
        }
    }
}
|
|
377
|
+
/**
 * Decide whether the `createDoctorReport` argument is a
 * CreateDoctorReportOptions object rather than a bare environment object.
 *
 * An options object is recognised by an own `flightRecorder`, `env` or
 * `cacheAwareness` property whose value is undefined or an object. A
 * NodeJS.ProcessEnv is a flat Record<string, string|undefined>, so even if a
 * shell exports a variable with one of those names the value is a STRING and
 * the typeof check rejects it.
 *
 * @param value The argument passed to `createDoctorReport`.
 * @returns true when `value` should be treated as an options object.
 */
function isCreateDoctorReportOptions(value) {
    if (value === null || typeof value !== "object")
        return false;
    // Checked in this order; the first key present decides the answer.
    // `cacheAwareness` is included so an options object that carries only
    // that field is not mistaken for an environment object.
    const optionKeys = ["flightRecorder", "env", "cacheAwareness"];
    for (const key of optionKeys) {
        if (Object.prototype.hasOwnProperty.call(value, key)) {
            const candidate = value[key];
            return candidate === undefined || typeof candidate === "object";
        }
    }
    return false;
}
|
|
276
395
|
function doctorProviderStatus(provider) {
|
|
277
396
|
return {
|
|
@@ -6,6 +6,8 @@ export interface FlightLogStart {
|
|
|
6
6
|
system?: string;
|
|
7
7
|
sessionId?: string;
|
|
8
8
|
asyncJobId?: string;
|
|
9
|
+
stablePrefixHash?: string;
|
|
10
|
+
stablePrefixTokens?: number;
|
|
9
11
|
}
|
|
10
12
|
export interface FlightLogResult {
|
|
11
13
|
response: string;
|
|
@@ -36,15 +38,40 @@ export declare class FlightRecorder {
|
|
|
36
38
|
constructor(dbPath: string);
|
|
37
39
|
logStart(entry: FlightLogStart): void;
|
|
38
40
|
logComplete(correlationId: string, result: FlightLogResult): void;
|
|
41
|
+
/**
|
|
42
|
+
* Read-only query over the requests + gateway_metadata tables. Used by
|
|
43
|
+
* cache-stats / MCP resources / doctor without exposing a second SQLite
|
|
44
|
+
* connection. better-sqlite3 in WAL mode handles concurrent readers
|
|
45
|
+
* inside a single process safely.
|
|
46
|
+
*
|
|
47
|
+
* Safety:
|
|
48
|
+
* - Caller MUST pass parameterised SQL — direct string interpolation of
|
|
49
|
+
* untrusted values is unsafe.
|
|
50
|
+
* - The compiled statement's `.readonly` flag is checked at runtime;
|
|
51
|
+
* anything that can mutate rows (INSERT/UPDATE/DELETE, including the
|
|
52
|
+
* `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
|
|
53
|
+
* This blocks the writer-disguised-as-reader vector codex-r1/F3
|
|
54
|
+
* flagged, even when the caller is internal gateway code.
|
|
55
|
+
*/
|
|
56
|
+
queryRequests<T = Record<string, unknown>>(sql: string, ...params: unknown[]): T[];
|
|
39
57
|
flush(): void;
|
|
40
58
|
close(): void;
|
|
41
59
|
}
|
|
42
60
|
export declare class NoopFlightRecorder {
|
|
43
61
|
logStart(_entry: FlightLogStart): void;
|
|
44
62
|
logComplete(_correlationId: string, _result: FlightLogResult): void;
|
|
63
|
+
queryRequests<T = Record<string, unknown>>(_sql: string, ..._params: unknown[]): T[];
|
|
45
64
|
flush(): void;
|
|
46
65
|
close(): void;
|
|
47
66
|
}
|
|
48
67
|
export type FlightRecorderLike = FlightRecorder | NoopFlightRecorder;
|
|
68
|
+
/**
 * The read-only slice of FlightRecorder that cache-stats, MCP resources and
 * doctor depend on. Both FlightRecorder and NoopFlightRecorder satisfy it;
 * the noop answers every query with `[]`, so downstream aggregation is empty
 * by design.
 */
export interface FlightRecorderQuery {
    /** Run `sql` with the given bound parameters and return the rows. */
    queryRequests<T = Record<string, unknown>>(sql: string, ...params: Array<unknown>): Array<T>;
}
|
|
49
76
|
export declare function createFlightRecorder(logger: LoggerLike): FlightRecorderLike;
|
|
50
77
|
export {};
|
package/dist/flight-recorder.js
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Flight recorder: SQLite-backed request log.
|
|
3
|
+
*
|
|
4
|
+
* Read access for cache-stats / MCP resources / doctor goes through the
|
|
5
|
+
* `queryRequests<T>(sql, ...params)` method exposed on both `FlightRecorder`
|
|
6
|
+
* and `NoopFlightRecorder` (the `FlightRecorderQuery` interface, see bottom
|
|
7
|
+
* of file). This is Option A from
|
|
8
|
+
* docs/plans/cache-awareness.dag.toml#expose-flight-recorder-read-access —
|
|
9
|
+
* a single read-only query surface on the existing class, NOT a sibling
|
|
10
|
+
* read-only SQLite connection. better-sqlite3 in WAL mode handles
|
|
11
|
+
* concurrent readers inside a single process safely, so the additional
|
|
12
|
+
* connection isn't needed and would have to be threaded through
|
|
13
|
+
* GatewayServerRuntime as a separate field.
|
|
14
|
+
*
|
|
15
|
+
* Callers MUST pass parameterised SQL — string-interpolation of untrusted
|
|
16
|
+
* values is unsafe even on a "read-only" query.
|
|
17
|
+
*/
|
|
1
18
|
import { chmodSync, existsSync, mkdirSync } from "fs";
|
|
2
19
|
import os from "os";
|
|
3
20
|
import path from "path";
|
|
@@ -18,6 +35,26 @@ function ensureRequestsCacheColumns(db) {
|
|
|
18
35
|
db.exec("ALTER TABLE requests ADD COLUMN cache_creation_tokens INTEGER");
|
|
19
36
|
}
|
|
20
37
|
}
|
|
38
|
+
/**
 * Idempotent v3 migration for the `requests` table: adds the
 * `stable_prefix_hash` and `stable_prefix_tokens` columns when they are
 * missing, then ensures the index on `stable_prefix_hash` exists.
 *
 * Only new rows that carry a promptParts structure (slice 1) populate these
 * columns; legacy rows keep NULL forever.
 *
 * @param db Database handle exposing `prepare(sql)` and `exec(sql)`.
 */
function ensureStablePrefixColumns(db) {
    const tableInfo = db.prepare("PRAGMA table_info(requests)");
    // A statement without `.all` is treated as reporting no columns.
    const columns = tableInfo.all?.() ?? [];
    const existing = new Set();
    for (const column of columns) {
        existing.add(column && typeof column.name === "string" ? column.name : "");
    }
    const additions = [
        { column: "stable_prefix_hash", ddl: "ALTER TABLE requests ADD COLUMN stable_prefix_hash TEXT" },
        { column: "stable_prefix_tokens", ddl: "ALTER TABLE requests ADD COLUMN stable_prefix_tokens INTEGER" },
    ];
    for (const { column, ddl } of additions) {
        if (existing.has(column)) {
            continue;
        }
        db.exec(ddl);
    }
    db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
}
|
|
21
58
|
export function resolveFlightRecorderDbPath() {
|
|
22
59
|
const configured = process.env.LLM_GATEWAY_LOGS_DB;
|
|
23
60
|
if (configured !== undefined) {
|
|
@@ -131,6 +168,14 @@ export class FlightRecorder {
|
|
|
131
168
|
this.db
|
|
132
169
|
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(2, ?)")
|
|
133
170
|
.run(new Date().toISOString());
|
|
171
|
+
// Migration v3: stable_prefix_hash / stable_prefix_tokens columns plus
|
|
172
|
+
// their index. Populated only for new rows whose request carried a
|
|
173
|
+
// promptParts structure (slice 1 of cache-awareness); legacy rows keep
|
|
174
|
+
// NULL intentionally.
|
|
175
|
+
ensureStablePrefixColumns(this.db);
|
|
176
|
+
this.db
|
|
177
|
+
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
|
|
178
|
+
.run(new Date().toISOString());
|
|
134
179
|
if (process.platform !== "win32") {
|
|
135
180
|
try {
|
|
136
181
|
chmodSync(dbPath, 0o600);
|
|
@@ -140,8 +185,10 @@ export class FlightRecorder {
|
|
|
140
185
|
}
|
|
141
186
|
}
|
|
142
187
|
const insertRequest = this.db.prepare(`
|
|
143
|
-
INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc
|
|
144
|
-
|
|
188
|
+
INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
|
|
189
|
+
stable_prefix_hash, stable_prefix_tokens)
|
|
190
|
+
VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
|
|
191
|
+
@stable_prefix_hash, @stable_prefix_tokens)
|
|
145
192
|
`);
|
|
146
193
|
const insertMetadata = this.db.prepare(`
|
|
147
194
|
INSERT INTO gateway_metadata (request_id, async_job_id, status)
|
|
@@ -156,6 +203,8 @@ export class FlightRecorder {
|
|
|
156
203
|
system: entry.system || null,
|
|
157
204
|
session_id: entry.sessionId || null,
|
|
158
205
|
datetime_utc: new Date().toISOString(),
|
|
206
|
+
stable_prefix_hash: entry.stablePrefixHash ?? null,
|
|
207
|
+
stable_prefix_tokens: entry.stablePrefixTokens ?? null,
|
|
159
208
|
});
|
|
160
209
|
insertMetadata.run({
|
|
161
210
|
request_id: entry.correlationId,
|
|
@@ -218,6 +267,31 @@ export class FlightRecorder {
|
|
|
218
267
|
logComplete(correlationId, result) {
|
|
219
268
|
this.updateCompleteTxn(correlationId, result);
|
|
220
269
|
}
|
|
270
|
+
/**
|
|
271
|
+
* Read-only query over the requests + gateway_metadata tables. Used by
|
|
272
|
+
* cache-stats / MCP resources / doctor without exposing a second SQLite
|
|
273
|
+
* connection. better-sqlite3 in WAL mode handles concurrent readers
|
|
274
|
+
* inside a single process safely.
|
|
275
|
+
*
|
|
276
|
+
* Safety:
|
|
277
|
+
* - Caller MUST pass parameterised SQL — direct string interpolation of
|
|
278
|
+
* untrusted values is unsafe.
|
|
279
|
+
* - The compiled statement's `.readonly` flag is checked at runtime;
|
|
280
|
+
* anything that can mutate rows (INSERT/UPDATE/DELETE, including the
|
|
281
|
+
* `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
|
|
282
|
+
* This blocks the writer-disguised-as-reader vector codex-r1/F3
|
|
283
|
+
* flagged, even when the caller is internal gateway code.
|
|
284
|
+
*/
|
|
285
|
+
queryRequests(sql, ...params) {
|
|
286
|
+
const stmt = this.db.prepare(sql);
|
|
287
|
+
if (stmt.readonly === false) {
|
|
288
|
+
throw new Error("FlightRecorder.queryRequests refuses non-readonly SQL — use a transaction or a separate write surface for INSERT/UPDATE/DELETE.");
|
|
289
|
+
}
|
|
290
|
+
if (!stmt.all) {
|
|
291
|
+
return [];
|
|
292
|
+
}
|
|
293
|
+
return stmt.all(...params);
|
|
294
|
+
}
|
|
221
295
|
flush() {
|
|
222
296
|
// No-op: better-sqlite3 writes synchronously.
|
|
223
297
|
}
|
|
@@ -228,6 +302,9 @@ export class FlightRecorder {
|
|
|
228
302
|
/**
 * Do-nothing counterpart of FlightRecorder: the logging and lifecycle methods
 * have empty bodies and every query returns an empty array.
 */
export class NoopFlightRecorder {
    /** Ignores the entry. */
    logStart(_entry) {
        // intentionally empty
    }
    /** Ignores the result. */
    logComplete(_correlationId, _result) {
        // intentionally empty
    }
    /** Always yields a fresh empty row list. */
    queryRequests(_sql, ..._params) {
        const noRows = [];
        return noRows;
    }
    /** Nothing to flush. */
    flush() {
        // intentionally empty
    }
    /** Nothing to close. */
    close() {
        // intentionally empty
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { z } from "zod";
|
|
|
4
4
|
import { ISessionManager } from "./session-manager.js";
|
|
5
5
|
import { ResourceProvider } from "./resources.js";
|
|
6
6
|
import { PerformanceMetrics } from "./metrics.js";
|
|
7
|
-
import { type PersistenceConfig } from "./config.js";
|
|
7
|
+
import { type PersistenceConfig, type CacheAwarenessConfig } from "./config.js";
|
|
8
8
|
import { DatabaseConnection } from "./db.js";
|
|
9
9
|
import { AsyncJobManager } from "./async-job-manager.js";
|
|
10
10
|
import { ApprovalManager, ApprovalRecord } from "./approval-manager.js";
|
|
@@ -12,6 +12,22 @@ import { ReviewIntegrityResult } from "./review-integrity.js";
|
|
|
12
12
|
import { ClaudeMcpConfigResult, ClaudeMcpServerName } from "./claude-mcp-config.js";
|
|
13
13
|
import { type MistralAgentMode, type ClaudePermissionMode, type CodexSandboxMode, type CodexAskForApproval, type ClaudeEffortLevel } from "./request-helpers.js";
|
|
14
14
|
import { FlightRecorderLike } from "./flight-recorder.js";
|
|
15
|
+
import { type PromptParts } from "./prompt-parts.js";
|
|
16
|
+
/**
 * Slice 3: a structured warning attached to a tool response. Unlike
 * review-integrity warnings, which are appended as text to the user-visible
 * response, these entries are programmatic signals that caller agents can
 * react to.
 */
export interface WarningEntry {
    /** Code-specific payload — left open so future warning types can add fields. */
    [key: string]: unknown;
    /** Stable machine-readable code, e.g. "cache_ttl_expiring_soon". */
    code: string;
    /** Optional human-readable message for surfaces that render text. */
    message?: string;
    /**
     * NOTE(review): presumably the remaining cache TTL in milliseconds for
     * TTL-related codes — confirm against the code that emits it.
     */
    ttlRemainingMs?: number;
}
|
|
15
31
|
type ExtendedToolResponse = {
|
|
16
32
|
content: {
|
|
17
33
|
type: "text";
|
|
@@ -28,6 +44,8 @@ type ExtendedToolResponse = {
|
|
|
28
44
|
missing?: ClaudeMcpServerName[];
|
|
29
45
|
};
|
|
30
46
|
reviewIntegrity?: ReviewIntegrityResult;
|
|
47
|
+
/** Slice 3: structured warnings (e.g. cache_ttl_expiring_soon). */
|
|
48
|
+
warnings?: WarningEntry[];
|
|
31
49
|
};
|
|
32
50
|
declare const logger: {
|
|
33
51
|
info: (message: string, ...args: any[]) => void;
|
|
@@ -49,6 +67,7 @@ export interface GatewayServerDeps {
|
|
|
49
67
|
flightRecorder?: FlightRecorderLike;
|
|
50
68
|
logger?: GatewayLogger;
|
|
51
69
|
persistence?: PersistenceConfig;
|
|
70
|
+
cacheAwareness?: CacheAwarenessConfig;
|
|
52
71
|
}
|
|
53
72
|
interface GatewayServerRuntime {
|
|
54
73
|
sessionManager: ISessionManager;
|
|
@@ -60,6 +79,7 @@ interface GatewayServerRuntime {
|
|
|
60
79
|
flightRecorder: FlightRecorderLike;
|
|
61
80
|
logger: GatewayLogger;
|
|
62
81
|
persistence: PersistenceConfig;
|
|
82
|
+
cacheAwareness: CacheAwarenessConfig;
|
|
63
83
|
}
|
|
64
84
|
interface CliRequestPrep {
|
|
65
85
|
corrId: string;
|
|
@@ -70,9 +90,19 @@ interface CliRequestPrep {
|
|
|
70
90
|
approvalDecision: ApprovalRecord | null;
|
|
71
91
|
reviewIntegrity?: ReviewIntegrityResult;
|
|
72
92
|
args: string[];
|
|
93
|
+
/**
|
|
94
|
+
* Sha256 of the assembled prompt's stable prefix bytes when the caller
|
|
95
|
+
* supplied `promptParts`. Null when the legacy `prompt` field was used.
|
|
96
|
+
* Populated by `resolvePromptOrPartsForPrep` and threaded into the
|
|
97
|
+
* flight-recorder row by the caller's safeFlightStart entry.
|
|
98
|
+
*/
|
|
99
|
+
stablePrefixHash: string | null;
|
|
100
|
+
/** Heuristic token count (bytes/4) of the same stable prefix. */
|
|
101
|
+
stablePrefixTokens: number | null;
|
|
73
102
|
}
|
|
74
103
|
export declare function prepareClaudeRequest(params: {
|
|
75
|
-
prompt
|
|
104
|
+
prompt?: string;
|
|
105
|
+
promptParts?: PromptParts;
|
|
76
106
|
model?: string;
|
|
77
107
|
outputFormat: "text" | "json" | "stream-json";
|
|
78
108
|
allowedTools?: string[];
|
|
@@ -106,7 +136,8 @@ export interface CodexRequestPrep extends CliRequestPrep {
|
|
|
106
136
|
cleanup?: () => void;
|
|
107
137
|
}
|
|
108
138
|
export declare function prepareCodexRequest(params: {
|
|
109
|
-
prompt
|
|
139
|
+
prompt?: string;
|
|
140
|
+
promptParts?: PromptParts;
|
|
110
141
|
model?: string;
|
|
111
142
|
fullAuto: boolean;
|
|
112
143
|
sandboxMode?: CodexSandboxMode;
|
|
@@ -138,7 +169,8 @@ export declare function prepareCodexRequest(params: {
|
|
|
138
169
|
ignoreRules?: boolean;
|
|
139
170
|
}, runtime?: GatewayServerRuntime): CodexRequestPrep | ExtendedToolResponse;
|
|
140
171
|
export declare function prepareGeminiRequest(params: {
|
|
141
|
-
prompt
|
|
172
|
+
prompt?: string;
|
|
173
|
+
promptParts?: PromptParts;
|
|
142
174
|
model?: string;
|
|
143
175
|
approvalMode?: string;
|
|
144
176
|
approvalStrategy: "legacy" | "mcp_managed";
|
|
@@ -161,7 +193,8 @@ export declare function prepareGeminiRequest(params: {
|
|
|
161
193
|
attachments?: string[];
|
|
162
194
|
}, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
|
|
163
195
|
export declare function prepareMistralRequest(params: {
|
|
164
|
-
prompt
|
|
196
|
+
prompt?: string;
|
|
197
|
+
promptParts?: PromptParts;
|
|
165
198
|
model?: string;
|
|
166
199
|
outputFormat?: string;
|
|
167
200
|
permissionMode?: MistralAgentMode;
|
|
@@ -179,7 +212,8 @@ export declare function prepareMistralRequest(params: {
|
|
|
179
212
|
mistralEnv: Record<string, string>;
|
|
180
213
|
}) | ExtendedToolResponse;
|
|
181
214
|
export interface GeminiRequestParams {
|
|
182
|
-
prompt
|
|
215
|
+
prompt?: string;
|
|
216
|
+
promptParts?: PromptParts;
|
|
183
217
|
model?: string;
|
|
184
218
|
sessionId?: string;
|
|
185
219
|
resumeLatest: boolean;
|
|
@@ -218,7 +252,8 @@ export interface AsyncHandlerDeps extends HandlerDeps {
|
|
|
218
252
|
export declare function handleGeminiRequest(deps: HandlerDeps, params: GeminiRequestParams): Promise<ExtendedToolResponse>;
|
|
219
253
|
export declare function handleGeminiRequestAsync(deps: AsyncHandlerDeps, params: Omit<GeminiRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
220
254
|
export interface GrokRequestParams {
|
|
221
|
-
prompt
|
|
255
|
+
prompt?: string;
|
|
256
|
+
promptParts?: PromptParts;
|
|
222
257
|
model?: string;
|
|
223
258
|
outputFormat?: string;
|
|
224
259
|
sessionId?: string;
|
|
@@ -242,7 +277,8 @@ export interface GrokRequestParams {
|
|
|
242
277
|
export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
|
|
243
278
|
export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
244
279
|
export interface MistralRequestParams {
|
|
245
|
-
prompt
|
|
280
|
+
prompt?: string;
|
|
281
|
+
promptParts?: PromptParts;
|
|
246
282
|
model?: string;
|
|
247
283
|
outputFormat?: string;
|
|
248
284
|
sessionId?: string;
|
|
@@ -265,7 +301,8 @@ export interface MistralRequestParams {
|
|
|
265
301
|
export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
|
|
266
302
|
export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
267
303
|
export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params: {
|
|
268
|
-
prompt
|
|
304
|
+
prompt?: string;
|
|
305
|
+
promptParts?: PromptParts;
|
|
269
306
|
model?: string;
|
|
270
307
|
fullAuto: boolean;
|
|
271
308
|
sandboxMode?: CodexSandboxMode;
|