llm-cli-gateway 1.5.35 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.js CHANGED
@@ -227,3 +227,112 @@ export function loadPersistenceConfig(logger = noopLogger) {
227
227
  sources,
228
228
  };
229
229
  }
230
+ //──────────────────────────────────────────────────────────────────────────────
231
+ // Cache-awareness configuration
232
+ //
233
+ // Reads the [cache_awareness] block from the same ~/.llm-cli-gateway/config.toml
234
+ // file as [persistence], but uses a SEPARATE loader and schema. Keeping the two
235
+ // independent means a malformed [cache_awareness] never breaks persistence
236
+ // loading and vice versa. No env-var overrides — purely TOML.
237
+ //
238
+ // All defaults are "off"; behavioural changes (slice 1 cache_control, slice 3
239
+ // TTL warnings) ship dormant until operators opt in.
240
+ //──────────────────────────────────────────────────────────────────────────────
241
+ export const ANTHROPIC_TTL_SECONDS_VALUES = [300, 3600];
242
+ /**
243
+ * Per-Anthropic-model-family minimum cacheable tokens. Sourced from
244
+ * docs/personal-mcp/PROVIDER_CACHE_SURFACES.md (Anthropic API docs as of
245
+ * 2026-05-26). Prompt prefixes shorter than the threshold cannot be cached
246
+ * even with cache_control set — Anthropic silently processes them un-cached.
247
+ */
248
+ export const DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL = {
249
+ sonnet: 1024,
250
+ opus: 4096,
251
+ haiku: 4096,
252
+ default: 4096,
253
+ };
254
/**
 * Validator shared by every per-family threshold: a positive integer that
 * resolves to `fallback` when the key is omitted.
 */
const minStableTokenThreshold = (fallback) => z.number().int().positive().default(fallback);
/**
 * Schema for the `min_stable_tokens_for_cache_control` table. Unknown keys are
 * rejected (`strict`); when the table itself is absent it resolves to a copy
 * of DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.
 */
const MinStableTokensSchema = z
    .object({
        sonnet: minStableTokenThreshold(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.sonnet),
        opus: minStableTokenThreshold(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.opus),
        haiku: minStableTokenThreshold(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.haiku),
        default: minStableTokenThreshold(DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.default),
    })
    .strict()
    .default({
        sonnet: DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.sonnet,
        opus: DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.opus,
        haiku: DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.haiku,
        default: DEFAULT_MIN_STABLE_TOKENS_FOR_CACHE_CONTROL.default,
    });
272
+ const CacheAwarenessSchema = z
273
+ .object({
274
+ emit_anthropic_cache_control: z.boolean().default(false),
275
+ anthropic_ttl_seconds: z.union([z.literal(300), z.literal(3600)]).default(300),
276
+ warn_on_ttl_expiry: z.boolean().default(false),
277
+ min_stable_tokens_for_cache_control: MinStableTokensSchema,
278
+ })
279
+ .strict();
280
/**
 * Reads the gateway TOML config and extracts its raw `cache_awareness` value.
 *
 * @param configPath Path of the TOML file to read.
 * @param logger Logger whose `error` method receives read/parse failures.
 * @returns `{ raw, sourcePath }`. `raw` is the unvalidated `cache_awareness`
 * value (undefined when the file is missing, fails to load, or has no such
 * key). `sourcePath` is `configPath` once the file parsed, otherwise null.
 */
function readCacheAwarenessFile(configPath, logger) {
    // Fresh object per call so callers never share a mutable result.
    const nothingLoaded = () => ({ raw: undefined, sourcePath: null });
    if (!existsSync(configPath)) {
        return nothingLoaded();
    }
    try {
        // smol-toml is resolved lazily via createRequire, only when a config
        // file actually exists.
        const requireFromHere = createRequire(import.meta.url);
        const TOML = requireFromHere("smol-toml");
        const document = TOML.parse(readFileSync(configPath, "utf-8"));
        return { raw: document?.cache_awareness, sourcePath: configPath };
    }
    catch (err) {
        // Any failure here is logged and degrades to "no config".
        logger.error(`Failed to parse gateway config at ${configPath}; using cache_awareness defaults`, err);
        return nothingLoaded();
    }
}
296
/**
 * Loads and validates the `[cache_awareness]` table from the gateway config
 * file (the path returned by `defaultPersistenceConfigPath()`).
 *
 * A missing file or missing table validates as `{}`, so every schema default
 * applies.
 *
 * @param logger Logger passed to the file reader; defaults to `noopLogger`.
 * @returns The camelCased config plus `sources.configFile` (the parsed file's
 * path, or null when no file was read).
 * @throws Error when the table fails schema validation; the message is
 * prefixed with "Invalid [cache_awareness] config: ".
 */
export function loadCacheAwarenessConfig(logger = noopLogger) {
    const { raw, sourcePath } = readCacheAwarenessFile(defaultPersistenceConfigPath(), logger);
    let validated;
    try {
        validated = CacheAwarenessSchema.parse(raw ?? {});
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid [cache_awareness] config: ${reason}`);
    }
    // Copy the thresholds key by key into the public shape.
    const { sonnet, opus, haiku, default: fallback } = validated.min_stable_tokens_for_cache_control;
    return {
        emitAnthropicCacheControl: validated.emit_anthropic_cache_control,
        anthropicTtlSeconds: validated.anthropic_ttl_seconds,
        warnOnTtlExpiry: validated.warn_on_ttl_expiry,
        minStableTokensForCacheControl: { sonnet, opus, haiku, default: fallback },
        sources: { configFile: sourcePath },
    };
}
323
/**
 * Looks up the minimum stable-prefix token threshold for a model family.
 *
 * Matching is a case-insensitive substring test, checked in the order
 * sonnet → opus → haiku, so "claude-sonnet-4-6" resolves to the sonnet entry.
 *
 * @param config Object exposing `minStableTokensForCacheControl` with
 * `sonnet`, `opus`, `haiku` and `default` entries.
 * @param modelName User-facing model string. May be omitted or non-string
 * (the model is optional on gateway requests); the `default` threshold is
 * returned in that case instead of throwing.
 * @returns The threshold for the matched family, or `default` when the
 * family is unrecognised.
 */
export function minStableTokensForModel(config, modelName) {
    const table = config.minStableTokensForCacheControl;
    // Requests may not name a model; fall back rather than crash on
    // `undefined.toLowerCase()`.
    if (typeof modelName !== "string") {
        return table.default;
    }
    const lower = modelName.toLowerCase();
    for (const family of ["sonnet", "opus", "haiku"]) {
        if (lower.includes(family)) {
            return table[family];
        }
    }
    return table.default;
}
package/dist/doctor.d.ts CHANGED
@@ -1,5 +1,31 @@
1
1
  import { type EndpointExposureReport } from "./endpoint-exposure.js";
2
2
  import { type ProviderLoginStatus } from "./provider-status.js";
3
+ import type { FlightRecorderQuery } from "./flight-recorder.js";
4
+ import { type CacheAwarenessConfig } from "./config.js";
5
+ export type CliType = "claude" | "codex" | "gemini" | "grok" | "mistral";
6
+ /**
7
+ * Slice 3 cross-cutting: doctor report block summarising the gateway's
8
+ * cache-awareness posture. Always PRESENT in the report (zeroed when the
9
+ * flight recorder has no rows for the last 24h).
10
+ *
11
+ * `enabled_features` is an empty array (NOT omitted) when all flags are
12
+ * off so callers can distinguish "configured but dormant" from
13
+ * "cache_awareness block missing".
14
+ */
15
+ export interface CacheAwarenessReport {
16
+ enabled_features: Array<"anthropic_cache_control" | "ttl_warnings">;
17
+ last_24h: {
18
+ hit_rate: number;
19
+ total_hits: number;
20
+ total_requests: number;
21
+ estimated_savings_usd: number;
22
+ };
23
+ per_cli: Partial<Record<CliType, {
24
+ hit_rate: number;
25
+ total_hits: number;
26
+ total_cache_read_tokens: number;
27
+ }>>;
28
+ }
3
29
  export interface VibeSessionLoggingStatus {
4
30
  config_path: string;
5
31
  config_present: boolean;
@@ -105,7 +131,22 @@ export interface DoctorReport {
105
131
  gemini_config: GeminiConfigStatus;
106
132
  vibe_session_logging: VibeSessionLoggingStatus;
107
133
  };
134
+ cache_awareness: CacheAwarenessReport;
108
135
  next_actions: string[];
109
136
  }
110
- export declare function createDoctorReport(env?: NodeJS.ProcessEnv): DoctorReport;
137
+ export interface CreateDoctorReportOptions {
138
+ env?: NodeJS.ProcessEnv;
139
+ /**
140
+ * Optional read access to the flight recorder. Drives the
141
+ * cache_awareness.last_24h and per_cli aggregates. When absent, those
142
+ * blocks report zeroed aggregates (still PRESENT in the report).
143
+ */
144
+ flightRecorder?: FlightRecorderQuery;
145
+ /**
146
+ * Optional CacheAwarenessConfig. Drives `enabled_features`. When
147
+ * absent, `enabled_features` is empty (all behaviour considered off).
148
+ */
149
+ cacheAwareness?: CacheAwarenessConfig;
150
+ }
151
+ export declare function createDoctorReport(envOrOptions?: NodeJS.ProcessEnv | CreateDoctorReportOptions): DoctorReport;
111
152
  export declare function printDoctorJson(): void;
package/dist/doctor.js CHANGED
@@ -6,6 +6,9 @@ import { loadAuthConfig } from "./auth.js";
6
6
  import { createEndpointExposureReport, redactDiagnosticUrl, } from "./endpoint-exposure.js";
7
7
  import { listProviderRuntimeStatuses, } from "./provider-status.js";
8
8
  import { CLAUDE_MCP_SERVER_NAMES } from "./claude-mcp-config.js";
9
+ import { loadCacheAwarenessConfig } from "./config.js";
10
+ import { computeGlobalCacheStats } from "./cache-stats.js";
11
+ import { FlightRecorder, resolveFlightRecorderDbPath } from "./flight-recorder.js";
9
12
  /**
10
13
  * Probe ~/.vibe/config.toml to see whether session_logging is enabled. Vibe
11
14
  * persists session logs (which sessionId/--continue depends on) only when
@@ -190,7 +193,71 @@ function chatGPTConnectorUrl(env, rawPublicUrl) {
190
193
  return null;
191
194
  }
192
195
  }
193
- export function createDoctorReport(env = process.env) {
196
+ /**
197
+ * Build the cache_awareness block. ALWAYS present in the report; fields
198
+ * are zeroed when the flight recorder is missing or empty.
199
+ */
200
+ function buildCacheAwarenessReport(opts) {
201
+ const enabled = [];
202
+ if (opts.cacheAwareness?.emitAnthropicCacheControl) {
203
+ enabled.push("anthropic_cache_control");
204
+ }
205
+ if (opts.cacheAwareness?.warnOnTtlExpiry) {
206
+ enabled.push("ttl_warnings");
207
+ }
208
+ if (!opts.flightRecorder) {
209
+ return {
210
+ enabled_features: enabled,
211
+ last_24h: {
212
+ hit_rate: 0,
213
+ total_hits: 0,
214
+ total_requests: 0,
215
+ estimated_savings_usd: 0,
216
+ },
217
+ per_cli: {},
218
+ };
219
+ }
220
+ let stats;
221
+ try {
222
+ stats = computeGlobalCacheStats(opts.flightRecorder, { lastNHours: 24 });
223
+ }
224
+ catch {
225
+ return {
226
+ enabled_features: enabled,
227
+ last_24h: {
228
+ hit_rate: 0,
229
+ total_hits: 0,
230
+ total_requests: 0,
231
+ estimated_savings_usd: 0,
232
+ },
233
+ per_cli: {},
234
+ };
235
+ }
236
+ const perCli = {};
237
+ for (const entry of stats.perCli) {
238
+ perCli[entry.cli] = {
239
+ hit_rate: entry.hitRate,
240
+ total_hits: entry.hitCount,
241
+ total_cache_read_tokens: entry.totalCacheReadTokens,
242
+ };
243
+ }
244
+ return {
245
+ enabled_features: enabled,
246
+ last_24h: {
247
+ hit_rate: stats.hitRate,
248
+ total_hits: stats.totalHits,
249
+ total_requests: stats.totalRequests,
250
+ estimated_savings_usd: stats.estimatedSavingsUsd,
251
+ },
252
+ per_cli: perCli,
253
+ };
254
+ }
255
+ export function createDoctorReport(envOrOptions = process.env) {
256
+ // Preserve back-compat: previous signature accepted a bare `env` object.
257
+ const opts = isCreateDoctorReportOptions(envOrOptions)
258
+ ? envOrOptions
259
+ : { env: envOrOptions };
260
+ const env = opts.env ?? process.env;
194
261
  const auth = loadAuthConfig(env);
195
262
  const transport = defaultTransport(env);
196
263
  const rawPublicUrl = env.LLM_GATEWAY_PUBLIC_URL || null;
@@ -237,6 +304,7 @@ export function createDoctorReport(env = process.env) {
237
304
  },
238
305
  endpoint_exposure: endpointExposure,
239
306
  client_config: clientConfigStatus(),
307
+ cache_awareness: buildCacheAwarenessReport(opts),
240
308
  next_actions: [],
241
309
  };
242
310
  if (transport === "http" && auth.required && !auth.tokenConfigured) {
@@ -271,7 +339,58 @@ export function createDoctorReport(env = process.env) {
271
339
  return report;
272
340
  }
273
341
  export function printDoctorJson() {
274
- process.stdout.write(`${JSON.stringify(createDoctorReport(), null, 2)}\n`);
342
+ // Load cache-awareness config + open the flight recorder so the doctor
343
+ // command can populate cache_awareness.last_24h. Both are best-effort —
344
+ // failures degrade to the zeroed block (buildCacheAwarenessReport
345
+ // handles missing deps).
346
+ let cacheAwareness;
347
+ let flightRecorder;
348
+ try {
349
+ cacheAwareness = loadCacheAwarenessConfig();
350
+ }
351
+ catch {
352
+ // ignore
353
+ }
354
+ try {
355
+ const dbPath = resolveFlightRecorderDbPath();
356
+ if (dbPath)
357
+ flightRecorder = new FlightRecorder(dbPath);
358
+ }
359
+ catch {
360
+ // ignore
361
+ }
362
+ const report = createDoctorReport({
363
+ env: process.env,
364
+ cacheAwareness,
365
+ flightRecorder,
366
+ });
367
+ process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
368
+ if (flightRecorder) {
369
+ try {
370
+ flightRecorder.close();
371
+ }
372
+ catch {
373
+ // best effort
374
+ }
375
+ }
376
+ }
377
/**
 * Decides whether `value` is a CreateDoctorReportOptions bag rather than a
 * bare environment object (the legacy `createDoctorReport(env)` call shape).
 *
 * An options bag is recognised by owning at least one of `flightRecorder`,
 * `env` or `cacheAwareness`. The first of those keys that is present decides:
 * its value must be `undefined` or an object. Environment values are strings,
 * so an exported variable with one of these names does not match.
 *
 * @param value The single argument given to `createDoctorReport`.
 * @returns true when `value` should be treated as an options bag.
 */
function isCreateDoctorReportOptions(value) {
    if (value === null || typeof value !== "object")
        return false;
    // `cacheAwareness` is included so that `{ cacheAwareness }` on its own is
    // not mistaken for an environment object and silently ignored.
    for (const key of ["flightRecorder", "env", "cacheAwareness"]) {
        if (Object.prototype.hasOwnProperty.call(value, key)) {
            const candidate = value[key];
            return candidate === undefined || typeof candidate === "object";
        }
    }
    return false;
}
276
395
  function doctorProviderStatus(provider) {
277
396
  return {
@@ -6,6 +6,8 @@ export interface FlightLogStart {
6
6
  system?: string;
7
7
  sessionId?: string;
8
8
  asyncJobId?: string;
9
+ stablePrefixHash?: string;
10
+ stablePrefixTokens?: number;
9
11
  }
10
12
  export interface FlightLogResult {
11
13
  response: string;
@@ -36,15 +38,40 @@ export declare class FlightRecorder {
36
38
  constructor(dbPath: string);
37
39
  logStart(entry: FlightLogStart): void;
38
40
  logComplete(correlationId: string, result: FlightLogResult): void;
41
+ /**
42
+ * Read-only query over the requests + gateway_metadata tables. Used by
43
+ * cache-stats / MCP resources / doctor without exposing a second SQLite
44
+ * connection. better-sqlite3 in WAL mode handles concurrent readers
45
+ * inside a single process safely.
46
+ *
47
+ * Safety:
48
+ * - Caller MUST pass parameterised SQL — direct string interpolation of
49
+ * untrusted values is unsafe.
50
+ * - The compiled statement's `.readonly` flag is checked at runtime;
51
+ * anything that can mutate rows (INSERT/UPDATE/DELETE, including the
52
+ * `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
53
+ * This blocks the writer-disguised-as-reader vector codex-r1/F3
54
+ * flagged, even when the caller is internal gateway code.
55
+ */
56
+ queryRequests<T = Record<string, unknown>>(sql: string, ...params: unknown[]): T[];
39
57
  flush(): void;
40
58
  close(): void;
41
59
  }
42
60
  export declare class NoopFlightRecorder {
43
61
  logStart(_entry: FlightLogStart): void;
44
62
  logComplete(_correlationId: string, _result: FlightLogResult): void;
63
+ queryRequests<T = Record<string, unknown>>(_sql: string, ..._params: unknown[]): T[];
45
64
  flush(): void;
46
65
  close(): void;
47
66
  }
48
67
  export type FlightRecorderLike = FlightRecorder | NoopFlightRecorder;
68
+ /**
69
+ * Read-only subset of FlightRecorder used by cache-stats / MCP resources /
70
+ * doctor. Accepts either FlightRecorder or NoopFlightRecorder; the noop
71
+ * returns `[]` from every query so downstream aggregation is empty by design.
72
+ */
73
+ export interface FlightRecorderQuery {
74
+ queryRequests<T = Record<string, unknown>>(sql: string, ...params: unknown[]): T[];
75
+ }
49
76
  export declare function createFlightRecorder(logger: LoggerLike): FlightRecorderLike;
50
77
  export {};
@@ -1,3 +1,20 @@
1
+ /**
2
+ * Flight recorder: SQLite-backed request log.
3
+ *
4
+ * Read access for cache-stats / MCP resources / doctor goes through the
5
+ * `queryRequests<T>(sql, ...params)` method exposed on both `FlightRecorder`
6
+ * and `NoopFlightRecorder` (the `FlightRecorderQuery` interface, see bottom
7
+ * of file). This is Option A from
8
+ * docs/plans/cache-awareness.dag.toml#expose-flight-recorder-read-access —
9
+ * a single read-only query surface on the existing class, NOT a sibling
10
+ * read-only SQLite connection. better-sqlite3 in WAL mode handles
11
+ * concurrent readers inside a single process safely, so the additional
12
+ * connection isn't needed and would have to be threaded through
13
+ * GatewayServerRuntime as a separate field.
14
+ *
15
+ * Callers MUST pass parameterised SQL — string-interpolation of untrusted
16
+ * values is unsafe even on a "read-only" query.
17
+ */
1
18
  import { chmodSync, existsSync, mkdirSync } from "fs";
2
19
  import os from "os";
3
20
  import path from "path";
@@ -18,6 +35,26 @@ function ensureRequestsCacheColumns(db) {
18
35
  db.exec("ALTER TABLE requests ADD COLUMN cache_creation_tokens INTEGER");
19
36
  }
20
37
  }
38
+ /**
39
+ * Idempotent v3 migration: add `stable_prefix_hash` / `stable_prefix_tokens`
40
+ * columns plus their index. Populated only for new rows that carry a
41
+ * promptParts structure (slice 1); legacy rows keep NULL forever.
42
+ *
43
+ * Read access for cache-stats / MCP resources / doctor goes through the
44
+ * read-only `queryRequests()` method on FlightRecorder (no separate read
45
+ * connection — better-sqlite3 in WAL mode handles concurrent readers).
46
+ */
47
+ function ensureStablePrefixColumns(db) {
48
+ const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
49
+ const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
50
+ if (!names.has("stable_prefix_hash")) {
51
+ db.exec("ALTER TABLE requests ADD COLUMN stable_prefix_hash TEXT");
52
+ }
53
+ if (!names.has("stable_prefix_tokens")) {
54
+ db.exec("ALTER TABLE requests ADD COLUMN stable_prefix_tokens INTEGER");
55
+ }
56
+ db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
57
+ }
21
58
  export function resolveFlightRecorderDbPath() {
22
59
  const configured = process.env.LLM_GATEWAY_LOGS_DB;
23
60
  if (configured !== undefined) {
@@ -131,6 +168,14 @@ export class FlightRecorder {
131
168
  this.db
132
169
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(2, ?)")
133
170
  .run(new Date().toISOString());
171
+ // Migration v3: stable_prefix_hash / stable_prefix_tokens columns plus
172
+ // their index. Populated only for new rows whose request carried a
173
+ // promptParts structure (slice 1 of cache-awareness); legacy rows keep
174
+ // NULL intentionally.
175
+ ensureStablePrefixColumns(this.db);
176
+ this.db
177
+ .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
178
+ .run(new Date().toISOString());
134
179
  if (process.platform !== "win32") {
135
180
  try {
136
181
  chmodSync(dbPath, 0o600);
@@ -140,8 +185,10 @@ export class FlightRecorder {
140
185
  }
141
186
  }
142
187
  const insertRequest = this.db.prepare(`
143
- INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc)
144
- VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc)
188
+ INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
189
+ stable_prefix_hash, stable_prefix_tokens)
190
+ VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
191
+ @stable_prefix_hash, @stable_prefix_tokens)
145
192
  `);
146
193
  const insertMetadata = this.db.prepare(`
147
194
  INSERT INTO gateway_metadata (request_id, async_job_id, status)
@@ -156,6 +203,8 @@ export class FlightRecorder {
156
203
  system: entry.system || null,
157
204
  session_id: entry.sessionId || null,
158
205
  datetime_utc: new Date().toISOString(),
206
+ stable_prefix_hash: entry.stablePrefixHash ?? null,
207
+ stable_prefix_tokens: entry.stablePrefixTokens ?? null,
159
208
  });
160
209
  insertMetadata.run({
161
210
  request_id: entry.correlationId,
@@ -218,6 +267,31 @@ export class FlightRecorder {
218
267
  logComplete(correlationId, result) {
219
268
  this.updateCompleteTxn(correlationId, result);
220
269
  }
270
+ /**
271
+ * Read-only query over the requests + gateway_metadata tables. Used by
272
+ * cache-stats / MCP resources / doctor without exposing a second SQLite
273
+ * connection. better-sqlite3 in WAL mode handles concurrent readers
274
+ * inside a single process safely.
275
+ *
276
+ * Safety:
277
+ * - Caller MUST pass parameterised SQL — direct string interpolation of
278
+ * untrusted values is unsafe.
279
+ * - The compiled statement's `.readonly` flag is checked at runtime;
280
+ * anything that can mutate rows (INSERT/UPDATE/DELETE, including the
281
+ * `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
282
+ * This blocks the writer-disguised-as-reader vector codex-r1/F3
283
+ * flagged, even when the caller is internal gateway code.
284
+ */
285
+ queryRequests(sql, ...params) {
286
+ const stmt = this.db.prepare(sql);
287
+ if (stmt.readonly === false) {
288
+ throw new Error("FlightRecorder.queryRequests refuses non-readonly SQL — use a transaction or a separate write surface for INSERT/UPDATE/DELETE.");
289
+ }
290
+ if (!stmt.all) {
291
+ return [];
292
+ }
293
+ return stmt.all(...params);
294
+ }
221
295
  flush() {
222
296
  // No-op: better-sqlite3 writes synchronously.
223
297
  }
@@ -228,6 +302,9 @@ export class FlightRecorder {
228
302
  export class NoopFlightRecorder {
229
303
  logStart(_entry) { }
230
304
  logComplete(_correlationId, _result) { }
305
+ queryRequests(_sql, ..._params) {
306
+ return [];
307
+ }
231
308
  flush() { }
232
309
  close() { }
233
310
  }
package/dist/index.d.ts CHANGED
@@ -4,7 +4,7 @@ import { z } from "zod";
4
4
  import { ISessionManager } from "./session-manager.js";
5
5
  import { ResourceProvider } from "./resources.js";
6
6
  import { PerformanceMetrics } from "./metrics.js";
7
- import { type PersistenceConfig } from "./config.js";
7
+ import { type PersistenceConfig, type CacheAwarenessConfig } from "./config.js";
8
8
  import { DatabaseConnection } from "./db.js";
9
9
  import { AsyncJobManager } from "./async-job-manager.js";
10
10
  import { ApprovalManager, ApprovalRecord } from "./approval-manager.js";
@@ -12,6 +12,22 @@ import { ReviewIntegrityResult } from "./review-integrity.js";
12
12
  import { ClaudeMcpConfigResult, ClaudeMcpServerName } from "./claude-mcp-config.js";
13
13
  import { type MistralAgentMode, type ClaudePermissionMode, type CodexSandboxMode, type CodexAskForApproval, type ClaudeEffortLevel } from "./request-helpers.js";
14
14
  import { FlightRecorderLike } from "./flight-recorder.js";
15
+ import { type PromptParts } from "./prompt-parts.js";
16
/**
 * Slice 3: one structured warning attached to a tool response.
 *
 * These entries are programmatic signals for caller agents to react to. They
 * are separate from review-integrity warnings, which are appended as text to
 * the user-visible response.
 */
export interface WarningEntry {
    /** Stable machine-readable code, e.g. "cache_ttl_expiring_soon". */
    code: string;
    /** Optional human-readable message for surfaces that render text. */
    message?: string;
    /**
     * NOTE(review): presumably the milliseconds left before the cached prefix
     * expires, for TTL warnings — confirm against the emitter.
     */
    ttlRemainingMs?: number;
    /** Code-specific payload — left open for future warning types. */
    [key: string]: unknown;
}
15
31
  type ExtendedToolResponse = {
16
32
  content: {
17
33
  type: "text";
@@ -28,6 +44,8 @@ type ExtendedToolResponse = {
28
44
  missing?: ClaudeMcpServerName[];
29
45
  };
30
46
  reviewIntegrity?: ReviewIntegrityResult;
47
+ /** Slice 3: structured warnings (e.g. cache_ttl_expiring_soon). */
48
+ warnings?: WarningEntry[];
31
49
  };
32
50
  declare const logger: {
33
51
  info: (message: string, ...args: any[]) => void;
@@ -49,6 +67,7 @@ export interface GatewayServerDeps {
49
67
  flightRecorder?: FlightRecorderLike;
50
68
  logger?: GatewayLogger;
51
69
  persistence?: PersistenceConfig;
70
+ cacheAwareness?: CacheAwarenessConfig;
52
71
  }
53
72
  interface GatewayServerRuntime {
54
73
  sessionManager: ISessionManager;
@@ -60,6 +79,7 @@ interface GatewayServerRuntime {
60
79
  flightRecorder: FlightRecorderLike;
61
80
  logger: GatewayLogger;
62
81
  persistence: PersistenceConfig;
82
+ cacheAwareness: CacheAwarenessConfig;
63
83
  }
64
84
  interface CliRequestPrep {
65
85
  corrId: string;
@@ -70,9 +90,19 @@ interface CliRequestPrep {
70
90
  approvalDecision: ApprovalRecord | null;
71
91
  reviewIntegrity?: ReviewIntegrityResult;
72
92
  args: string[];
93
+ /**
94
+ * Sha256 of the assembled prompt's stable prefix bytes when the caller
95
+ * supplied `promptParts`. Null when the legacy `prompt` field was used.
96
+ * Populated by `resolvePromptOrPartsForPrep` and threaded into the
97
+ * flight-recorder row by the caller's safeFlightStart entry.
98
+ */
99
+ stablePrefixHash: string | null;
100
+ /** Heuristic token count (bytes/4) of the same stable prefix. */
101
+ stablePrefixTokens: number | null;
73
102
  }
74
103
  export declare function prepareClaudeRequest(params: {
75
- prompt: string;
104
+ prompt?: string;
105
+ promptParts?: PromptParts;
76
106
  model?: string;
77
107
  outputFormat: "text" | "json" | "stream-json";
78
108
  allowedTools?: string[];
@@ -106,7 +136,8 @@ export interface CodexRequestPrep extends CliRequestPrep {
106
136
  cleanup?: () => void;
107
137
  }
108
138
  export declare function prepareCodexRequest(params: {
109
- prompt: string;
139
+ prompt?: string;
140
+ promptParts?: PromptParts;
110
141
  model?: string;
111
142
  fullAuto: boolean;
112
143
  sandboxMode?: CodexSandboxMode;
@@ -138,7 +169,8 @@ export declare function prepareCodexRequest(params: {
138
169
  ignoreRules?: boolean;
139
170
  }, runtime?: GatewayServerRuntime): CodexRequestPrep | ExtendedToolResponse;
140
171
  export declare function prepareGeminiRequest(params: {
141
- prompt: string;
172
+ prompt?: string;
173
+ promptParts?: PromptParts;
142
174
  model?: string;
143
175
  approvalMode?: string;
144
176
  approvalStrategy: "legacy" | "mcp_managed";
@@ -161,7 +193,8 @@ export declare function prepareGeminiRequest(params: {
161
193
  attachments?: string[];
162
194
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
163
195
  export declare function prepareMistralRequest(params: {
164
- prompt: string;
196
+ prompt?: string;
197
+ promptParts?: PromptParts;
165
198
  model?: string;
166
199
  outputFormat?: string;
167
200
  permissionMode?: MistralAgentMode;
@@ -179,7 +212,8 @@ export declare function prepareMistralRequest(params: {
179
212
  mistralEnv: Record<string, string>;
180
213
  }) | ExtendedToolResponse;
181
214
  export interface GeminiRequestParams {
182
- prompt: string;
215
+ prompt?: string;
216
+ promptParts?: PromptParts;
183
217
  model?: string;
184
218
  sessionId?: string;
185
219
  resumeLatest: boolean;
@@ -218,7 +252,8 @@ export interface AsyncHandlerDeps extends HandlerDeps {
218
252
  export declare function handleGeminiRequest(deps: HandlerDeps, params: GeminiRequestParams): Promise<ExtendedToolResponse>;
219
253
  export declare function handleGeminiRequestAsync(deps: AsyncHandlerDeps, params: Omit<GeminiRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
220
254
  export interface GrokRequestParams {
221
- prompt: string;
255
+ prompt?: string;
256
+ promptParts?: PromptParts;
222
257
  model?: string;
223
258
  outputFormat?: string;
224
259
  sessionId?: string;
@@ -242,7 +277,8 @@ export interface GrokRequestParams {
242
277
  export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
243
278
  export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
244
279
  export interface MistralRequestParams {
245
- prompt: string;
280
+ prompt?: string;
281
+ promptParts?: PromptParts;
246
282
  model?: string;
247
283
  outputFormat?: string;
248
284
  sessionId?: string;
@@ -265,7 +301,8 @@ export interface MistralRequestParams {
265
301
  export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
266
302
  export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
267
303
  export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params: {
268
- prompt: string;
304
+ prompt?: string;
305
+ promptParts?: PromptParts;
269
306
  model?: string;
270
307
  fullAuto: boolean;
271
308
  sandboxMode?: CodexSandboxMode;