llm-cli-gateway 1.13.2 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +371 -44
- package/dist/async-job-manager.d.ts +15 -1
- package/dist/async-job-manager.js +31 -6
- package/dist/cache-stats.d.ts +26 -0
- package/dist/cache-stats.js +45 -2
- package/dist/executor.d.ts +8 -0
- package/dist/executor.js +7 -2
- package/dist/flight-recorder.d.ts +7 -0
- package/dist/flight-recorder.js +27 -2
- package/dist/index.d.ts +126 -1
- package/dist/index.js +480 -50
- package/dist/prompt-parts.d.ts +74 -0
- package/dist/prompt-parts.js +47 -0
- package/dist/session-manager.d.ts +20 -2
- package/dist/session-manager.js +28 -3
- package/dist/upstream-contracts.d.ts +8 -1
- package/dist/upstream-contracts.js +37 -1
- package/dist/worktree-manager.d.ts +41 -0
- package/dist/worktree-manager.js +214 -0
- package/package.json +2 -1
|
@@ -16,6 +16,13 @@ export interface AsyncJobFlightRecorderEntry {
|
|
|
16
16
|
sessionId?: string;
|
|
17
17
|
stablePrefixHash?: string;
|
|
18
18
|
stablePrefixTokens?: number;
|
|
19
|
+
/**
|
|
20
|
+
* Slice κ: count of caller-supplied prompt-parts content blocks the
|
|
21
|
+
* gateway emitted with explicit Anthropic `cache_control` markers
|
|
22
|
+
* (ttl='1h'). Only set for Claude requests that opt into κ; left
|
|
23
|
+
* undefined elsewhere so legacy rows stay NULL.
|
|
24
|
+
*/
|
|
25
|
+
cacheControlBlocks?: number;
|
|
19
26
|
}
|
|
20
27
|
/**
|
|
21
28
|
* Slice 1.5 usage-extraction callback. Closures MUST be constructed from
|
|
@@ -66,6 +73,13 @@ export interface StartJobOptions {
|
|
|
66
73
|
* therefore do NOT collide on dedup.
|
|
67
74
|
*/
|
|
68
75
|
env?: Record<string, string>;
|
|
76
|
+
/**
|
|
77
|
+
* Slice κ: optional UTF-8 payload to pipe into the child's stdin.
|
|
78
|
+
* Participates in the dedup key — two requests with identical argv
|
|
79
|
+
* but different stdin do NOT collide. When set, stdio[0] is "pipe";
|
|
80
|
+
* when unset, stdio[0] stays "ignore" (regression-protected).
|
|
81
|
+
*/
|
|
82
|
+
stdin?: string;
|
|
69
83
|
/**
|
|
70
84
|
* Optional hook fired exactly once when the job reaches a terminal state.
|
|
71
85
|
* Used by callers that own per-request resources (outputSchema temp files,
|
|
@@ -168,7 +182,7 @@ export declare class AsyncJobManager {
|
|
|
168
182
|
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
169
183
|
* optional param for the dedup-aware path.
|
|
170
184
|
*/
|
|
171
|
-
startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean): AsyncJobSnapshot;
|
|
185
|
+
startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean, stdin?: string): AsyncJobSnapshot;
|
|
172
186
|
/**
|
|
173
187
|
* Start a job, with optional dedup against recent identical requests.
|
|
174
188
|
* Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
|
|
@@ -207,8 +207,22 @@ export class AsyncJobManager {
|
|
|
207
207
|
* (sorted keys → JSON-stringified). This prevents two Mistral requests with the
|
|
208
208
|
* same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
|
|
209
209
|
*/
|
|
210
|
-
buildRequestKey(cli, args, env) {
|
|
211
|
-
|
|
210
|
+
buildRequestKey(cli, args, env, stdin, cwd) {
|
|
211
|
+
// Slice κ: stdin participates in the dedup key. Two Claude requests
|
|
212
|
+
// with identical argv but different cache_control content blocks
|
|
213
|
+
// would otherwise collide on dedup and the second caller would get
|
|
214
|
+
// the wrong response. The legacy "no stdin" code path passes
|
|
215
|
+
// stdin=undefined, which serialises to the same empty marker the
|
|
216
|
+
// previous version emitted — non-κ dedup is unchanged.
|
|
217
|
+
// Slice λ: cwd participates similarly. Two requests with identical
|
|
218
|
+
// argv but different worktrees would otherwise collide on dedup and
|
|
219
|
+
// the second caller would receive a response executed in the wrong
|
|
220
|
+
// worktree. cwd=undefined preserves the pre-λ key shape — non-λ
|
|
221
|
+
// dedup is unchanged.
|
|
222
|
+
const extraEnv = canonicaliseEnvForKey(env);
|
|
223
|
+
const withStdin = stdin === undefined ? extraEnv : `${extraEnv}|stdin:${stdin}`;
|
|
224
|
+
const extra = cwd === undefined ? withStdin : `${withStdin}|cwd:${cwd}`;
|
|
225
|
+
return computeRequestKey(cli, args, extra);
|
|
212
226
|
}
|
|
213
227
|
fireOnComplete(job) {
|
|
214
228
|
if (job.onCompleteFired)
|
|
@@ -417,13 +431,14 @@ export class AsyncJobManager {
|
|
|
417
431
|
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
418
432
|
* optional param for the dedup-aware path.
|
|
419
433
|
*/
|
|
420
|
-
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart) {
|
|
434
|
+
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, stdin) {
|
|
421
435
|
return this.startJobWithDedup(cli, args, correlationId, {
|
|
422
436
|
cwd,
|
|
423
437
|
idleTimeoutMs,
|
|
424
438
|
outputFormat,
|
|
425
439
|
forceRefresh,
|
|
426
440
|
env,
|
|
441
|
+
stdin,
|
|
427
442
|
onComplete,
|
|
428
443
|
flightRecorderEntry,
|
|
429
444
|
extractUsage,
|
|
@@ -439,8 +454,8 @@ export class AsyncJobManager {
|
|
|
439
454
|
* is returned without spawning a new process. forceRefresh skips dedup entirely.
|
|
440
455
|
*/
|
|
441
456
|
startJobWithDedup(cli, args, correlationId, opts = {}) {
|
|
442
|
-
const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
|
|
443
|
-
const requestKey = this.buildRequestKey(cli, args, extraEnv);
|
|
457
|
+
const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, stdin, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
|
|
458
|
+
const requestKey = this.buildRequestKey(cli, args, extraEnv, stdin, cwd);
|
|
444
459
|
if (!forceRefresh && this.store) {
|
|
445
460
|
try {
|
|
446
461
|
const existing = this.store.findByRequestKey(requestKey);
|
|
@@ -489,9 +504,18 @@ export class AsyncJobManager {
|
|
|
489
504
|
const baseEnv = envWithExtendedPath(process.env, getExtendedPath());
|
|
490
505
|
const child = spawnCliProcess(command, args, {
|
|
491
506
|
cwd,
|
|
492
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
507
|
+
stdio: stdin === undefined ? ["ignore", "pipe", "pipe"] : ["pipe", "pipe", "pipe"],
|
|
493
508
|
env: { ...baseEnv, ...(extraEnv ?? {}) },
|
|
494
509
|
});
|
|
510
|
+
if (stdin !== undefined && child.stdin) {
    // Write failures on a pipe (e.g. EPIPE when the child exits before it
    // has consumed the payload) are reported asynchronously through the
    // stream's 'error' event, not as a synchronous throw. Without a
    // listener Node turns that event into an uncaught exception, so
    // register one before writing.
    child.stdin.on("error", (err) => {
        this.logger.error(`Job ${id} failed to write stdin payload`, err);
    });
    try {
        child.stdin.write(stdin);
    }
    catch (err) {
        // Synchronous failures only; asynchronous ones go to the listener above.
        this.logger.error(`Job ${id} failed to write stdin payload`, err);
    }
    // Always close stdin so the child sees EOF.
    child.stdin.end();
}
|
|
495
519
|
// Single cleanup flag to prevent double-unregister
|
|
496
520
|
let groupCleaned = false;
|
|
497
521
|
const cleanupGroup = () => {
|
|
@@ -560,6 +584,7 @@ export class AsyncJobManager {
|
|
|
560
584
|
asyncJobId: id,
|
|
561
585
|
stablePrefixHash: flightRecorderEntry.stablePrefixHash,
|
|
562
586
|
stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
|
|
587
|
+
cacheControlBlocks: flightRecorderEntry.cacheControlBlocks,
|
|
563
588
|
});
|
|
564
589
|
}
|
|
565
590
|
catch (err) {
|
package/dist/cache-stats.d.ts
CHANGED
|
@@ -76,6 +76,32 @@ export interface GlobalCacheStats {
|
|
|
76
76
|
estimatedSavingsUsd: number;
|
|
77
77
|
}>;
|
|
78
78
|
estimatedSavingsUsd: number;
|
|
79
|
+
/**
|
|
80
|
+
* Rec #3 (slice κ): derived metrics that distinguish gateway-driven
|
|
81
|
+
* κ-explicit `cache_control` breakpoints from Claude Code's
|
|
82
|
+
* own baseline cache reads.
|
|
83
|
+
*
|
|
84
|
+
* - explicitCacheControlRows: rows where the gateway emitted at
|
|
85
|
+
* least one `cache_control` marker (`cache_control_blocks > 0`).
|
|
86
|
+
* - explicitCacheControlHits: those rows whose `cache_read_tokens
|
|
87
|
+
* > 0` — closest signal we have to "the caller's marked block
|
|
88
|
+
* actually hit Anthropic's cache" (still includes Claude Code's
|
|
89
|
+
* baseline cache reads on top, which is unavoidable without
|
|
90
|
+
* per-block token accounting from Anthropic).
|
|
91
|
+
* - explicitCacheControlHitRate: ratio explicit hits / explicit rows.
|
|
92
|
+
* - stablePrefixReuseCount: distinct `stable_prefix_hash` values
|
|
93
|
+
* that appear in >1 row in-window (i.e. real reuse opportunities).
|
|
94
|
+
* - avgCacheCreationAfterFirstCall: averaged across stable-prefix
|
|
95
|
+
* reuse groups, the cache_creation_tokens on rows AFTER the
|
|
96
|
+
* first-by-datetime in each group. Drops sharply when caller
|
|
97
|
+
* blocks are reused; stays high when Claude Code's session-wrap
|
|
98
|
+
* floor dominates.
|
|
99
|
+
*/
|
|
100
|
+
explicitCacheControlRows: number;
|
|
101
|
+
explicitCacheControlHits: number;
|
|
102
|
+
explicitCacheControlHitRate: number;
|
|
103
|
+
stablePrefixReuseCount: number;
|
|
104
|
+
avgCacheCreationAfterFirstCall: number | null;
|
|
79
105
|
}
|
|
80
106
|
export declare function computeSessionCacheStats(db: FlightRecorderQuery, sessionId: string): SessionCacheStats;
|
|
81
107
|
export interface TtlPolicy {
|
package/dist/cache-stats.js
CHANGED
|
@@ -159,14 +159,16 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
159
159
|
COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
|
|
160
160
|
COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
|
|
161
161
|
stable_prefix_hash,
|
|
162
|
-
datetime_utc
|
|
162
|
+
datetime_utc,
|
|
163
|
+
cache_control_blocks
|
|
163
164
|
FROM requests
|
|
164
165
|
WHERE datetime_utc >= ?`
|
|
165
166
|
: `SELECT cli, model,
|
|
166
167
|
COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
|
|
167
168
|
COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
|
|
168
169
|
stable_prefix_hash,
|
|
169
|
-
datetime_utc
|
|
170
|
+
datetime_utc,
|
|
171
|
+
cache_control_blocks
|
|
170
172
|
FROM requests`;
|
|
171
173
|
const rows = sinceIso ? db.queryRequests(sql, sinceIso) : db.queryRequests(sql);
|
|
172
174
|
const perCliMap = new Map();
|
|
@@ -175,6 +177,17 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
175
177
|
let totalRead = 0;
|
|
176
178
|
let totalCreation = 0;
|
|
177
179
|
let totalSavings = 0;
|
|
180
|
+
// Rec #3: κ-explicit metrics. A row is "κ-explicit" iff it has
|
|
181
|
+
// `cache_control_blocks > 0` — i.e. the gateway emitted at least one
|
|
182
|
+
// caller-supplied `cache_control` marker. Rows with NULL or 0 are
|
|
183
|
+
// either pre-v4 or non-κ Claude / non-Claude requests.
|
|
184
|
+
let explicitRows = 0;
|
|
185
|
+
let explicitHits = 0;
|
|
186
|
+
// Per-prefix reuse tracking: collect cache_creation_tokens for every
|
|
187
|
+
// row keyed by stable_prefix_hash, ordered ascending by datetime_utc.
|
|
188
|
+
// For each group with >1 row, drop the first (the cache-write call)
|
|
189
|
+
// and average the rest (the cache-read calls).
|
|
190
|
+
const perPrefix = new Map();
|
|
178
191
|
for (const row of rows) {
|
|
179
192
|
totalRequests += 1;
|
|
180
193
|
const reads = safeNum(row.cache_read_tokens);
|
|
@@ -183,6 +196,17 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
183
196
|
totalCreation += creation;
|
|
184
197
|
if (reads > 0)
|
|
185
198
|
totalHits += 1;
|
|
199
|
+
const ccBlocks = safeNum(row.cache_control_blocks);
|
|
200
|
+
if (ccBlocks > 0) {
|
|
201
|
+
explicitRows += 1;
|
|
202
|
+
if (reads > 0)
|
|
203
|
+
explicitHits += 1;
|
|
204
|
+
}
|
|
205
|
+
if (row.stable_prefix_hash) {
|
|
206
|
+
const arr = perPrefix.get(row.stable_prefix_hash) ?? [];
|
|
207
|
+
arr.push({ datetime_utc: row.datetime_utc, cache_creation_tokens: creation });
|
|
208
|
+
perPrefix.set(row.stable_prefix_hash, arr);
|
|
209
|
+
}
|
|
186
210
|
if (!isCacheStatsCli(row.cli))
|
|
187
211
|
continue;
|
|
188
212
|
const cli = row.cli;
|
|
@@ -203,6 +227,20 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
203
227
|
agg.estimatedSavingsUsd += savings;
|
|
204
228
|
perCliMap.set(cli, agg);
|
|
205
229
|
}
|
|
230
|
+
let stablePrefixReuseCount = 0;
|
|
231
|
+
let creationAfterFirstSum = 0;
|
|
232
|
+
let creationAfterFirstCount = 0;
|
|
233
|
+
for (const arr of perPrefix.values()) {
|
|
234
|
+
if (arr.length <= 1)
|
|
235
|
+
continue;
|
|
236
|
+
stablePrefixReuseCount += 1;
|
|
237
|
+
arr.sort((a, b) => a.datetime_utc < b.datetime_utc ? -1 : a.datetime_utc > b.datetime_utc ? 1 : 0);
|
|
238
|
+
for (let i = 1; i < arr.length; i++) {
|
|
239
|
+
creationAfterFirstSum += arr[i].cache_creation_tokens;
|
|
240
|
+
creationAfterFirstCount += 1;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
const avgCacheCreationAfterFirstCall = creationAfterFirstCount > 0 ? creationAfterFirstSum / creationAfterFirstCount : null;
|
|
206
244
|
const perCli = Array.from(perCliMap.entries()).map(([cli, agg]) => ({
|
|
207
245
|
cli,
|
|
208
246
|
requestCount: agg.requestCount,
|
|
@@ -221,5 +259,10 @@ export function computeGlobalCacheStats(db, opts = {}) {
|
|
|
221
259
|
totalCacheCreationTokens: totalCreation,
|
|
222
260
|
perCli,
|
|
223
261
|
estimatedSavingsUsd: totalSavings,
|
|
262
|
+
explicitCacheControlRows: explicitRows,
|
|
263
|
+
explicitCacheControlHits: explicitHits,
|
|
264
|
+
explicitCacheControlHitRate: explicitRows > 0 ? explicitHits / explicitRows : 0,
|
|
265
|
+
stablePrefixReuseCount,
|
|
266
|
+
avgCacheCreationAfterFirstCall,
|
|
224
267
|
};
|
|
225
268
|
}
|
package/dist/executor.d.ts
CHANGED
|
@@ -7,6 +7,14 @@ export interface ExecuteOptions {
|
|
|
7
7
|
logger?: Logger;
|
|
8
8
|
/** Extra environment variables to inject; merged after PATH. */
|
|
9
9
|
env?: NodeJS.ProcessEnv;
|
|
10
|
+
/**
|
|
11
|
+
* Slice κ: optional UTF-8 payload to write to the child's stdin
|
|
12
|
+
* immediately after spawn. When provided, stdio for stdin switches
|
|
13
|
+
* from "ignore" to "pipe" so the CLI can read the payload (used by
|
|
14
|
+
* `claude --input-format stream-json`). Undefined preserves the
|
|
15
|
+
* legacy stdio:["ignore","pipe","pipe"] shape.
|
|
16
|
+
*/
|
|
17
|
+
stdin?: string;
|
|
10
18
|
}
|
|
11
19
|
export interface ExecuteResult {
|
|
12
20
|
stdout: string;
|
package/dist/executor.js
CHANGED
|
@@ -296,16 +296,21 @@ export function spawnCliProcess(command, args, options) {
|
|
|
296
296
|
return proc;
|
|
297
297
|
}
|
|
298
298
|
export async function executeCli(command, args, options = {}) {
|
|
299
|
-
const { timeout, idleTimeout, cwd, env: extraEnv } = options;
|
|
299
|
+
const { timeout, idleTimeout, cwd, env: extraEnv, stdin } = options;
|
|
300
300
|
const extendedPath = getExtendedPath();
|
|
301
301
|
const baseEnv = envWithExtendedPath(process.env, extendedPath);
|
|
302
302
|
const circuitBreaker = getCircuitBreaker(command);
|
|
303
303
|
const runOnce = () => new Promise((resolve, reject) => {
|
|
304
|
+
const stdio = stdin === undefined ? ["ignore", "pipe", "pipe"] : ["pipe", "pipe", "pipe"];
|
|
304
305
|
const proc = spawnCliProcess(command, args, {
|
|
305
306
|
cwd,
|
|
306
|
-
stdio
|
|
307
|
+
stdio,
|
|
307
308
|
env: { ...baseEnv, ...(extraEnv ?? {}) },
|
|
308
309
|
});
|
|
310
|
+
if (stdin !== undefined && proc.stdin) {
|
|
311
|
+
proc.stdin.write(stdin);
|
|
312
|
+
proc.stdin.end();
|
|
313
|
+
}
|
|
309
314
|
let stdout = "";
|
|
310
315
|
let stderr = "";
|
|
311
316
|
let timedOut = false;
|
|
@@ -8,6 +8,13 @@ export interface FlightLogStart {
|
|
|
8
8
|
asyncJobId?: string;
|
|
9
9
|
stablePrefixHash?: string;
|
|
10
10
|
stablePrefixTokens?: number;
|
|
11
|
+
/**
|
|
12
|
+
* Slice κ: number of caller-supplied prompt-parts content blocks
|
|
13
|
+
* that the gateway emitted with an explicit `cache_control`
|
|
14
|
+
* breakpoint on this request. `null` (default) for non-κ requests,
|
|
15
|
+
* including pre-κ rows after a v4 migration of a legacy DB.
|
|
16
|
+
*/
|
|
17
|
+
cacheControlBlocks?: number;
|
|
11
18
|
}
|
|
12
19
|
export interface FlightLogResult {
|
|
13
20
|
response: string;
|
package/dist/flight-recorder.js
CHANGED
|
@@ -55,6 +55,20 @@ function ensureStablePrefixColumns(db) {
|
|
|
55
55
|
}
|
|
56
56
|
db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
|
|
57
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* Idempotent v4 migration (slice κ): add `cache_control_blocks` column
|
|
60
|
+
* to the `requests` table. Counts the caller-supplied content blocks
|
|
61
|
+
* the gateway emitted with an explicit Anthropic `cache_control`
|
|
62
|
+
* marker. Pre-κ rows keep NULL; only κ-opt-in callers ever set the
|
|
63
|
+
* column to a non-NULL integer.
|
|
64
|
+
*/
|
|
65
|
+
function ensureCacheControlBlocksColumn(db) {
|
|
66
|
+
const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
|
|
67
|
+
const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
|
|
68
|
+
if (!names.has("cache_control_blocks")) {
|
|
69
|
+
db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
|
|
70
|
+
}
|
|
71
|
+
}
|
|
58
72
|
export function resolveFlightRecorderDbPath() {
|
|
59
73
|
const configured = process.env.LLM_GATEWAY_LOGS_DB;
|
|
60
74
|
if (configured !== undefined) {
|
|
@@ -176,6 +190,14 @@ export class FlightRecorder {
|
|
|
176
190
|
this.db
|
|
177
191
|
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
|
|
178
192
|
.run(new Date().toISOString());
|
|
193
|
+
// Migration v4: cache_control_blocks (slice κ). Pre-κ rows keep NULL;
|
|
194
|
+
// only κ-opt-in writes populate this. Aggregates in cache-stats /
|
|
195
|
+
// MCP resources can use this to separate explicit κ hits from
|
|
196
|
+
// implicit prefix-cache hits.
|
|
197
|
+
ensureCacheControlBlocksColumn(this.db);
|
|
198
|
+
this.db
|
|
199
|
+
.prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
|
|
200
|
+
.run(new Date().toISOString());
|
|
179
201
|
if (process.platform !== "win32") {
|
|
180
202
|
try {
|
|
181
203
|
chmodSync(dbPath, 0o600);
|
|
@@ -186,9 +208,11 @@ export class FlightRecorder {
|
|
|
186
208
|
}
|
|
187
209
|
const insertRequest = this.db.prepare(`
|
|
188
210
|
INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
|
|
189
|
-
stable_prefix_hash, stable_prefix_tokens
|
|
211
|
+
stable_prefix_hash, stable_prefix_tokens,
|
|
212
|
+
cache_control_blocks)
|
|
190
213
|
VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
|
|
191
|
-
@stable_prefix_hash, @stable_prefix_tokens
|
|
214
|
+
@stable_prefix_hash, @stable_prefix_tokens,
|
|
215
|
+
@cache_control_blocks)
|
|
192
216
|
`);
|
|
193
217
|
const insertMetadata = this.db.prepare(`
|
|
194
218
|
INSERT INTO gateway_metadata (request_id, async_job_id, status)
|
|
@@ -205,6 +229,7 @@ export class FlightRecorder {
|
|
|
205
229
|
datetime_utc: new Date().toISOString(),
|
|
206
230
|
stable_prefix_hash: entry.stablePrefixHash ?? null,
|
|
207
231
|
stable_prefix_tokens: entry.stablePrefixTokens ?? null,
|
|
232
|
+
cache_control_blocks: entry.cacheControlBlocks ?? null,
|
|
208
233
|
});
|
|
209
234
|
insertMetadata.run({
|
|
210
235
|
request_id: entry.correlationId,
|
package/dist/index.d.ts
CHANGED
|
@@ -67,6 +67,36 @@ type GatewayLogger = typeof logger;
|
|
|
67
67
|
*/
|
|
68
68
|
export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
|
|
69
69
|
export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
|
|
70
|
+
/**
|
|
71
|
+
* Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
|
|
72
|
+
* tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
|
|
73
|
+
* branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
|
|
74
|
+
* name and/or git ref (default ref: HEAD).
|
|
75
|
+
*
|
|
76
|
+
* Lifecycle is gateway-owned: the gateway pre-creates the worktree via
|
|
77
|
+
* `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
|
|
78
|
+
* No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
|
|
79
|
+
* the request carries a sessionId and the session already has a worktree,
|
|
80
|
+
* that worktree is reused. On session_delete or TTL eviction the gateway
|
|
81
|
+
* runs `git worktree remove --force`.
|
|
82
|
+
*
|
|
83
|
+
* Tool response: when a worktree was used, the successful response stdout
|
|
84
|
+
* is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
|
|
85
|
+
* parse/use the path without a schema change (slice λ §1.d).
|
|
86
|
+
*
|
|
87
|
+
* NOTE: callers should `.gitignore` the `.worktrees/` directory in their
|
|
88
|
+
* repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
|
|
89
|
+
*/
|
|
90
|
+
export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
|
|
91
|
+
name: z.ZodOptional<z.ZodString>;
|
|
92
|
+
ref: z.ZodOptional<z.ZodString>;
|
|
93
|
+
}, "strict", z.ZodTypeAny, {
|
|
94
|
+
name?: string | undefined;
|
|
95
|
+
ref?: string | undefined;
|
|
96
|
+
}, {
|
|
97
|
+
name?: string | undefined;
|
|
98
|
+
ref?: string | undefined;
|
|
99
|
+
}>]>;
|
|
70
100
|
export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
|
|
71
101
|
export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
|
|
72
102
|
export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
|
|
@@ -82,7 +112,7 @@ export interface GatewayServerDeps {
|
|
|
82
112
|
persistence?: PersistenceConfig;
|
|
83
113
|
cacheAwareness?: CacheAwarenessConfig;
|
|
84
114
|
}
|
|
85
|
-
interface GatewayServerRuntime {
|
|
115
|
+
export interface GatewayServerRuntime {
|
|
86
116
|
sessionManager: ISessionManager;
|
|
87
117
|
resourceProvider: ResourceProvider;
|
|
88
118
|
db: DatabaseConnection | null;
|
|
@@ -94,6 +124,60 @@ interface GatewayServerRuntime {
|
|
|
94
124
|
persistence: PersistenceConfig;
|
|
95
125
|
cacheAwareness: CacheAwarenessConfig;
|
|
96
126
|
}
|
|
127
|
+
export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
|
|
128
|
+
isolateState?: boolean;
|
|
129
|
+
}): GatewayServerRuntime;
|
|
130
|
+
/**
|
|
131
|
+
* Slice λ: shape returned by `resolveWorktreeForRequest`. `cwd` is what
|
|
132
|
+
* the spawn helpers (`executeCli`, `startJobWithDedup`) consume;
|
|
133
|
+
* `worktreePath` is what the tool handler embeds in the response prefix
|
|
134
|
+
* so callers can discover the path.
|
|
135
|
+
*/
|
|
136
|
+
export interface ResolvedWorktree {
|
|
137
|
+
cwd?: string;
|
|
138
|
+
worktreePath?: string;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Slice λ: resolve a request's worktree directive into a spawn cwd.
|
|
142
|
+
*
|
|
143
|
+
* - `worktreeOpt` is the Zod-validated input value (boolean |
|
|
144
|
+
* `{ name?, ref? }` | undefined).
|
|
145
|
+
* - When the request has a session AND the session already has a
|
|
146
|
+
* `metadata.worktreePath`, that path is reused (resume semantics).
|
|
147
|
+
* The reused path is returned without touching git; if the directory
|
|
148
|
+
* was externally removed between requests, the next CLI invocation
|
|
149
|
+
* will surface the error naturally.
|
|
150
|
+
* - When no reusable worktree exists, `createWorktree` runs; on success
|
|
151
|
+
* the new path is written to `session.metadata` (only when a session
|
|
152
|
+
* exists — request-scoped worktrees do NOT persist).
|
|
153
|
+
* - Returns `{}` when `worktreeOpt` is undefined/false (preserves
|
|
154
|
+
* pre-λ behaviour at non-worktree call sites).
|
|
155
|
+
* - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
|
|
156
|
+
* in a `createErrorResponse` envelope. Do NOT swallow.
|
|
157
|
+
*
|
|
158
|
+
* Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
|
|
159
|
+
* verify" §5.
|
|
160
|
+
*/
|
|
161
|
+
export declare function resolveWorktreeForRequest(worktreeOpt: boolean | {
|
|
162
|
+
name?: string;
|
|
163
|
+
ref?: string;
|
|
164
|
+
} | undefined, sessionId: string | undefined, runtime: GatewayServerRuntime): Promise<ResolvedWorktree>;
|
|
165
|
+
/**
|
|
166
|
+
* Slice λ §1.d: response-envelope shape decision for `worktreePath`.
|
|
167
|
+
*
|
|
168
|
+
* We surface the worktree path inline as a stdout prefix
|
|
169
|
+
* (`[gateway] worktree=<absolute-path>\n`) rather than as a
|
|
170
|
+
* structuredContent field or JSON wrapper. Rationale:
|
|
171
|
+
* - zero schema change across all 10 tools and their downstream parsers
|
|
172
|
+
* - matches how other slice features (session warnings, cache_state
|
|
173
|
+
* aggregates) surface side-channel metadata today
|
|
174
|
+
* - callers that want the path can split on the first newline; callers
|
|
175
|
+
* that don't care see a single ignorable header line
|
|
176
|
+
*
|
|
177
|
+
* Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
|
|
178
|
+
* the moment a successful response is constructed.
|
|
179
|
+
*/
|
|
180
|
+
export declare function formatWorktreePrefix(worktreePath?: string): string;
|
|
97
181
|
export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
|
|
98
182
|
/**
|
|
99
183
|
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
@@ -129,6 +213,27 @@ interface CliRequestPrep {
|
|
|
129
213
|
stablePrefixHash: string | null;
|
|
130
214
|
/** Heuristic token count (bytes/4) of the same stable prefix. */
|
|
131
215
|
stablePrefixTokens: number | null;
|
|
216
|
+
/**
|
|
217
|
+
* Slice κ (Claude only): JSON stream-json payload to feed on stdin
|
|
218
|
+
* when the gateway emits `-p --input-format stream-json`. Undefined
|
|
219
|
+
* when the caller did not opt into Anthropic `cache_control`
|
|
220
|
+
* breakpoints. Non-κ providers always leave this undefined.
|
|
221
|
+
*/
|
|
222
|
+
stdinPayload?: string;
|
|
223
|
+
/**
|
|
224
|
+
* Slice κ (Claude only): number of caller-supplied content blocks
|
|
225
|
+
* that carry an explicit `cache_control` marker. Threaded into the
|
|
226
|
+
* flight recorder so `cache_state` aggregates can distinguish
|
|
227
|
+
* κ-explicit breakpoints from implicit prefix-cache hits.
|
|
228
|
+
*/
|
|
229
|
+
cacheControlBlocks?: number;
|
|
230
|
+
/**
|
|
231
|
+
* Rec #4: structured warnings produced during prep (e.g. cacheable
|
|
232
|
+
* stable prefix without cacheControl). Handlers merge these with any
|
|
233
|
+
* other warnings (cache_ttl_expiring_soon, etc.) before returning to
|
|
234
|
+
* the caller.
|
|
235
|
+
*/
|
|
236
|
+
warnings?: WarningEntry[];
|
|
132
237
|
}
|
|
133
238
|
export declare function prepareClaudeRequest(params: {
|
|
134
239
|
prompt?: string;
|
|
@@ -360,6 +465,11 @@ export interface GeminiRequestParams {
|
|
|
360
465
|
attachments?: string[];
|
|
361
466
|
/** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
|
|
362
467
|
skipTrust?: boolean;
|
|
468
|
+
/** Slice λ: run this request inside a gateway-owned git worktree. */
|
|
469
|
+
worktree?: boolean | {
|
|
470
|
+
name?: string;
|
|
471
|
+
ref?: string;
|
|
472
|
+
};
|
|
363
473
|
}
|
|
364
474
|
export interface HandlerDeps {
|
|
365
475
|
sessionManager: ISessionManager;
|
|
@@ -412,6 +522,11 @@ export interface GrokRequestParams {
|
|
|
412
522
|
allow?: string[];
|
|
413
523
|
/** Phase 4 slice θ: Grok `--deny <RULE>` (repeatable; one entry per --deny instance). */
|
|
414
524
|
deny?: string[];
|
|
525
|
+
/** Slice λ: run this request inside a gateway-owned git worktree. */
|
|
526
|
+
worktree?: boolean | {
|
|
527
|
+
name?: string;
|
|
528
|
+
ref?: string;
|
|
529
|
+
};
|
|
415
530
|
}
|
|
416
531
|
export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
|
|
417
532
|
export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
@@ -446,6 +561,11 @@ export interface MistralRequestParams {
|
|
|
446
561
|
workingDir?: string;
|
|
447
562
|
/** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
|
|
448
563
|
addDir?: string[];
|
|
564
|
+
/** Slice λ: run this request inside a gateway-owned git worktree. */
|
|
565
|
+
worktree?: boolean | {
|
|
566
|
+
name?: string;
|
|
567
|
+
ref?: string;
|
|
568
|
+
};
|
|
449
569
|
}
|
|
450
570
|
export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
|
|
451
571
|
export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
@@ -480,6 +600,11 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
|
|
|
480
600
|
ignoreRules?: boolean;
|
|
481
601
|
workingDir?: string;
|
|
482
602
|
addDir?: string[];
|
|
603
|
+
/** Slice λ: run this request inside a gateway-owned git worktree. */
|
|
604
|
+
worktree?: boolean | {
|
|
605
|
+
name?: string;
|
|
606
|
+
ref?: string;
|
|
607
|
+
};
|
|
483
608
|
}): Promise<ExtendedToolResponse>;
|
|
484
609
|
export declare function createGatewayServer(deps?: GatewayServerDeps): McpServer;
|
|
485
610
|
export {};
|