llm-cli-gateway 1.6.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +181 -0
- package/dist/async-job-manager.d.ts +70 -2
- package/dist/async-job-manager.js +166 -6
- package/dist/codex-json-parser.js +4 -1
- package/dist/index.d.ts +32 -0
- package/dist/index.js +152 -36
- package/dist/job-store.d.ts +43 -4
- package/dist/job-store.js +28 -2
- package/dist/mistral-meta-json-parser.d.ts +6 -0
- package/dist/mistral-meta-json-parser.js +175 -0
- package/dist/request-helpers.d.ts +14 -5
- package/dist/request-helpers.js +8 -5
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -10,6 +10,8 @@ import { executeCli, killAllProcessGroups } from "./executor.js";
|
|
|
10
10
|
import { parseStreamJson } from "./stream-json-parser.js";
|
|
11
11
|
import { parseCodexJsonStream } from "./codex-json-parser.js";
|
|
12
12
|
import { parseGeminiJson } from "./gemini-json-parser.js";
|
|
13
|
+
import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
|
|
14
|
+
import { homedir } from "os";
|
|
13
15
|
import { createSessionManager } from "./session-manager.js";
|
|
14
16
|
import { ResourceProvider } from "./resources.js";
|
|
15
17
|
import { PerformanceMetrics } from "./metrics.js";
|
|
@@ -17,7 +19,7 @@ import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse
|
|
|
17
19
|
import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
|
|
18
20
|
import { checkHealth } from "./health.js";
|
|
19
21
|
import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
|
|
20
|
-
import { AsyncJobManager } from "./async-job-manager.js";
|
|
22
|
+
import { AsyncJobManager, } from "./async-job-manager.js";
|
|
21
23
|
import { createJobStore } from "./job-store.js";
|
|
22
24
|
import { ApprovalManager } from "./approval-manager.js";
|
|
23
25
|
import { checkReviewIntegrity } from "./review-integrity.js";
|
|
@@ -213,10 +215,10 @@ function getJobStore(runtimeLogger = logger) {
|
|
|
213
215
|
}
|
|
214
216
|
return jobStore;
|
|
215
217
|
}
|
|
216
|
-
function newAsyncJobManager(metrics, runtimeLogger, store = getJobStore(runtimeLogger)) {
|
|
218
|
+
function newAsyncJobManager(metrics, runtimeLogger, store = getJobStore(runtimeLogger), fr = getFlightRecorder(runtimeLogger)) {
|
|
217
219
|
return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
|
|
218
220
|
metrics.recordRequest(cli, durationMs, success);
|
|
219
|
-
}, store);
|
|
221
|
+
}, store, fr);
|
|
220
222
|
}
|
|
221
223
|
function getAsyncJobManager(runtimeLogger = logger) {
|
|
222
224
|
asyncJobManager ??= newAsyncJobManager(performanceMetrics, runtimeLogger);
|
|
@@ -239,17 +241,19 @@ function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
239
241
|
const runtimeSessionManager = deps.sessionManager ?? sessionManager;
|
|
240
242
|
const runtimePerformanceMetrics = deps.performanceMetrics ??
|
|
241
243
|
(options.isolateState ? new PerformanceMetrics() : performanceMetrics);
|
|
244
|
+
// Resolve flight recorder BEFORE async manager so isolateState managers
|
|
245
|
+
// can be wired with the same recorder instance the runtime exposes.
|
|
246
|
+
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
242
247
|
const runtimeAsyncJobManager = deps.asyncJobManager ??
|
|
243
248
|
(options.isolateState
|
|
244
249
|
? // Factory-created test/HTTP session servers must not mark another instance's
|
|
245
250
|
// durable jobs orphaned. Stdio startup injects the process-global manager.
|
|
246
|
-
newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
|
|
251
|
+
newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
|
|
247
252
|
: getAsyncJobManager(runtimeLogger));
|
|
248
253
|
const runtimeApprovalManager = deps.approvalManager ??
|
|
249
254
|
(options.isolateState
|
|
250
255
|
? new ApprovalManager(undefined, runtimeLogger)
|
|
251
256
|
: getApprovalManager(runtimeLogger));
|
|
252
|
-
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
253
257
|
return {
|
|
254
258
|
sessionManager: runtimeSessionManager,
|
|
255
259
|
resourceProvider: deps.resourceProvider ??
|
|
@@ -286,7 +290,16 @@ const SYNC_POLL_INTERVAL_MS = 1_000;
|
|
|
286
290
|
* Start an async job and poll until completion or deadline.
|
|
287
291
|
* Returns the job result if it finishes in time, or a deferral marker.
|
|
288
292
|
*/
|
|
289
|
-
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete
|
|
293
|
+
async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
|
|
294
|
+
/**
|
|
295
|
+
* Slice 1.5: when the sync handler has already written a logStart row
|
|
296
|
+
* keyed on `corrId`, pass these so the manager can write logComplete
|
|
297
|
+
* (with usage extraction) when the underlying async job terminates —
|
|
298
|
+
* even if the sync handler returned a deferred response.
|
|
299
|
+
* `writeFlightStart` is NEVER true on this path: the sync handler is
|
|
300
|
+
* always the upstream logStart writer.
|
|
301
|
+
*/
|
|
302
|
+
flightRecorderEntry, extractUsage) {
|
|
290
303
|
// U26 fix: ownership of onComplete is a contract. Once this function returns
|
|
291
304
|
// OR throws, the caller MUST consider onComplete consumed — i.e. it has
|
|
292
305
|
// either been run, or the AsyncJobManager has taken ownership of it. The
|
|
@@ -336,6 +349,13 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
|
|
|
336
349
|
forceRefresh,
|
|
337
350
|
env,
|
|
338
351
|
onComplete,
|
|
352
|
+
// Sync-deferred path: the upstream sync handler already wrote
|
|
353
|
+
// logStart for this corrId, so writeFlightStart stays false. The
|
|
354
|
+
// manager still writes logComplete on terminal state (which UPDATEs
|
|
355
|
+
// the sync handler's row), closing the previously-orphaned
|
|
356
|
+
// sync-deferred case.
|
|
357
|
+
flightRecorderEntry,
|
|
358
|
+
extractUsage,
|
|
339
359
|
});
|
|
340
360
|
// Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
|
|
341
361
|
// fireOnComplete on terminal status, or run inline immediately for dedup).
|
|
@@ -369,7 +389,14 @@ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, f
|
|
|
369
389
|
}
|
|
370
390
|
await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
|
|
371
391
|
}
|
|
372
|
-
// Deadline exceeded — return deferral
|
|
392
|
+
// Deadline exceeded — return deferral.
|
|
393
|
+
// R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
|
|
394
|
+
// this call, the manager skips writeFlightComplete on terminal so the
|
|
395
|
+
// sync handler's safeFlightComplete (with rich approvalDecision /
|
|
396
|
+
// optimizationApplied metadata) wins for sync-inline completions. From
|
|
397
|
+
// here on the sync handler returns deferred and will NOT write
|
|
398
|
+
// safeFlightComplete, so the manager must.
|
|
399
|
+
runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
|
|
373
400
|
runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
|
|
374
401
|
return {
|
|
375
402
|
deferred: true,
|
|
@@ -452,7 +479,14 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
452
479
|
},
|
|
453
480
|
};
|
|
454
481
|
}
|
|
455
|
-
function extractUsageAndCost(cli, output, outputFormat
|
|
482
|
+
export function extractUsageAndCost(cli, output, outputFormat,
|
|
483
|
+
/**
|
|
484
|
+
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
485
|
+
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
486
|
+
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
487
|
+
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
488
|
+
*/
|
|
489
|
+
ctx) {
|
|
456
490
|
if (cli === "claude" && outputFormat === "stream-json") {
|
|
457
491
|
const parsed = parseStreamJson(output);
|
|
458
492
|
if (!parsed.usage) {
|
|
@@ -490,11 +524,44 @@ function extractUsageAndCost(cli, output, outputFormat) {
|
|
|
490
524
|
cacheReadTokens: parsed.usage.cache_read_tokens,
|
|
491
525
|
};
|
|
492
526
|
}
|
|
493
|
-
// Mistral/Vibe:
|
|
494
|
-
//
|
|
495
|
-
//
|
|
527
|
+
// Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
|
|
528
|
+
// (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
|
|
529
|
+
// session whose Vibe-assigned UUID we never observed) or the file is
|
|
530
|
+
// missing/malformed, the parser returns `{}` and the FR row simply lacks
|
|
531
|
+
// usage data — matching pre-slice behaviour. No stdout fallback exists.
|
|
532
|
+
if (cli === "mistral") {
|
|
533
|
+
return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
|
|
534
|
+
}
|
|
496
535
|
return {};
|
|
497
536
|
}
|
|
537
|
+
/**
|
|
538
|
+
* Slice 1.5: build the async-job-manager's FR payload from a prep object
|
|
539
|
+
* (which every prepare*Request returns), plus the bound CLI and output
|
|
540
|
+
* format primitives needed by extractUsageAndCost. Returning the closure
|
|
541
|
+
* separately means it captures `cliName` and `fmt` ONLY — never `params`
|
|
542
|
+
* or `prep` — so retention on AsyncJobRecord is O(constant).
|
|
543
|
+
*/
|
|
544
|
+
function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
|
|
545
|
+
// Extract primitives BEFORE building the closure — capturing `prep` or
|
|
546
|
+
// `params` directly would pin large attachments / promptParts on the
|
|
547
|
+
// AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
|
|
548
|
+
// primitives too, threaded through so the Mistral branch of
|
|
549
|
+
// extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
|
|
550
|
+
const cli = cliName;
|
|
551
|
+
const fmt = outputFormat;
|
|
552
|
+
const sid = sessionId;
|
|
553
|
+
const home = homedir();
|
|
554
|
+
return {
|
|
555
|
+
flightRecorderEntry: {
|
|
556
|
+
model: prep.resolvedModel || "default",
|
|
557
|
+
prompt: prep.effectivePrompt,
|
|
558
|
+
sessionId,
|
|
559
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
560
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
561
|
+
},
|
|
562
|
+
extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
|
|
563
|
+
};
|
|
564
|
+
}
|
|
498
565
|
function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
|
|
499
566
|
try {
|
|
500
567
|
runtime.flightRecorder.logStart(entry);
|
|
@@ -1032,11 +1099,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1032
1099
|
args.push("--json");
|
|
1033
1100
|
}
|
|
1034
1101
|
args.push("--skip-git-repo-check");
|
|
1035
|
-
// U26: High-impact feature flags.
|
|
1036
|
-
//
|
|
1037
|
-
// only emit
|
|
1038
|
-
//
|
|
1039
|
-
//
|
|
1102
|
+
// U26: High-impact feature flags. `--search` is rejected by
|
|
1103
|
+
// `codex exec resume` (resume inherits the original session's web-search
|
|
1104
|
+
// state), so we only emit it on a NEW session. `--output-schema`,
|
|
1105
|
+
// `-c key=value`, profile, ephemeral, images, and the ignore-* flags are
|
|
1106
|
+
// all accepted on resume per `codex exec resume --help` (codex-cli 0.133.0)
|
|
1107
|
+
// and are emitted in both branches.
|
|
1040
1108
|
let highImpactCleanup;
|
|
1041
1109
|
if (sessionPlan.mode === "new") {
|
|
1042
1110
|
const high = prepareCodexHighImpactFlags({
|
|
@@ -1056,12 +1124,10 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1056
1124
|
highImpactCleanup = high.cleanup;
|
|
1057
1125
|
}
|
|
1058
1126
|
else {
|
|
1059
|
-
// On resume, emit only the resume-safe subset (profile, ephemeral,
|
|
1060
|
-
// images, ignoreUserConfig, ignoreRules). outputSchema, search, and
|
|
1061
|
-
// configOverrides are dropped silently to mirror existing behavior for
|
|
1062
|
-
// sandbox/ask-for-approval on resume.
|
|
1063
1127
|
const high = prepareCodexHighImpactFlags({
|
|
1128
|
+
outputSchema: params.outputSchema,
|
|
1064
1129
|
profile: params.profile,
|
|
1130
|
+
configOverrides: params.configOverrides,
|
|
1065
1131
|
ephemeral: params.ephemeral,
|
|
1066
1132
|
images: params.images,
|
|
1067
1133
|
ignoreUserConfig: params.ignoreUserConfig,
|
|
@@ -1191,6 +1257,10 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1191
1257
|
if (params.outputFormat === "json") {
|
|
1192
1258
|
args.push("-o", "json");
|
|
1193
1259
|
}
|
|
1260
|
+
// Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
|
|
1261
|
+
if (params.skipTrust) {
|
|
1262
|
+
args.push("--skip-trust");
|
|
1263
|
+
}
|
|
1194
1264
|
return {
|
|
1195
1265
|
corrId,
|
|
1196
1266
|
effectivePrompt,
|
|
@@ -1362,6 +1432,7 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1362
1432
|
reasoningEffort: params.reasoningEffort,
|
|
1363
1433
|
allowedTools: params.allowedTools,
|
|
1364
1434
|
disallowedTools: params.disallowedTools,
|
|
1435
|
+
trust: params.trust,
|
|
1365
1436
|
});
|
|
1366
1437
|
if (prep.ignoredDisallowedTools) {
|
|
1367
1438
|
runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
|
|
@@ -1417,7 +1488,10 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1417
1488
|
correlationId: corrId,
|
|
1418
1489
|
sessionId: sessionId || null,
|
|
1419
1490
|
durationMs,
|
|
1420
|
-
|
|
1491
|
+
// Phase 4 slice β: thread sessionId + home so the Mistral branch of
|
|
1492
|
+
// extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
|
|
1493
|
+
// Other CLIs ignore the ctx (their usage source is stdout).
|
|
1494
|
+
...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
|
|
1421
1495
|
exitCode: 0,
|
|
1422
1496
|
retryCount: 0,
|
|
1423
1497
|
},
|
|
@@ -1515,6 +1589,7 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1515
1589
|
policyFiles: params.policyFiles,
|
|
1516
1590
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
1517
1591
|
attachments: params.attachments,
|
|
1592
|
+
skipTrust: params.skipTrust,
|
|
1518
1593
|
}, runtime);
|
|
1519
1594
|
if (!("args" in prep))
|
|
1520
1595
|
return prep;
|
|
@@ -1542,7 +1617,8 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1542
1617
|
args.push(...sessionPlan.args);
|
|
1543
1618
|
const userProvidedSession = sessionPlan.resumed;
|
|
1544
1619
|
const effectiveSessionIdHint = sessionPlan.resumed ? params.sessionId : undefined;
|
|
1545
|
-
const
|
|
1620
|
+
const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
|
|
1621
|
+
const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage);
|
|
1546
1622
|
// Deferred — job still running, return async reference
|
|
1547
1623
|
if (isDeferredResponse(result)) {
|
|
1548
1624
|
return buildDeferredToolResponse(result, effectiveSessionIdHint);
|
|
@@ -1642,6 +1718,7 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1642
1718
|
policyFiles: params.policyFiles,
|
|
1643
1719
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
1644
1720
|
attachments: params.attachments,
|
|
1721
|
+
skipTrust: params.skipTrust,
|
|
1645
1722
|
}, runtime);
|
|
1646
1723
|
if (!("args" in prep))
|
|
1647
1724
|
return prep;
|
|
@@ -1675,7 +1752,10 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1675
1752
|
// surfaces it in the snapshot).
|
|
1676
1753
|
assertUpstreamCliArgs("gemini", args);
|
|
1677
1754
|
assertUpstreamCliEnv("gemini", undefined);
|
|
1678
|
-
|
|
1755
|
+
// Slice 1.5: pure async path — no upstream safeFlightStart, so the
|
|
1756
|
+
// manager owns both logStart and logComplete for this corrId.
|
|
1757
|
+
const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
|
|
1758
|
+
const job = deps.asyncJobManager.startJob("gemini", args, corrId, undefined, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
|
|
1679
1759
|
deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
|
|
1680
1760
|
const asyncResponse = {
|
|
1681
1761
|
success: true,
|
|
@@ -1745,7 +1825,8 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1745
1825
|
createNewSession: params.createNewSession,
|
|
1746
1826
|
});
|
|
1747
1827
|
args.push(...sessionResult.resumeArgs);
|
|
1748
|
-
const
|
|
1828
|
+
const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
|
|
1829
|
+
const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage);
|
|
1749
1830
|
// Deferred — job still running, return async reference
|
|
1750
1831
|
if (isDeferredResponse(result)) {
|
|
1751
1832
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
@@ -1875,7 +1956,8 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
1875
1956
|
// Start job only after all session I/O succeeds
|
|
1876
1957
|
assertUpstreamCliArgs("grok", args);
|
|
1877
1958
|
assertUpstreamCliEnv("grok", undefined);
|
|
1878
|
-
const
|
|
1959
|
+
const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
|
|
1960
|
+
const job = deps.asyncJobManager.startJob("grok", args, corrId, undefined, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, grokAsyncFrHandoff.flightRecorderEntry, grokAsyncFrHandoff.extractUsage, true);
|
|
1879
1961
|
deps.logger.info(`[${corrId}] grok_request_async started job ${job.id}`);
|
|
1880
1962
|
const asyncResponse = {
|
|
1881
1963
|
success: true,
|
|
@@ -1920,6 +2002,7 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1920
2002
|
correlationId: params.correlationId,
|
|
1921
2003
|
optimizePrompt: params.optimizePrompt,
|
|
1922
2004
|
operation: "mistral_request",
|
|
2005
|
+
trust: params.trust,
|
|
1923
2006
|
}, runtime);
|
|
1924
2007
|
if (!("args" in prep))
|
|
1925
2008
|
return prep;
|
|
@@ -1943,7 +2026,8 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1943
2026
|
createNewSession: params.createNewSession,
|
|
1944
2027
|
});
|
|
1945
2028
|
args.push(...sessionResult.resumeArgs);
|
|
1946
|
-
|
|
2029
|
+
const mistralFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, params.sessionId, params.outputFormat);
|
|
2030
|
+
let result = await awaitJobOrDefer("mistral", args, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, mistralEnv, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
|
|
1947
2031
|
if (isDeferredResponse(result)) {
|
|
1948
2032
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
1949
2033
|
}
|
|
@@ -1962,9 +2046,15 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1962
2046
|
reasoningEffort: params.reasoningEffort,
|
|
1963
2047
|
allowedTools: params.allowedTools,
|
|
1964
2048
|
disallowedTools: params.disallowedTools,
|
|
2049
|
+
// Phase 4 slice γ: preserve --trust on the model-selection retry
|
|
2050
|
+
// so a fresh untrusted workspace doesn't block headlessly on the
|
|
2051
|
+
// second attempt after surviving the first.
|
|
2052
|
+
trust: params.trust,
|
|
1965
2053
|
});
|
|
1966
2054
|
const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
|
|
1967
|
-
|
|
2055
|
+
// Reuse the FR handoff built above — the retry preserves corrId,
|
|
2056
|
+
// so the manager's logComplete still updates the original row.
|
|
2057
|
+
result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage);
|
|
1968
2058
|
if (isDeferredResponse(result)) {
|
|
1969
2059
|
return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
|
|
1970
2060
|
}
|
|
@@ -2060,6 +2150,7 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2060
2150
|
correlationId: params.correlationId,
|
|
2061
2151
|
optimizePrompt: params.optimizePrompt,
|
|
2062
2152
|
operation: "mistral_request_async",
|
|
2153
|
+
trust: params.trust,
|
|
2063
2154
|
}, runtime);
|
|
2064
2155
|
if (!("args" in prep))
|
|
2065
2156
|
return prep;
|
|
@@ -2092,7 +2183,8 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2092
2183
|
}
|
|
2093
2184
|
assertUpstreamCliArgs("mistral", args);
|
|
2094
2185
|
assertUpstreamCliEnv("mistral", mistralEnv);
|
|
2095
|
-
const
|
|
2186
|
+
const mistralAsyncFrHandoff = buildAsyncFlightRecorderHandoff("mistral", prep, effectiveSessionId, params.outputFormat);
|
|
2187
|
+
const job = deps.asyncJobManager.startJob("mistral", args, corrId, undefined, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, mistralEnv, undefined, mistralAsyncFrHandoff.flightRecorderEntry, mistralAsyncFrHandoff.extractUsage, true);
|
|
2096
2188
|
deps.logger.info(`[${corrId}] mistral_request_async started job ${job.id}`);
|
|
2097
2189
|
const asyncResponse = {
|
|
2098
2190
|
success: true,
|
|
@@ -2193,9 +2285,10 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2193
2285
|
// registering the record, ownership stays here and we run it in the catch.
|
|
2194
2286
|
assertUpstreamCliArgs("codex", args);
|
|
2195
2287
|
assertUpstreamCliEnv("codex", undefined);
|
|
2288
|
+
const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
|
|
2196
2289
|
let job;
|
|
2197
2290
|
try {
|
|
2198
|
-
job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup);
|
|
2291
|
+
job = deps.asyncJobManager.startJob("codex", args, corrId, undefined, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
|
|
2199
2292
|
// Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
|
|
2200
2293
|
// status. Release our local ownership claim so the catch path doesn't
|
|
2201
2294
|
// double-fire.
|
|
@@ -2461,7 +2554,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
2461
2554
|
}
|
|
2462
2555
|
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
2463
2556
|
const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
|
|
2464
|
-
const
|
|
2557
|
+
const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
2558
|
+
const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage);
|
|
2465
2559
|
// Deferred — job still running, return async reference
|
|
2466
2560
|
if (isDeferredResponse(result)) {
|
|
2467
2561
|
return buildDeferredToolResponse(result, effectiveSessionId);
|
|
@@ -2703,7 +2797,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
2703
2797
|
// completion or deferred). The outer finally MUST NOT clean again.
|
|
2704
2798
|
const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
|
|
2705
2799
|
try {
|
|
2706
|
-
const
|
|
2800
|
+
const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
|
|
2801
|
+
const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage);
|
|
2707
2802
|
// Deferred — job still running, return async reference. Cleanup
|
|
2708
2803
|
// ownership belongs to AsyncJobManager via onComplete.
|
|
2709
2804
|
if (isDeferredResponse(result)) {
|
|
@@ -2944,7 +3039,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
2944
3039
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
2945
3040
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
2946
3041
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
2947
|
-
|
|
3042
|
+
skipTrust: z
|
|
3043
|
+
.boolean()
|
|
3044
|
+
.default(false)
|
|
3045
|
+
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3046
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
|
|
2948
3047
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
2949
3048
|
prompt,
|
|
2950
3049
|
promptParts,
|
|
@@ -2968,6 +3067,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2968
3067
|
policyFiles,
|
|
2969
3068
|
adminPolicyFiles,
|
|
2970
3069
|
attachments,
|
|
3070
|
+
skipTrust,
|
|
2971
3071
|
});
|
|
2972
3072
|
});
|
|
2973
3073
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3138,7 +3238,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3138
3238
|
.boolean()
|
|
3139
3239
|
.default(false)
|
|
3140
3240
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3141
|
-
|
|
3241
|
+
trust: z
|
|
3242
|
+
.boolean()
|
|
3243
|
+
.default(false)
|
|
3244
|
+
.describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
|
|
3245
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, }) => {
|
|
3142
3246
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
3143
3247
|
prompt,
|
|
3144
3248
|
promptParts,
|
|
@@ -3160,6 +3264,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3160
3264
|
optimizeResponse,
|
|
3161
3265
|
idleTimeoutMs,
|
|
3162
3266
|
forceRefresh,
|
|
3267
|
+
trust,
|
|
3163
3268
|
});
|
|
3164
3269
|
});
|
|
3165
3270
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3344,7 +3449,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3344
3449
|
: undefined;
|
|
3345
3450
|
assertUpstreamCliArgs("claude", args);
|
|
3346
3451
|
assertUpstreamCliEnv("claude", undefined);
|
|
3347
|
-
const
|
|
3452
|
+
const claudeAsyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
|
|
3453
|
+
const job = asyncJobManager.startJob("claude", args, corrId, undefined, effectiveIdleTimeout, outputFormat, forceRefresh, undefined, undefined, claudeAsyncFrHandoff.flightRecorderEntry, claudeAsyncFrHandoff.extractUsage, true);
|
|
3348
3454
|
logger.info(`[${corrId}] claude_request_async started job ${job.id}, outputFormat=${outputFormat}`);
|
|
3349
3455
|
const asyncResponse = {
|
|
3350
3456
|
success: true,
|
|
@@ -3549,7 +3655,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3549
3655
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3550
3656
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3551
3657
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3552
|
-
|
|
3658
|
+
skipTrust: z
|
|
3659
|
+
.boolean()
|
|
3660
|
+
.default(false)
|
|
3661
|
+
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3662
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
|
|
3553
3663
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3554
3664
|
prompt,
|
|
3555
3665
|
promptParts,
|
|
@@ -3572,6 +3682,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3572
3682
|
policyFiles,
|
|
3573
3683
|
adminPolicyFiles,
|
|
3574
3684
|
attachments,
|
|
3685
|
+
skipTrust,
|
|
3575
3686
|
});
|
|
3576
3687
|
});
|
|
3577
3688
|
server.tool("grok_request_async", {
|
|
@@ -3733,7 +3844,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3733
3844
|
.boolean()
|
|
3734
3845
|
.default(false)
|
|
3735
3846
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3736
|
-
|
|
3847
|
+
trust: z
|
|
3848
|
+
.boolean()
|
|
3849
|
+
.default(false)
|
|
3850
|
+
.describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
|
|
3851
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, }) => {
|
|
3737
3852
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3738
3853
|
prompt,
|
|
3739
3854
|
promptParts,
|
|
@@ -3754,6 +3869,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3754
3869
|
optimizePrompt,
|
|
3755
3870
|
idleTimeoutMs,
|
|
3756
3871
|
forceRefresh,
|
|
3872
|
+
trust,
|
|
3757
3873
|
});
|
|
3758
3874
|
});
|
|
3759
3875
|
server.tool("llm_job_status", {
|
package/dist/job-store.d.ts
CHANGED
|
@@ -51,10 +51,35 @@ export interface JobStore {
|
|
|
51
51
|
}): void;
|
|
52
52
|
getById(id: string): JobRecord | null;
|
|
53
53
|
findByRequestKey(requestKey: string): JobRecord | null;
|
|
54
|
-
|
|
54
|
+
/**
|
|
55
|
+
* Flip every `status='running'` row to `'orphaned'` at gateway boot.
|
|
56
|
+
*
|
|
57
|
+
* Returns the row count AND a snapshot of every row that was flipped, so
|
|
58
|
+
* AsyncJobManager can write a flight-recorder logComplete with the full
|
|
59
|
+
* sync-helper-equivalent payload (response from stderr||stdout,
|
|
60
|
+
* durationMs from startedAt). Pre-slice-1.5 rows that never wrote a
|
|
61
|
+
* logStart degrade silently to a no-op UPDATE inside the FR.
|
|
62
|
+
*/
|
|
63
|
+
markOrphanedOnStartup(): {
|
|
64
|
+
count: number;
|
|
65
|
+
orphaned: Array<OrphanedJobSnapshot>;
|
|
66
|
+
};
|
|
55
67
|
evictExpired(): number;
|
|
56
68
|
close(): void;
|
|
57
69
|
}
|
|
70
|
+
/**
|
|
71
|
+
* Per-orphan snapshot returned by `markOrphanedOnStartup` so the
|
|
72
|
+
* AsyncJobManager constructor can build a faithful FlightLogResult for
|
|
73
|
+
* each row it flipped.
|
|
74
|
+
*/
|
|
75
|
+
export interface OrphanedJobSnapshot {
|
|
76
|
+
id: string;
|
|
77
|
+
correlationId: string;
|
|
78
|
+
startedAt: string;
|
|
79
|
+
stdout: string;
|
|
80
|
+
stderr: string;
|
|
81
|
+
exitCode: number | null;
|
|
82
|
+
}
|
|
58
83
|
/**
|
|
59
84
|
* SQLite-backed job store. Default backend for production. Durable across
|
|
60
85
|
* gateway restarts; safe for single-instance deployments.
|
|
@@ -69,6 +94,7 @@ export declare class SqliteJobStore implements JobStore {
|
|
|
69
94
|
private updateCompleteStmt;
|
|
70
95
|
private getByIdStmt;
|
|
71
96
|
private findByRequestKeyStmt;
|
|
97
|
+
private selectRunningOrphansStmt;
|
|
72
98
|
private markOrphanedStmt;
|
|
73
99
|
private deleteExpiredStmt;
|
|
74
100
|
constructor(dbPath: string, logger?: Logger, options?: {
|
|
@@ -114,8 +140,15 @@ export declare class SqliteJobStore implements JobStore {
|
|
|
114
140
|
/**
|
|
115
141
|
* On gateway boot, flip any jobs that were 'running' to 'orphaned'.
|
|
116
142
|
* The child processes were detached but can't be reattached to in this process.
|
|
143
|
+
*
|
|
144
|
+
* Returns the row count + a per-orphan snapshot so AsyncJobManager can
|
|
145
|
+
* write a flight-recorder logComplete with proper audit data
|
|
146
|
+
* (durationMs from startedAt, response from stderr||stdout).
|
|
117
147
|
*/
|
|
118
|
-
markOrphanedOnStartup():
|
|
148
|
+
markOrphanedOnStartup(): {
|
|
149
|
+
count: number;
|
|
150
|
+
orphaned: Array<OrphanedJobSnapshot>;
|
|
151
|
+
};
|
|
119
152
|
/**
|
|
120
153
|
* Delete rows whose expires_at has passed. Returns number of rows deleted.
|
|
121
154
|
*/
|
|
@@ -171,7 +204,10 @@ export declare class MemoryJobStore implements JobStore {
|
|
|
171
204
|
* In-memory stores have no cross-process state, so any "running" rows here
|
|
172
205
|
* came from this very process and aren't actually orphaned. No-op.
|
|
173
206
|
*/
|
|
174
|
-
markOrphanedOnStartup():
|
|
207
|
+
markOrphanedOnStartup(): {
|
|
208
|
+
count: number;
|
|
209
|
+
orphaned: Array<OrphanedJobSnapshot>;
|
|
210
|
+
};
|
|
175
211
|
evictExpired(): number;
|
|
176
212
|
close(): void;
|
|
177
213
|
}
|
|
@@ -188,7 +224,10 @@ export declare class PostgresJobStore implements JobStore {
|
|
|
188
224
|
recordComplete(): void;
|
|
189
225
|
getById(): JobRecord | null;
|
|
190
226
|
findByRequestKey(): JobRecord | null;
|
|
191
|
-
markOrphanedOnStartup():
|
|
227
|
+
markOrphanedOnStartup(): {
|
|
228
|
+
count: number;
|
|
229
|
+
orphaned: Array<OrphanedJobSnapshot>;
|
|
230
|
+
};
|
|
192
231
|
evictExpired(): number;
|
|
193
232
|
close(): void;
|
|
194
233
|
}
|
package/dist/job-store.js
CHANGED
|
@@ -73,6 +73,7 @@ export class SqliteJobStore {
|
|
|
73
73
|
updateCompleteStmt;
|
|
74
74
|
getByIdStmt;
|
|
75
75
|
findByRequestKeyStmt;
|
|
76
|
+
selectRunningOrphansStmt;
|
|
76
77
|
markOrphanedStmt;
|
|
77
78
|
deleteExpiredStmt;
|
|
78
79
|
constructor(dbPath, logger = noopLogger, options = {}) {
|
|
@@ -148,6 +149,16 @@ export class SqliteJobStore {
|
|
|
148
149
|
AND status IN ('running', 'completed')
|
|
149
150
|
ORDER BY started_at DESC
|
|
150
151
|
LIMIT 1
|
|
152
|
+
`);
|
|
153
|
+
// Snapshot every in-flight row's audit data BEFORE the orphan-flip
|
|
154
|
+
// UPDATE so AsyncJobManager can construct a full FlightLogResult per
|
|
155
|
+
// orphan. No transaction wrapper required: gateway boot is
|
|
156
|
+
// single-threaded before any new jobs can arrive, so no
|
|
157
|
+
// status='running' row can be inserted between this SELECT and the
|
|
158
|
+
// UPDATE below.
|
|
159
|
+
this.selectRunningOrphansStmt = this.db.prepare(`
|
|
160
|
+
SELECT id, correlation_id, started_at, stdout, stderr, exit_code
|
|
161
|
+
FROM jobs WHERE status = 'running'
|
|
151
162
|
`);
|
|
152
163
|
this.markOrphanedStmt = this.db.prepare(`
|
|
153
164
|
UPDATE jobs
|
|
@@ -227,14 +238,29 @@ export class SqliteJobStore {
|
|
|
227
238
|
/**
|
|
228
239
|
* On gateway boot, flip any jobs that were 'running' to 'orphaned'.
|
|
229
240
|
* The child processes were detached but can't be reattached to in this process.
|
|
241
|
+
*
|
|
242
|
+
* Returns the row count + a per-orphan snapshot so AsyncJobManager can
|
|
243
|
+
* write a flight-recorder logComplete with proper audit data
|
|
244
|
+
* (durationMs from startedAt, response from stderr||stdout).
|
|
230
245
|
*/
|
|
231
246
|
markOrphanedOnStartup() {
|
|
232
247
|
const now = new Date().toISOString();
|
|
233
248
|
// Orphaned jobs retain a short window so callers can fetch the partial output,
|
|
234
249
|
// then evict. Reuse the standard retention.
|
|
235
250
|
const expiresAt = new Date(Date.now() + this.retentionMs).toISOString();
|
|
251
|
+
// SELECT before UPDATE — gateway boot is single-threaded so no row can
|
|
252
|
+
// appear in 'running' between the two statements.
|
|
253
|
+
const rows = (this.selectRunningOrphansStmt.all?.() ?? []);
|
|
254
|
+
const orphaned = rows.map(row => ({
|
|
255
|
+
id: row.id,
|
|
256
|
+
correlationId: row.correlation_id,
|
|
257
|
+
startedAt: row.started_at,
|
|
258
|
+
stdout: row.stdout ?? "",
|
|
259
|
+
stderr: row.stderr ?? "",
|
|
260
|
+
exitCode: row.exit_code,
|
|
261
|
+
}));
|
|
236
262
|
const result = this.markOrphanedStmt.run(now, expiresAt);
|
|
237
|
-
return result?.changes ?? 0;
|
|
263
|
+
return { count: result?.changes ?? 0, orphaned };
|
|
238
264
|
}
|
|
239
265
|
/**
|
|
240
266
|
* Delete rows whose expires_at has passed. Returns number of rows deleted.
|
|
@@ -341,7 +367,7 @@ export class MemoryJobStore {
|
|
|
341
367
|
* came from this very process and aren't actually orphaned. No-op.
|
|
342
368
|
*/
|
|
343
369
|
markOrphanedOnStartup() {
|
|
344
|
-
return 0;
|
|
370
|
+
return { count: 0, orphaned: [] };
|
|
345
371
|
}
|
|
346
372
|
evictExpired() {
|
|
347
373
|
const nowIso = new Date().toISOString();
|