llm-cli-gateway 1.17.3 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +1 -1
- package/dist/approval-manager.js +0 -8
- package/dist/async-job-manager.d.ts +0 -113
- package/dist/async-job-manager.js +6 -124
- package/dist/cache-stats.d.ts +0 -89
- package/dist/cache-stats.js +0 -62
- package/dist/claude-mcp-config.js +0 -1
- package/dist/cli-updater.d.ts +0 -8
- package/dist/cli-updater.js +0 -12
- package/dist/codex-json-parser.d.ts +0 -20
- package/dist/codex-json-parser.js +0 -21
- package/dist/config.d.ts +0 -31
- package/dist/config.js +2 -72
- package/dist/db.d.ts +0 -18
- package/dist/db.js +0 -22
- package/dist/doctor.d.ts +0 -49
- package/dist/doctor.js +0 -47
- package/dist/endpoint-exposure.js +0 -1
- package/dist/executor.d.ts +0 -19
- package/dist/executor.js +3 -38
- package/dist/flight-recorder.d.ts +0 -26
- package/dist/flight-recorder.js +1 -70
- package/dist/gemini-json-parser.d.ts +0 -25
- package/dist/gemini-json-parser.js +0 -28
- package/dist/health.d.ts +0 -3
- package/dist/health.js +0 -3
- package/dist/index.d.ts +12 -208
- package/dist/index.js +116 -588
- package/dist/job-store.d.ts +0 -74
- package/dist/job-store.js +1 -73
- package/dist/logger.d.ts +0 -7
- package/dist/logger.js +0 -6
- package/dist/migrate-sessions.d.ts +0 -3
- package/dist/migrate-sessions.js +0 -16
- package/dist/migrate.js +1 -18
- package/dist/mistral-meta-json-parser.js +0 -67
- package/dist/model-registry.js +0 -13
- package/dist/pricing.d.ts +0 -46
- package/dist/pricing.js +0 -47
- package/dist/process-monitor.d.ts +0 -15
- package/dist/process-monitor.js +2 -31
- package/dist/prompt-parts.d.ts +6 -31
- package/dist/prompt-parts.js +0 -11
- package/dist/provider-status.d.ts +0 -8
- package/dist/provider-status.js +0 -11
- package/dist/request-helpers.d.ts +4 -316
- package/dist/request-helpers.js +13 -231
- package/dist/resources.d.ts +0 -20
- package/dist/resources.js +1 -34
- package/dist/retry.d.ts +0 -45
- package/dist/retry.js +3 -40
- package/dist/session-manager-pg.d.ts +0 -32
- package/dist/session-manager-pg.js +0 -32
- package/dist/session-manager.d.ts +0 -21
- package/dist/session-manager.js +1 -15
- package/dist/stream-json-parser.d.ts +0 -18
- package/dist/stream-json-parser.js +0 -22
- package/dist/upstream-contracts.d.ts +0 -55
- package/dist/upstream-contracts.js +86 -64
- package/dist/validation-orchestrator.js +0 -3
- package/dist/worktree-manager.d.ts +0 -9
- package/dist/worktree-manager.js +0 -21
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,51 @@ All notable changes to the llm-cli-gateway project.
|
|
|
4
4
|
|
|
5
5
|
## Unreleased
|
|
6
6
|
|
|
7
|
+
## [1.17.5] - 2026-06-02: Socket networkAccess cleanup
|
|
8
|
+
|
|
9
|
+
Patch release that stops the recurring Socket `networkAccess` (`globalThis["fetch"]`)
|
|
10
|
+
false-positive on the published package.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- The build now strips comments from the published `dist/*.js` (`removeComments`),
|
|
15
|
+
and the word "fetch" no longer appears in any shipped source. Socket's
|
|
16
|
+
`networkAccess` heuristic scans shipped comments and descriptions, and a stray
|
|
17
|
+
"fetch" in a JSDoc kept tripping a `globalThis["fetch"]` alert that the 1.17.3
|
|
18
|
+
reword only partly addressed. The `shellAccess` (`child_process`) alert on
|
|
19
|
+
`executor.js` / `worktree-manager.js` is inherent to spawning the provider CLIs
|
|
20
|
+
and git and is unchanged.
|
|
21
|
+
|
|
22
|
+
## [1.17.4] - 2026-06-02: upstream contract compatibility
|
|
23
|
+
|
|
24
|
+
Patch release that realigns the provider CLI contracts with the currently
|
|
25
|
+
installed binaries (codex 0.135.0, grok 0.2.16, gemini 0.44.1, claude 2.1.159,
|
|
26
|
+
vibe 2.12.1).
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Mistral: dropped the unsupported `--effort` / `--reasoning-effort` surface.
|
|
31
|
+
vibe 2.x argparse rejects both flags, so any `mistral_request` that passed
|
|
32
|
+
`effort` / `reasoningEffort` failed before reaching the model. Locked out with
|
|
33
|
+
two `expect:fail` conformance fixtures and a builder guard test.
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
- Grok: `--compaction-mode` (summary|transcript|segments) and
|
|
38
|
+
`--compaction-detail` (none|minimal|balanced|verbose) context controls, wired
|
|
39
|
+
as enum passthrough flags on `grok_request` / `grok_request_async`.
|
|
40
|
+
- Gemini: a `yolo` boolean that emits `--yolo` (auto-approve all actions). It
|
|
41
|
+
routes through the mcp_managed approval gate and is never emitted alongside
|
|
42
|
+
`--approval-mode yolo`.
|
|
43
|
+
- Claude: `--no-session-persistence`, `--setting-sources`, `--settings`, and
|
|
44
|
+
`--tools` exposed through `prepareClaudeHighImpactFlags`. `--betas` is left
|
|
45
|
+
out on purpose, since it is API-key only and the gateway runs Claude via OAuth.
|
|
46
|
+
|
|
47
|
+
### Notes
|
|
48
|
+
|
|
49
|
+
- Documented `--max-turns` as a known `--probe-installed` false-positive: claude
|
|
50
|
+
2.x hides it from `--help` but still accepts it.
|
|
51
|
+
|
|
7
52
|
## [1.17.3] - 2026-05-31 — Socket scanner prose cleanup
|
|
8
53
|
|
|
9
54
|
Patch release that removes wording in shipped metadata that Socket classified
|
package/README.md
CHANGED
|
@@ -747,7 +747,7 @@ Use this flow when analysis/runtime can exceed client tool-call limits:
|
|
|
747
747
|
|
|
748
748
|
1. Start job with `*_request_async`
|
|
749
749
|
2. Poll with `llm_job_status`
|
|
750
|
-
3.
|
|
750
|
+
3. Read output with `llm_job_result`
|
|
751
751
|
4. Optionally stop with `llm_job_cancel`
|
|
752
752
|
|
|
753
753
|
Async request tools accept the same approval strategy fields as their sync variants:
|
package/dist/approval-manager.js
CHANGED
|
@@ -65,22 +65,18 @@ export class ApprovalManager {
|
|
|
65
65
|
reasons.push("Request enables documentation retrieval MCP (ref_tools)");
|
|
66
66
|
}
|
|
67
67
|
if (request.allowedTools && request.allowedTools.length === 0) {
|
|
68
|
-
// Independently verify review context from the prompt — never trust caller-supplied flags alone
|
|
69
68
|
const promptIsReview = isReviewContext(request.prompt);
|
|
70
69
|
if (promptIsReview) {
|
|
71
70
|
score += 6;
|
|
72
71
|
reasons.push("Empty allowedTools in review context — reviewers need tool access");
|
|
73
72
|
}
|
|
74
73
|
else {
|
|
75
|
-
// Neutral score — tool restrictions should never reduce risk score
|
|
76
|
-
// (prevents gaming via review-context evasion + restrictive tools = negative score)
|
|
77
74
|
reasons.push("No tool permissions requested");
|
|
78
75
|
}
|
|
79
76
|
}
|
|
80
77
|
if (request.disallowedTools && request.disallowedTools.length > 0) {
|
|
81
78
|
const promptIsReviewForDisallowed = isReviewContext(request.prompt);
|
|
82
79
|
const criticalTools = ["Read", "Grep", "Glob", "Bash"];
|
|
83
|
-
// Canonicalize to handle scoped forms like "Read(*)", "Bash(git:*)"
|
|
84
80
|
const canonicalized = request.disallowedTools.map(s => {
|
|
85
81
|
const trimmed = s.trim();
|
|
86
82
|
const cut = Math.min(...[trimmed.indexOf("("), trimmed.indexOf(":")]
|
|
@@ -94,7 +90,6 @@ export class ApprovalManager {
|
|
|
94
90
|
reasons.push(`Critical review tools disallowed: ${blockedCritical.join(", ")} — reviewers need these`);
|
|
95
91
|
}
|
|
96
92
|
else {
|
|
97
|
-
// Neutral score — tool restrictions should never reduce risk score
|
|
98
93
|
reasons.push("Has explicit disallowed tool restrictions");
|
|
99
94
|
}
|
|
100
95
|
}
|
|
@@ -104,7 +99,6 @@ export class ApprovalManager {
|
|
|
104
99
|
}
|
|
105
100
|
if (request.reviewIntegrity && request.reviewIntegrity.violations.length > 0) {
|
|
106
101
|
for (const violation of request.reviewIntegrity.violations) {
|
|
107
|
-
// Skip empty_allowed_tools and critical_tools_disallowed — already handled in context-dependent scoring above
|
|
108
102
|
if (violation.type === "empty_allowed_tools" ||
|
|
109
103
|
violation.type === "critical_tools_disallowed")
|
|
110
104
|
continue;
|
|
@@ -112,8 +106,6 @@ export class ApprovalManager {
|
|
|
112
106
|
reasons.push(`Review integrity: ${violation.detail}`);
|
|
113
107
|
}
|
|
114
108
|
}
|
|
115
|
-
// Balanced policy allows routine full-auto requests with standard MCP servers,
|
|
116
|
-
// while still denying bypass/sensitive combinations.
|
|
117
109
|
const threshold = policy === "strict" ? 2 : policy === "balanced" ? 5 : 7;
|
|
118
110
|
const status = score <= threshold ? "approved" : "denied";
|
|
119
111
|
const record = {
|
package/dist/async-job-manager.d.ts
CHANGED
|
@@ -4,32 +4,14 @@ import { JobStore } from "./job-store.js";
|
|
|
4
4
|
import { type FlightRecorderLike } from "./flight-recorder.js";
|
|
5
5
|
export type LlmCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
|
|
6
6
|
export type AsyncJobStatus = "running" | "completed" | "failed" | "canceled" | "orphaned";
|
|
7
|
-
/**
|
|
8
|
-
* Slice 1.5 flight-recorder payload supplied via StartJobOptions.
|
|
9
|
-
* Decomposed to primitive fields (no nested handler-locals) so retaining
|
|
10
|
-
* a reference on the in-memory job record doesn't pin large promptParts
|
|
11
|
-
* or attachments via closure scope.
|
|
12
|
-
*/
|
|
13
7
|
export interface AsyncJobFlightRecorderEntry {
|
|
14
8
|
model: string;
|
|
15
9
|
prompt: string;
|
|
16
10
|
sessionId?: string;
|
|
17
11
|
stablePrefixHash?: string;
|
|
18
12
|
stablePrefixTokens?: number;
|
|
19
|
-
/**
|
|
20
|
-
* Slice κ: count of caller-supplied prompt-parts content blocks the
|
|
21
|
-
* gateway emitted with explicit Anthropic `cache_control` markers
|
|
22
|
-
* (ttl='1h'). Only set for Claude requests that opt into κ; left
|
|
23
|
-
* undefined elsewhere so legacy rows stay NULL.
|
|
24
|
-
*/
|
|
25
13
|
cacheControlBlocks?: number;
|
|
26
14
|
}
|
|
27
|
-
/**
|
|
28
|
-
* Slice 1.5 usage-extraction callback. Closures MUST be constructed from
|
|
29
|
-
* primitive locals only (e.g. const fmt = params.outputFormat; closure
|
|
30
|
-
* captures fmt). Capturing the handler's full `params` object pins large
|
|
31
|
-
* promptParts/attachments for JOB_TTL_MS.
|
|
32
|
-
*/
|
|
33
15
|
export type AsyncJobUsageExtractor = (stdout: string) => {
|
|
34
16
|
inputTokens?: number;
|
|
35
17
|
outputTokens?: number;
|
|
@@ -61,54 +43,17 @@ export interface StartJobOptions {
|
|
|
61
43
|
cwd?: string;
|
|
62
44
|
idleTimeoutMs?: number;
|
|
63
45
|
outputFormat?: string;
|
|
64
|
-
/** Bypass dedup and force a fresh CLI run even if a recent matching job exists. */
|
|
65
46
|
forceRefresh?: boolean;
|
|
66
|
-
/**
|
|
67
|
-
* Extra environment variables to inject when spawning the child CLI.
|
|
68
|
-
* Used by Mistral Vibe to pass `VIBE_ACTIVE_MODEL` (Vibe has no `--model` flag).
|
|
69
|
-
*
|
|
70
|
-
* IMPORTANT: env vars participate in the dedup key (canonicalised by sorted
|
|
71
|
-
* keys + JSON-stringified). Two requests that differ only in env (e.g. two
|
|
72
|
-
* Mistral requests with the same prompt but different VIBE_ACTIVE_MODEL)
|
|
73
|
-
* therefore do NOT collide on dedup.
|
|
74
|
-
*/
|
|
75
47
|
env?: Record<string, string>;
|
|
76
|
-
/**
|
|
77
|
-
* Slice κ: optional UTF-8 payload to pipe into the child's stdin.
|
|
78
|
-
* Participates in the dedup key — two requests with identical argv
|
|
79
|
-
* but different stdin do NOT collide. When set, stdio[0] is "pipe";
|
|
80
|
-
* when unset, stdio[0] stays "ignore" (regression-protected).
|
|
81
|
-
*/
|
|
82
48
|
stdin?: string;
|
|
83
|
-
/**
|
|
84
|
-
* Optional hook fired exactly once when the job reaches a terminal state.
|
|
85
|
-
* Used by callers that own per-request resources (outputSchema temp files,
|
|
86
|
-
* etc.) that must persist for the lifetime of the spawned CLI process.
|
|
87
|
-
*/
|
|
88
49
|
onComplete?: () => void;
|
|
89
|
-
/**
|
|
90
|
-
* Slice 1.5: when true, AsyncJobManager writes a flight-recorder logStart
|
|
91
|
-
* row at startJob entry using `flightRecorderEntry`. Pure async handlers
|
|
92
|
-
* (handle*RequestAsync) pass true because they have no upstream
|
|
93
|
-
* safeFlightStart writer. The sync-deferred path (awaitJobOrDefer) passes
|
|
94
|
-
* false because the upstream sync handler already wrote logStart keyed on
|
|
95
|
-
* the same correlationId — a second INSERT would crash on the PK.
|
|
96
|
-
*/
|
|
97
50
|
writeFlightStart?: boolean;
|
|
98
|
-
/** Slice 1.5: payload for the FR logStart and the terminal logComplete. */
|
|
99
51
|
flightRecorderEntry?: AsyncJobFlightRecorderEntry;
|
|
100
|
-
/**
|
|
101
|
-
* Slice 1.5: invoked only on terminal `completed` to populate token-usage
|
|
102
|
-
* fields in the FR logComplete payload. Construct from primitive locals
|
|
103
|
-
* only (see AsyncJobUsageExtractor doc).
|
|
104
|
-
*/
|
|
105
52
|
extractUsage?: AsyncJobUsageExtractor;
|
|
106
53
|
}
|
|
107
54
|
export interface StartJobOutcome {
|
|
108
55
|
snapshot: AsyncJobSnapshot;
|
|
109
|
-
/** Set to the existing job's id when the request was de-duplicated. */
|
|
110
56
|
deduped: boolean;
|
|
111
|
-
/** Set when deduped — the original job's correlation id, useful for logging. */
|
|
112
57
|
originalCorrelationId?: string;
|
|
113
58
|
}
|
|
114
59
|
export declare class AsyncJobManager {
|
|
@@ -120,77 +65,19 @@ export declare class AsyncJobManager {
|
|
|
120
65
|
private store;
|
|
121
66
|
private flightRecorder;
|
|
122
67
|
constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null, flightRecorder?: FlightRecorderLike);
|
|
123
|
-
/**
|
|
124
|
-
* True iff a durable (or memory) job store is attached. The MCP-tool
|
|
125
|
-
* registration layer ANDs this with persistence.asyncJobsEnabled when
|
|
126
|
-
* deciding whether to register the *_request_async / llm_job_* tools.
|
|
127
|
-
* Without a store, async tools must not be registered, otherwise we
|
|
128
|
-
* re-open the silent in-memory loss path the structural invariant closes.
|
|
129
|
-
*/
|
|
130
68
|
hasStore(): boolean;
|
|
131
69
|
private emitMetrics;
|
|
132
70
|
private evictCompletedJobs;
|
|
133
|
-
/**
|
|
134
|
-
* Compute the dedup key for a job. Stable across re-issues of the same request,
|
|
135
|
-
* which is exactly what allows agents to safely retry without restarting the run.
|
|
136
|
-
*
|
|
137
|
-
* U22 fix: env vars participate in the key via a deterministic canonicalisation
|
|
138
|
-
* (sorted keys → JSON-stringified). This prevents two Mistral requests with the
|
|
139
|
-
* same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
|
|
140
|
-
*/
|
|
141
71
|
private buildRequestKey;
|
|
142
72
|
private fireOnComplete;
|
|
143
|
-
/**
|
|
144
|
-
* Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
|
|
145
|
-
* failure semantics (response = stderr||stdout on failure, errorMessage
|
|
146
|
-
* falls back through overrideErrorMessage → job.error → job.stderr →
|
|
147
|
-
* "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
|
|
148
|
-
* thrown logComplete can be retried by a later terminal callback; the
|
|
149
|
-
* FR's WHERE status='started' UPDATE guard remains the actual
|
|
150
|
-
* idempotency mechanism for the common "retry succeeds, original
|
|
151
|
-
* succeeded too" case.
|
|
152
|
-
*/
|
|
153
73
|
private writeFlightComplete;
|
|
154
74
|
private safeExtractUsage;
|
|
155
|
-
/**
|
|
156
|
-
* R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
|
|
157
|
-
* deferred response. From this point on the sync handler will not write
|
|
158
|
-
* its own safeFlightComplete, so the manager takes over.
|
|
159
|
-
*
|
|
160
|
-
* Race mitigation: if the job already terminated between the sync
|
|
161
|
-
* deadline expiring and this method firing, write logComplete
|
|
162
|
-
* synchronously here so the previously-skipped terminal callback's
|
|
163
|
-
* write isn't lost.
|
|
164
|
-
*/
|
|
165
75
|
armFlightCompleteForDeferral(jobId: string): void;
|
|
166
76
|
private safeStoreCall;
|
|
167
|
-
/**
|
|
168
|
-
* Flush in-memory stdout/stderr to the durable store if anything changed
|
|
169
|
-
* since the last flush. Throttled by OUTPUT_FLUSH_INTERVAL_MS to avoid
|
|
170
|
-
* pounding sqlite on every chunk of streaming output.
|
|
171
|
-
*/
|
|
172
77
|
private maybeFlushOutput;
|
|
173
78
|
private persistComplete;
|
|
174
|
-
/**
|
|
175
|
-
* Reconstitute an in-memory AsyncJobRecord from a durable row, so subsequent
|
|
176
|
-
* getJobSnapshot/getJobResult calls hit the in-memory cache.
|
|
177
|
-
* The reconstituted record has process=null — it represents historical data only.
|
|
178
|
-
*/
|
|
179
79
|
private hydrateFromStore;
|
|
180
|
-
/**
|
|
181
|
-
* Backwards-compatible entry point. Equivalent to startJobWithDedup({...}).snapshot.
|
|
182
|
-
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
183
|
-
* optional param for the dedup-aware path.
|
|
184
|
-
*/
|
|
185
80
|
startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean, stdin?: string): AsyncJobSnapshot;
|
|
186
|
-
/**
|
|
187
|
-
* Start a job, with optional dedup against recent identical requests.
|
|
188
|
-
* Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
|
|
189
|
-
*
|
|
190
|
-
* Dedup is keyed on (cli, args). If a job with the same key was started within
|
|
191
|
-
* the dedup window (default 1h) and is still running or completed, its snapshot
|
|
192
|
-
* is returned without spawning a new process. forceRefresh skips dedup entirely.
|
|
193
|
-
*/
|
|
194
81
|
startJobWithDedup(cli: LlmCli, args: string[], correlationId: string, opts?: StartJobOptions): StartJobOutcome;
|
|
195
82
|
getJobSnapshot(jobId: string): AsyncJobSnapshot | null;
|
|
196
83
|
getJobSnapshots(jobIds: string[]): Record<string, AsyncJobSnapshot | null>;
|
package/dist/async-job-manager.js
CHANGED
|
@@ -5,9 +5,9 @@ import { ProcessMonitor } from "./process-monitor.js";
|
|
|
5
5
|
import { computeRequestKey } from "./job-store.js";
|
|
6
6
|
import { NoopFlightRecorder } from "./flight-recorder.js";
|
|
7
7
|
const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
|
|
8
|
-
const JOB_TTL_MS = 60 * 60 * 1000;
|
|
9
|
-
const EVICTION_INTERVAL_MS = 5 * 60 * 1000;
|
|
10
|
-
const OUTPUT_FLUSH_INTERVAL_MS = 1000;
|
|
8
|
+
const JOB_TTL_MS = 60 * 60 * 1000;
|
|
9
|
+
const EVICTION_INTERVAL_MS = 5 * 60 * 1000;
|
|
10
|
+
const OUTPUT_FLUSH_INTERVAL_MS = 1000;
|
|
11
11
|
function describeProcessLaunchError(cli, error) {
|
|
12
12
|
const code = error.code;
|
|
13
13
|
if (code === "ENOENT") {
|
|
@@ -30,11 +30,6 @@ function describeWindowsLaunchExit(cli, exitCode) {
|
|
|
30
30
|
message: `The '${cli}' command was not found. Install the ${cli} CLI and make sure it is on PATH.`,
|
|
31
31
|
};
|
|
32
32
|
}
|
|
33
|
-
/**
|
|
34
|
-
* U22 fix: deterministic canonicalisation of an env-var map for the dedup key.
|
|
35
|
-
* Returns "" when env is undefined or empty (preserves dedup key continuity for
|
|
36
|
-
* pre-U22 callers that pass no env).
|
|
37
|
-
*/
|
|
38
33
|
function canonicaliseEnvForKey(env) {
|
|
39
34
|
if (!env)
|
|
40
35
|
return "";
|
|
@@ -75,10 +70,6 @@ export class AsyncJobManager {
|
|
|
75
70
|
if (count > 0) {
|
|
76
71
|
this.logger.info(`Marked ${count} in-flight job(s) as orphaned after gateway restart`);
|
|
77
72
|
}
|
|
78
|
-
// Slice 1.5: close out the FR row for each orphaned job. The FR
|
|
79
|
-
// logComplete UPDATE has WHERE status='started' so pre-1.7.0 rows
|
|
80
|
-
// (where the prior gateway never wrote a logStart) silently
|
|
81
|
-
// no-op. Wrapped per-orphan so a single bad row can't tank boot.
|
|
82
73
|
for (const orphan of orphaned) {
|
|
83
74
|
try {
|
|
84
75
|
const durationMs = Math.max(0, Date.now() - new Date(orphan.startedAt).getTime());
|
|
@@ -103,18 +94,10 @@ export class AsyncJobManager {
|
|
|
103
94
|
}
|
|
104
95
|
}
|
|
105
96
|
this.evictionTimer = setInterval(() => this.evictCompletedJobs(), EVICTION_INTERVAL_MS);
|
|
106
|
-
// Allow the process to exit even if the timer is active
|
|
107
97
|
if (this.evictionTimer.unref) {
|
|
108
98
|
this.evictionTimer.unref();
|
|
109
99
|
}
|
|
110
100
|
}
|
|
111
|
-
/**
|
|
112
|
-
* True iff a durable (or memory) job store is attached. The MCP-tool
|
|
113
|
-
* registration layer ANDs this with persistence.asyncJobsEnabled when
|
|
114
|
-
* deciding whether to register the *_request_async / llm_job_* tools.
|
|
115
|
-
* Without a store, async tools must not be registered, otherwise we
|
|
116
|
-
* re-open the silent in-memory loss path the structural invariant closes.
|
|
117
|
-
*/
|
|
118
101
|
hasStore() {
|
|
119
102
|
return this.store !== null;
|
|
120
103
|
}
|
|
@@ -137,7 +120,6 @@ export class AsyncJobManager {
|
|
|
137
120
|
evictCompletedJobs() {
|
|
138
121
|
const now = Date.now();
|
|
139
122
|
let evicted = 0;
|
|
140
|
-
// Dead process auto-recovery: check for running jobs whose process no longer exists
|
|
141
123
|
for (const [id, job] of this.jobs) {
|
|
142
124
|
if (job.status === "running" && job.process && job.process.pid) {
|
|
143
125
|
try {
|
|
@@ -157,10 +139,8 @@ export class AsyncJobManager {
|
|
|
157
139
|
this.writeFlightComplete(job, "failed");
|
|
158
140
|
this.fireOnComplete(job);
|
|
159
141
|
}
|
|
160
|
-
// EPERM: process exists but we can't signal it — ignore
|
|
161
142
|
}
|
|
162
143
|
}
|
|
163
|
-
// Check for exited flag mismatch (close handler may have fired but status wasn't updated)
|
|
164
144
|
if (job.status === "running" && job.exited) {
|
|
165
145
|
job.status = "failed";
|
|
166
146
|
job.error = "Process exited without proper status transition";
|
|
@@ -186,7 +166,6 @@ export class AsyncJobManager {
|
|
|
186
166
|
if (evicted > 0) {
|
|
187
167
|
this.logger.debug(`Evicted ${evicted} completed jobs from memory (durable store retains them)`);
|
|
188
168
|
}
|
|
189
|
-
// Sweep the durable store, too. Errors are non-fatal — the job rows just stay until next sweep.
|
|
190
169
|
if (this.store) {
|
|
191
170
|
try {
|
|
192
171
|
const removed = this.store.evictExpired();
|
|
@@ -199,26 +178,7 @@ export class AsyncJobManager {
|
|
|
199
178
|
}
|
|
200
179
|
}
|
|
201
180
|
}
|
|
202
|
-
/**
|
|
203
|
-
* Compute the dedup key for a job. Stable across re-issues of the same request,
|
|
204
|
-
* which is exactly what allows agents to safely retry without restarting the run.
|
|
205
|
-
*
|
|
206
|
-
* U22 fix: env vars participate in the key via a deterministic canonicalisation
|
|
207
|
-
* (sorted keys → JSON-stringified). This prevents two Mistral requests with the
|
|
208
|
-
* same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
|
|
209
|
-
*/
|
|
210
181
|
buildRequestKey(cli, args, env, stdin, cwd) {
|
|
211
|
-
// Slice κ: stdin participates in the dedup key. Two Claude requests
|
|
212
|
-
// with identical argv but different cache_control content blocks
|
|
213
|
-
// would otherwise collide on dedup and the second caller would get
|
|
214
|
-
// the wrong response. The legacy "no stdin" code path passes
|
|
215
|
-
// stdin=undefined, which serialises to the same empty marker the
|
|
216
|
-
// previous version emitted — non-κ dedup is unchanged.
|
|
217
|
-
// Slice λ: cwd participates similarly. Two requests with identical
|
|
218
|
-
// argv but different worktrees would otherwise collide on dedup and
|
|
219
|
-
// the second caller would receive a response executed in the wrong
|
|
220
|
-
// worktree. cwd=undefined preserves the pre-λ key shape — non-λ
|
|
221
|
-
// dedup is unchanged.
|
|
222
182
|
const extraEnv = canonicaliseEnvForKey(env);
|
|
223
183
|
const withStdin = stdin === undefined ? extraEnv : `${extraEnv}|stdin:${stdin}`;
|
|
224
184
|
const extra = cwd === undefined ? withStdin : `${withStdin}|cwd:${cwd}`;
|
|
@@ -237,26 +197,13 @@ export class AsyncJobManager {
|
|
|
237
197
|
this.logger.error(`Job ${job.id} onComplete hook threw`, err);
|
|
238
198
|
}
|
|
239
199
|
}
|
|
240
|
-
/**
|
|
241
|
-
* Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
|
|
242
|
-
* failure semantics (response = stderr||stdout on failure, errorMessage
|
|
243
|
-
* falls back through overrideErrorMessage → job.error → job.stderr →
|
|
244
|
-
* "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
|
|
245
|
-
* thrown logComplete can be retried by a later terminal callback; the
|
|
246
|
-
* FR's WHERE status='started' UPDATE guard remains the actual
|
|
247
|
-
* idempotency mechanism for the common "retry succeeds, original
|
|
248
|
-
* succeeded too" case.
|
|
249
|
-
*/
|
|
250
200
|
writeFlightComplete(job, finalStatus, overrideErrorMessage) {
|
|
251
201
|
if (!job.flightRecorderEntry)
|
|
252
|
-
return;
|
|
253
|
-
// R2 Codex-Unit-B F1: only write when armed. Sync-inline requests are
|
|
254
|
-
// NOT armed at startJob — the sync handler owns the rich-metadata
|
|
255
|
-
// safeFlightComplete write. Pure async + sync-deferred ARE armed.
|
|
202
|
+
return;
|
|
256
203
|
if (!job.flightCompleteArmed)
|
|
257
204
|
return;
|
|
258
205
|
if (job.flightRecorderComplete)
|
|
259
|
-
return;
|
|
206
|
+
return;
|
|
260
207
|
const durationMs = Math.max(0, Date.now() - new Date(job.startedAt).getTime());
|
|
261
208
|
const usage = finalStatus === "completed" && job.extractUsage ? this.safeExtractUsage(job) : {};
|
|
262
209
|
const isFailure = finalStatus === "failed";
|
|
@@ -281,11 +228,7 @@ export class AsyncJobManager {
|
|
|
281
228
|
cacheCreationTokens: usage.cacheCreationTokens,
|
|
282
229
|
costUsd: usage.costUsd,
|
|
283
230
|
});
|
|
284
|
-
// Only mark complete on successful write so a thrown logComplete
|
|
285
|
-
// can be retried by the next terminal callback.
|
|
286
231
|
job.flightRecorderComplete = true;
|
|
287
|
-
// Clear retained references so the GC can reclaim anything the
|
|
288
|
-
// extractUsage closure captured.
|
|
289
232
|
job.flightRecorderEntry = undefined;
|
|
290
233
|
job.extractUsage = undefined;
|
|
291
234
|
}
|
|
@@ -302,27 +245,15 @@ export class AsyncJobManager {
|
|
|
302
245
|
return {};
|
|
303
246
|
}
|
|
304
247
|
}
|
|
305
|
-
/**
|
|
306
|
-
* R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
|
|
307
|
-
* deferred response. From this point on the sync handler will not write
|
|
308
|
-
* its own safeFlightComplete, so the manager takes over.
|
|
309
|
-
*
|
|
310
|
-
* Race mitigation: if the job already terminated between the sync
|
|
311
|
-
* deadline expiring and this method firing, write logComplete
|
|
312
|
-
* synchronously here so the previously-skipped terminal callback's
|
|
313
|
-
* write isn't lost.
|
|
314
|
-
*/
|
|
315
248
|
armFlightCompleteForDeferral(jobId) {
|
|
316
249
|
const job = this.jobs.get(jobId);
|
|
317
250
|
if (!job)
|
|
318
251
|
return;
|
|
319
252
|
if (job.flightCompleteArmed)
|
|
320
|
-
return;
|
|
253
|
+
return;
|
|
321
254
|
job.flightCompleteArmed = true;
|
|
322
255
|
if (job.status === "running")
|
|
323
256
|
return;
|
|
324
|
-
// Job already terminal — the close handler's writeFlightComplete
|
|
325
|
-
// saw flightCompleteArmed=false and skipped. Write now to recover.
|
|
326
257
|
const finalStatus = job.status === "completed" ? "completed" : "failed";
|
|
327
258
|
const override = job.canceled ? "canceled by caller" : undefined;
|
|
328
259
|
this.writeFlightComplete(job, finalStatus, override);
|
|
@@ -337,11 +268,6 @@ export class AsyncJobManager {
|
|
|
337
268
|
this.logger.error(`JobStore.${label} failed`, err);
|
|
338
269
|
}
|
|
339
270
|
}
|
|
340
|
-
/**
|
|
341
|
-
* Flush in-memory stdout/stderr to the durable store if anything changed
|
|
342
|
-
* since the last flush. Throttled by OUTPUT_FLUSH_INTERVAL_MS to avoid
|
|
343
|
-
* pounding sqlite on every chunk of streaming output.
|
|
344
|
-
*/
|
|
345
271
|
maybeFlushOutput(job, force = false) {
|
|
346
272
|
if (!this.store)
|
|
347
273
|
return;
|
|
@@ -361,7 +287,6 @@ export class AsyncJobManager {
|
|
|
361
287
|
return;
|
|
362
288
|
if (!job.finishedAt)
|
|
363
289
|
return;
|
|
364
|
-
// Make sure the latest output is captured in the same row update.
|
|
365
290
|
job.outputDirty = false;
|
|
366
291
|
this.safeStoreCall("recordComplete", () => this.store.recordComplete({
|
|
367
292
|
id: job.id,
|
|
@@ -374,11 +299,6 @@ export class AsyncJobManager {
|
|
|
374
299
|
finishedAt: job.finishedAt,
|
|
375
300
|
}));
|
|
376
301
|
}
|
|
377
|
-
/**
|
|
378
|
-
* Reconstitute an in-memory AsyncJobRecord from a durable row, so subsequent
|
|
379
|
-
* getJobSnapshot/getJobResult calls hit the in-memory cache.
|
|
380
|
-
* The reconstituted record has process=null — it represents historical data only.
|
|
381
|
-
*/
|
|
382
302
|
hydrateFromStore(jobId) {
|
|
383
303
|
if (!this.store)
|
|
384
304
|
return null;
|
|
@@ -426,11 +346,6 @@ export class AsyncJobManager {
|
|
|
426
346
|
this.jobs.set(jobId, reconstituted);
|
|
427
347
|
return reconstituted;
|
|
428
348
|
}
|
|
429
|
-
/**
|
|
430
|
-
* Backwards-compatible entry point. Equivalent to startJobWithDedup({...}).snapshot.
|
|
431
|
-
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
432
|
-
* optional param for the dedup-aware path.
|
|
433
|
-
*/
|
|
434
349
|
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, stdin) {
|
|
435
350
|
return this.startJobWithDedup(cli, args, correlationId, {
|
|
436
351
|
cwd,
|
|
@@ -445,14 +360,6 @@ export class AsyncJobManager {
|
|
|
445
360
|
writeFlightStart,
|
|
446
361
|
}).snapshot;
|
|
447
362
|
}
|
|
448
|
-
/**
|
|
449
|
-
* Start a job, with optional dedup against recent identical requests.
|
|
450
|
-
* Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
|
|
451
|
-
*
|
|
452
|
-
* Dedup is keyed on (cli, args). If a job with the same key was started within
|
|
453
|
-
* the dedup window (default 1h) and is still running or completed, its snapshot
|
|
454
|
-
* is returned without spawning a new process. forceRefresh skips dedup entirely.
|
|
455
|
-
*/
|
|
456
363
|
startJobWithDedup(cli, args, correlationId, opts = {}) {
|
|
457
364
|
const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, stdin, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
|
|
458
365
|
const requestKey = this.buildRequestKey(cli, args, extraEnv, stdin, cwd);
|
|
@@ -460,7 +367,6 @@ export class AsyncJobManager {
|
|
|
460
367
|
try {
|
|
461
368
|
const existing = this.store.findByRequestKey(requestKey);
|
|
462
369
|
if (existing) {
|
|
463
|
-
// Prefer the in-memory record if we still have it (live process, idle timers, etc).
|
|
464
370
|
let record = this.jobs.get(existing.id);
|
|
465
371
|
if (!record) {
|
|
466
372
|
record = this.hydrateFromStore(existing.id) ?? undefined;
|
|
@@ -471,11 +377,6 @@ export class AsyncJobManager {
|
|
|
471
377
|
originalCorrelationId: record.correlationId,
|
|
472
378
|
status: record.status,
|
|
473
379
|
});
|
|
474
|
-
// U26 fix: the caller's per-request resources (e.g. outputSchema temp
|
|
475
|
-
// file) are NOT consumed by the deduped job, which reuses its own
|
|
476
|
-
// original resources. Release the new request's cleanup immediately
|
|
477
|
-
// to avoid an orphaned temp file. The original job's onComplete (if
|
|
478
|
-
// any) remains attached to that original job record.
|
|
479
380
|
if (onComplete) {
|
|
480
381
|
try {
|
|
481
382
|
onComplete();
|
|
@@ -498,8 +399,6 @@ export class AsyncJobManager {
|
|
|
498
399
|
}
|
|
499
400
|
const id = randomUUID();
|
|
500
401
|
const startedAt = new Date().toISOString();
|
|
501
|
-
// Mistral Vibe ships as the `vibe` binary; the gateway uses `mistral` as the
|
|
502
|
-
// provider key but spawns `vibe` on the shell.
|
|
503
402
|
const command = cli === "mistral" ? "vibe" : cli;
|
|
504
403
|
const baseEnv = envWithExtendedPath(process.env, getExtendedPath());
|
|
505
404
|
const child = spawnCliProcess(command, args, {
|
|
@@ -516,7 +415,6 @@ export class AsyncJobManager {
|
|
|
516
415
|
}
|
|
517
416
|
child.stdin.end();
|
|
518
417
|
}
|
|
519
|
-
// Single cleanup flag to prevent double-unregister
|
|
520
418
|
let groupCleaned = false;
|
|
521
419
|
const cleanupGroup = () => {
|
|
522
420
|
if (groupCleaned)
|
|
@@ -552,10 +450,6 @@ export class AsyncJobManager {
|
|
|
552
450
|
flightRecorderEntry,
|
|
553
451
|
extractUsage,
|
|
554
452
|
flightRecorderComplete: false,
|
|
555
|
-
// R2 Codex-Unit-B F1: pure async path arms now (writeFlightStart=true
|
|
556
|
-
// means the manager is the only FR writer). Sync-deferred path
|
|
557
|
-
// arrives with writeFlightStart=false and arms later via
|
|
558
|
-
// armFlightCompleteForDeferral when awaitJobOrDefer decides to defer.
|
|
559
453
|
flightCompleteArmed: writeFlightStart === true,
|
|
560
454
|
};
|
|
561
455
|
this.jobs.set(id, job);
|
|
@@ -569,10 +463,6 @@ export class AsyncJobManager {
|
|
|
569
463
|
startedAt,
|
|
570
464
|
pid: child.pid ?? null,
|
|
571
465
|
}));
|
|
572
|
-
// Slice 1.5: only opt-in callers (pure async handlers) write logStart
|
|
573
|
-
// here. The sync-deferred path passes writeFlightStart=false because
|
|
574
|
-
// the upstream sync handler already wrote a logStart row keyed on the
|
|
575
|
-
// same correlationId; a duplicate INSERT would crash on the PK.
|
|
576
466
|
if (writeFlightStart && flightRecorderEntry) {
|
|
577
467
|
try {
|
|
578
468
|
this.flightRecorder.logStart({
|
|
@@ -592,7 +482,6 @@ export class AsyncJobManager {
|
|
|
592
482
|
}
|
|
593
483
|
}
|
|
594
484
|
this.logger.info(`Job ${id} started for ${cli}`, { correlationId });
|
|
595
|
-
// Idle timeout: kill process if no output activity for idleTimeoutMs
|
|
596
485
|
let idleTimerId;
|
|
597
486
|
const resetIdleTimer = () => {
|
|
598
487
|
if (!idleTimeoutMs || idleTimeoutMs <= 0)
|
|
@@ -655,7 +544,6 @@ export class AsyncJobManager {
|
|
|
655
544
|
child.on("close", (code) => {
|
|
656
545
|
job.exited = true;
|
|
657
546
|
job.clearIdleTimer?.();
|
|
658
|
-
// Unregister process group on clean exit (no kill was issued)
|
|
659
547
|
if (!job.canceled && job.status === "running") {
|
|
660
548
|
job.cleanupGroup?.();
|
|
661
549
|
}
|
|
@@ -664,11 +552,7 @@ export class AsyncJobManager {
|
|
|
664
552
|
if (!job.finishedAt) {
|
|
665
553
|
job.finishedAt = new Date().toISOString();
|
|
666
554
|
}
|
|
667
|
-
// Ensure terminal state reaches the durable store (idle-timeout/output-overflow already persisted).
|
|
668
555
|
this.persistComplete(job);
|
|
669
|
-
// Slice 1.5: retry the FR complete write iff the earlier terminal
|
|
670
|
-
// callback's logComplete threw. The single-shot guard in
|
|
671
|
-
// writeFlightComplete makes this a no-op in the common case.
|
|
672
556
|
const fallbackFlightStatus = job.status === "completed" ? "completed" : "failed";
|
|
673
557
|
const fallbackOverride = job.status === "canceled" ? "canceled by caller" : undefined;
|
|
674
558
|
this.writeFlightComplete(job, fallbackFlightStatus, fallbackOverride);
|
|
@@ -736,7 +620,6 @@ export class AsyncJobManager {
|
|
|
736
620
|
if (job.status !== "running") {
|
|
737
621
|
return { canceled: false, reason: `Job is already ${job.status}` };
|
|
738
622
|
}
|
|
739
|
-
// Reconstituted (orphaned) jobs have no live process to signal — refuse cancel.
|
|
740
623
|
if (!job.process) {
|
|
741
624
|
return {
|
|
742
625
|
canceled: false,
|
|
@@ -777,7 +660,6 @@ export class AsyncJobManager {
|
|
|
777
660
|
getJobHealth() {
|
|
778
661
|
const running = this.getRunningJobs();
|
|
779
662
|
const health = this.processMonitor.checkJobHealth(running);
|
|
780
|
-
// Clean up stale CPU samples for PIDs that are no longer running
|
|
781
663
|
const activePids = new Set(running.map(j => j.pid).filter((p) => p !== null));
|
|
782
664
|
this.processMonitor.cleanupSamples(activePids);
|
|
783
665
|
return {
|