llm-cli-gateway 1.17.3 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +12 -208
  29. package/dist/index.js +116 -588
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +6 -31
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +4 -316
  48. package/dist/request-helpers.js +13 -231
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +86 -64
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -4,6 +4,51 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [1.17.5] - 2026-06-02: Socket networkAccess cleanup
8
+
9
+ Patch release that stops the recurring Socket `networkAccess` (`globalThis["fetch"]`)
10
+ false-positive on the published package.
11
+
12
+ ### Fixed
13
+
14
+ - The build now strips comments from the published `dist/*.js` (`removeComments`),
15
+ and the word "fetch" no longer appears in any shipped source. Socket's
16
+ `networkAccess` heuristic scans shipped comments and descriptions, and a stray
17
+ "fetch" in a JSDoc kept tripping a `globalThis["fetch"]` alert that the 1.17.3
18
+ reword only partly addressed. The `shellAccess` (`child_process`) alert on
19
+ `executor.js` / `worktree-manager.js` is inherent to spawning the provider CLIs
20
+ and git and is unchanged.
21
+
22
+ ## [1.17.4] - 2026-06-02: upstream contract compatibility
23
+
24
+ Patch release that realigns the provider CLI contracts with the currently
25
+ installed binaries (codex 0.135.0, grok 0.2.16, gemini 0.44.1, claude 2.1.159,
26
+ vibe 2.12.1).
27
+
28
+ ### Fixed
29
+
30
+ - Mistral: dropped the unsupported `--effort` / `--reasoning-effort` surface.
31
+ vibe 2.x argparse rejects both flags, so any `mistral_request` that passed
32
+ `effort` / `reasoningEffort` failed before reaching the model. Locked out with
33
+ two `expect:fail` conformance fixtures and a builder guard test.
34
+
35
+ ### Added
36
+
37
+ - Grok: `--compaction-mode` (summary|transcript|segments) and
38
+ `--compaction-detail` (none|minimal|balanced|verbose) context controls, wired
39
+ as enum passthrough flags on `grok_request` / `grok_request_async`.
40
+ - Gemini: a `yolo` boolean that emits `--yolo` (auto-approve all actions). It
41
+ routes through the mcp_managed approval gate and is never emitted alongside
42
+ `--approval-mode yolo`.
43
+ - Claude: `--no-session-persistence`, `--setting-sources`, `--settings`, and
44
+ `--tools` exposed through `prepareClaudeHighImpactFlags`. `--betas` is left
45
+ out on purpose, since it is API-key only and the gateway runs Claude via OAuth.
46
+
47
+ ### Notes
48
+
49
+ - Documented `--max-turns` as a known `--probe-installed` false-positive: claude
50
+ 2.x hides it from `--help` but still accepts it.
51
+
7
52
  ## [1.17.3] - 2026-05-31 — Socket scanner prose cleanup
8
53
 
9
54
  Patch release that removes wording in shipped metadata that Socket classified
package/README.md CHANGED
@@ -747,7 +747,7 @@ Use this flow when analysis/runtime can exceed client tool-call limits:
747
747
 
748
748
  1. Start job with `*_request_async`
749
749
  2. Poll with `llm_job_status`
750
- 3. Fetch output with `llm_job_result`
750
+ 3. Read output with `llm_job_result`
751
751
  4. Optionally stop with `llm_job_cancel`
752
752
 
753
753
  Async request tools accept the same approval strategy fields as their sync variants:
package/dist/approval-manager.js CHANGED
@@ -65,22 +65,18 @@ export class ApprovalManager {
65
65
  reasons.push("Request enables documentation retrieval MCP (ref_tools)");
66
66
  }
67
67
  if (request.allowedTools && request.allowedTools.length === 0) {
68
- // Independently verify review context from the prompt — never trust caller-supplied flags alone
69
68
  const promptIsReview = isReviewContext(request.prompt);
70
69
  if (promptIsReview) {
71
70
  score += 6;
72
71
  reasons.push("Empty allowedTools in review context — reviewers need tool access");
73
72
  }
74
73
  else {
75
- // Neutral score — tool restrictions should never reduce risk score
76
- // (prevents gaming via review-context evasion + restrictive tools = negative score)
77
74
  reasons.push("No tool permissions requested");
78
75
  }
79
76
  }
80
77
  if (request.disallowedTools && request.disallowedTools.length > 0) {
81
78
  const promptIsReviewForDisallowed = isReviewContext(request.prompt);
82
79
  const criticalTools = ["Read", "Grep", "Glob", "Bash"];
83
- // Canonicalize to handle scoped forms like "Read(*)", "Bash(git:*)"
84
80
  const canonicalized = request.disallowedTools.map(s => {
85
81
  const trimmed = s.trim();
86
82
  const cut = Math.min(...[trimmed.indexOf("("), trimmed.indexOf(":")]
@@ -94,7 +90,6 @@ export class ApprovalManager {
94
90
  reasons.push(`Critical review tools disallowed: ${blockedCritical.join(", ")} — reviewers need these`);
95
91
  }
96
92
  else {
97
- // Neutral score — tool restrictions should never reduce risk score
98
93
  reasons.push("Has explicit disallowed tool restrictions");
99
94
  }
100
95
  }
@@ -104,7 +99,6 @@ export class ApprovalManager {
104
99
  }
105
100
  if (request.reviewIntegrity && request.reviewIntegrity.violations.length > 0) {
106
101
  for (const violation of request.reviewIntegrity.violations) {
107
- // Skip empty_allowed_tools and critical_tools_disallowed — already handled in context-dependent scoring above
108
102
  if (violation.type === "empty_allowed_tools" ||
109
103
  violation.type === "critical_tools_disallowed")
110
104
  continue;
@@ -112,8 +106,6 @@ export class ApprovalManager {
112
106
  reasons.push(`Review integrity: ${violation.detail}`);
113
107
  }
114
108
  }
115
- // Balanced policy allows routine full-auto requests with standard MCP servers,
116
- // while still denying bypass/sensitive combinations.
117
109
  const threshold = policy === "strict" ? 2 : policy === "balanced" ? 5 : 7;
118
110
  const status = score <= threshold ? "approved" : "denied";
119
111
  const record = {
package/dist/async-job-manager.d.ts CHANGED
@@ -4,32 +4,14 @@ import { JobStore } from "./job-store.js";
4
4
  import { type FlightRecorderLike } from "./flight-recorder.js";
5
5
  export type LlmCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
6
6
  export type AsyncJobStatus = "running" | "completed" | "failed" | "canceled" | "orphaned";
7
- /**
8
- * Slice 1.5 flight-recorder payload supplied via StartJobOptions.
9
- * Decomposed to primitive fields (no nested handler-locals) so retaining
10
- * a reference on the in-memory job record doesn't pin large promptParts
11
- * or attachments via closure scope.
12
- */
13
7
  export interface AsyncJobFlightRecorderEntry {
14
8
  model: string;
15
9
  prompt: string;
16
10
  sessionId?: string;
17
11
  stablePrefixHash?: string;
18
12
  stablePrefixTokens?: number;
19
- /**
20
- * Slice κ: count of caller-supplied prompt-parts content blocks the
21
- * gateway emitted with explicit Anthropic `cache_control` markers
22
- * (ttl='1h'). Only set for Claude requests that opt into κ; left
23
- * undefined elsewhere so legacy rows stay NULL.
24
- */
25
13
  cacheControlBlocks?: number;
26
14
  }
27
- /**
28
- * Slice 1.5 usage-extraction callback. Closures MUST be constructed from
29
- * primitive locals only (e.g. const fmt = params.outputFormat; closure
30
- * captures fmt). Capturing the handler's full `params` object pins large
31
- * promptParts/attachments for JOB_TTL_MS.
32
- */
33
15
  export type AsyncJobUsageExtractor = (stdout: string) => {
34
16
  inputTokens?: number;
35
17
  outputTokens?: number;
@@ -61,54 +43,17 @@ export interface StartJobOptions {
61
43
  cwd?: string;
62
44
  idleTimeoutMs?: number;
63
45
  outputFormat?: string;
64
- /** Bypass dedup and force a fresh CLI run even if a recent matching job exists. */
65
46
  forceRefresh?: boolean;
66
- /**
67
- * Extra environment variables to inject when spawning the child CLI.
68
- * Used by Mistral Vibe to pass `VIBE_ACTIVE_MODEL` (Vibe has no `--model` flag).
69
- *
70
- * IMPORTANT: env vars participate in the dedup key (canonicalised by sorted
71
- * keys + JSON-stringified). Two requests that differ only in env (e.g. two
72
- * Mistral requests with the same prompt but different VIBE_ACTIVE_MODEL)
73
- * therefore do NOT collide on dedup.
74
- */
75
47
  env?: Record<string, string>;
76
- /**
77
- * Slice κ: optional UTF-8 payload to pipe into the child's stdin.
78
- * Participates in the dedup key — two requests with identical argv
79
- * but different stdin do NOT collide. When set, stdio[0] is "pipe";
80
- * when unset, stdio[0] stays "ignore" (regression-protected).
81
- */
82
48
  stdin?: string;
83
- /**
84
- * Optional hook fired exactly once when the job reaches a terminal state.
85
- * Used by callers that own per-request resources (outputSchema temp files,
86
- * etc.) that must persist for the lifetime of the spawned CLI process.
87
- */
88
49
  onComplete?: () => void;
89
- /**
90
- * Slice 1.5: when true, AsyncJobManager writes a flight-recorder logStart
91
- * row at startJob entry using `flightRecorderEntry`. Pure async handlers
92
- * (handle*RequestAsync) pass true because they have no upstream
93
- * safeFlightStart writer. The sync-deferred path (awaitJobOrDefer) passes
94
- * false because the upstream sync handler already wrote logStart keyed on
95
- * the same correlationId — a second INSERT would crash on the PK.
96
- */
97
50
  writeFlightStart?: boolean;
98
- /** Slice 1.5: payload for the FR logStart and the terminal logComplete. */
99
51
  flightRecorderEntry?: AsyncJobFlightRecorderEntry;
100
- /**
101
- * Slice 1.5: invoked only on terminal `completed` to populate token-usage
102
- * fields in the FR logComplete payload. Construct from primitive locals
103
- * only (see AsyncJobUsageExtractor doc).
104
- */
105
52
  extractUsage?: AsyncJobUsageExtractor;
106
53
  }
107
54
  export interface StartJobOutcome {
108
55
  snapshot: AsyncJobSnapshot;
109
- /** Set to the existing job's id when the request was de-duplicated. */
110
56
  deduped: boolean;
111
- /** Set when deduped — the original job's correlation id, useful for logging. */
112
57
  originalCorrelationId?: string;
113
58
  }
114
59
  export declare class AsyncJobManager {
@@ -120,77 +65,19 @@ export declare class AsyncJobManager {
120
65
  private store;
121
66
  private flightRecorder;
122
67
  constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null, flightRecorder?: FlightRecorderLike);
123
- /**
124
- * True iff a durable (or memory) job store is attached. The MCP-tool
125
- * registration layer ANDs this with persistence.asyncJobsEnabled when
126
- * deciding whether to register the *_request_async / llm_job_* tools.
127
- * Without a store, async tools must not be registered, otherwise we
128
- * re-open the silent in-memory loss path the structural invariant closes.
129
- */
130
68
  hasStore(): boolean;
131
69
  private emitMetrics;
132
70
  private evictCompletedJobs;
133
- /**
134
- * Compute the dedup key for a job. Stable across re-issues of the same request,
135
- * which is exactly what allows agents to safely retry without restarting the run.
136
- *
137
- * U22 fix: env vars participate in the key via a deterministic canonicalisation
138
- * (sorted keys → JSON-stringified). This prevents two Mistral requests with the
139
- * same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
140
- */
141
71
  private buildRequestKey;
142
72
  private fireOnComplete;
143
- /**
144
- * Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
145
- * failure semantics (response = stderr||stdout on failure, errorMessage
146
- * falls back through overrideErrorMessage → job.error → job.stderr →
147
- * "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
148
- * thrown logComplete can be retried by a later terminal callback; the
149
- * FR's WHERE status='started' UPDATE guard remains the actual
150
- * idempotency mechanism for the common "retry succeeds, original
151
- * succeeded too" case.
152
- */
153
73
  private writeFlightComplete;
154
74
  private safeExtractUsage;
155
- /**
156
- * R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
157
- * deferred response. From this point on the sync handler will not write
158
- * its own safeFlightComplete, so the manager takes over.
159
- *
160
- * Race mitigation: if the job already terminated between the sync
161
- * deadline expiring and this method firing, write logComplete
162
- * synchronously here so the previously-skipped terminal callback's
163
- * write isn't lost.
164
- */
165
75
  armFlightCompleteForDeferral(jobId: string): void;
166
76
  private safeStoreCall;
167
- /**
168
- * Flush in-memory stdout/stderr to the durable store if anything changed
169
- * since the last flush. Throttled by OUTPUT_FLUSH_INTERVAL_MS to avoid
170
- * pounding sqlite on every chunk of streaming output.
171
- */
172
77
  private maybeFlushOutput;
173
78
  private persistComplete;
174
- /**
175
- * Reconstitute an in-memory AsyncJobRecord from a durable row, so subsequent
176
- * getJobSnapshot/getJobResult calls hit the in-memory cache.
177
- * The reconstituted record has process=null — it represents historical data only.
178
- */
179
79
  private hydrateFromStore;
180
- /**
181
- * Backwards-compatible entry point. Equivalent to startJobWithDedup({...}).snapshot.
182
- * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
183
- * optional param for the dedup-aware path.
184
- */
185
80
  startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean, stdin?: string): AsyncJobSnapshot;
186
- /**
187
- * Start a job, with optional dedup against recent identical requests.
188
- * Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
189
- *
190
- * Dedup is keyed on (cli, args). If a job with the same key was started within
191
- * the dedup window (default 1h) and is still running or completed, its snapshot
192
- * is returned without spawning a new process. forceRefresh skips dedup entirely.
193
- */
194
81
  startJobWithDedup(cli: LlmCli, args: string[], correlationId: string, opts?: StartJobOptions): StartJobOutcome;
195
82
  getJobSnapshot(jobId: string): AsyncJobSnapshot | null;
196
83
  getJobSnapshots(jobIds: string[]): Record<string, AsyncJobSnapshot | null>;
package/dist/async-job-manager.js CHANGED
@@ -5,9 +5,9 @@ import { ProcessMonitor } from "./process-monitor.js";
5
5
  import { computeRequestKey } from "./job-store.js";
6
6
  import { NoopFlightRecorder } from "./flight-recorder.js";
7
7
  const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
8
- const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
9
- const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
10
- const OUTPUT_FLUSH_INTERVAL_MS = 1000; // Throttle DB writes for streaming stdout/stderr
8
+ const JOB_TTL_MS = 60 * 60 * 1000;
9
+ const EVICTION_INTERVAL_MS = 5 * 60 * 1000;
10
+ const OUTPUT_FLUSH_INTERVAL_MS = 1000;
11
11
  function describeProcessLaunchError(cli, error) {
12
12
  const code = error.code;
13
13
  if (code === "ENOENT") {
@@ -30,11 +30,6 @@ function describeWindowsLaunchExit(cli, exitCode) {
30
30
  message: `The '${cli}' command was not found. Install the ${cli} CLI and make sure it is on PATH.`,
31
31
  };
32
32
  }
33
- /**
34
- * U22 fix: deterministic canonicalisation of an env-var map for the dedup key.
35
- * Returns "" when env is undefined or empty (preserves dedup key continuity for
36
- * pre-U22 callers that pass no env).
37
- */
38
33
  function canonicaliseEnvForKey(env) {
39
34
  if (!env)
40
35
  return "";
@@ -75,10 +70,6 @@ export class AsyncJobManager {
75
70
  if (count > 0) {
76
71
  this.logger.info(`Marked ${count} in-flight job(s) as orphaned after gateway restart`);
77
72
  }
78
- // Slice 1.5: close out the FR row for each orphaned job. The FR
79
- // logComplete UPDATE has WHERE status='started' so pre-1.7.0 rows
80
- // (where the prior gateway never wrote a logStart) silently
81
- // no-op. Wrapped per-orphan so a single bad row can't tank boot.
82
73
  for (const orphan of orphaned) {
83
74
  try {
84
75
  const durationMs = Math.max(0, Date.now() - new Date(orphan.startedAt).getTime());
@@ -103,18 +94,10 @@ export class AsyncJobManager {
103
94
  }
104
95
  }
105
96
  this.evictionTimer = setInterval(() => this.evictCompletedJobs(), EVICTION_INTERVAL_MS);
106
- // Allow the process to exit even if the timer is active
107
97
  if (this.evictionTimer.unref) {
108
98
  this.evictionTimer.unref();
109
99
  }
110
100
  }
111
- /**
112
- * True iff a durable (or memory) job store is attached. The MCP-tool
113
- * registration layer ANDs this with persistence.asyncJobsEnabled when
114
- * deciding whether to register the *_request_async / llm_job_* tools.
115
- * Without a store, async tools must not be registered, otherwise we
116
- * re-open the silent in-memory loss path the structural invariant closes.
117
- */
118
101
  hasStore() {
119
102
  return this.store !== null;
120
103
  }
@@ -137,7 +120,6 @@ export class AsyncJobManager {
137
120
  evictCompletedJobs() {
138
121
  const now = Date.now();
139
122
  let evicted = 0;
140
- // Dead process auto-recovery: check for running jobs whose process no longer exists
141
123
  for (const [id, job] of this.jobs) {
142
124
  if (job.status === "running" && job.process && job.process.pid) {
143
125
  try {
@@ -157,10 +139,8 @@ export class AsyncJobManager {
157
139
  this.writeFlightComplete(job, "failed");
158
140
  this.fireOnComplete(job);
159
141
  }
160
- // EPERM: process exists but we can't signal it — ignore
161
142
  }
162
143
  }
163
- // Check for exited flag mismatch (close handler may have fired but status wasn't updated)
164
144
  if (job.status === "running" && job.exited) {
165
145
  job.status = "failed";
166
146
  job.error = "Process exited without proper status transition";
@@ -186,7 +166,6 @@ export class AsyncJobManager {
186
166
  if (evicted > 0) {
187
167
  this.logger.debug(`Evicted ${evicted} completed jobs from memory (durable store retains them)`);
188
168
  }
189
- // Sweep the durable store, too. Errors are non-fatal — the job rows just stay until next sweep.
190
169
  if (this.store) {
191
170
  try {
192
171
  const removed = this.store.evictExpired();
@@ -199,26 +178,7 @@ export class AsyncJobManager {
199
178
  }
200
179
  }
201
180
  }
202
- /**
203
- * Compute the dedup key for a job. Stable across re-issues of the same request,
204
- * which is exactly what allows agents to safely retry without restarting the run.
205
- *
206
- * U22 fix: env vars participate in the key via a deterministic canonicalisation
207
- * (sorted keys → JSON-stringified). This prevents two Mistral requests with the
208
- * same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
209
- */
210
181
  buildRequestKey(cli, args, env, stdin, cwd) {
211
- // Slice κ: stdin participates in the dedup key. Two Claude requests
212
- // with identical argv but different cache_control content blocks
213
- // would otherwise collide on dedup and the second caller would get
214
- // the wrong response. The legacy "no stdin" code path passes
215
- // stdin=undefined, which serialises to the same empty marker the
216
- // previous version emitted — non-κ dedup is unchanged.
217
- // Slice λ: cwd participates similarly. Two requests with identical
218
- // argv but different worktrees would otherwise collide on dedup and
219
- // the second caller would receive a response executed in the wrong
220
- // worktree. cwd=undefined preserves the pre-λ key shape — non-λ
221
- // dedup is unchanged.
222
182
  const extraEnv = canonicaliseEnvForKey(env);
223
183
  const withStdin = stdin === undefined ? extraEnv : `${extraEnv}|stdin:${stdin}`;
224
184
  const extra = cwd === undefined ? withStdin : `${withStdin}|cwd:${cwd}`;
@@ -237,26 +197,13 @@ export class AsyncJobManager {
237
197
  this.logger.error(`Job ${job.id} onComplete hook threw`, err);
238
198
  }
239
199
  }
240
- /**
241
- * Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
242
- * failure semantics (response = stderr||stdout on failure, errorMessage
243
- * falls back through overrideErrorMessage → job.error → job.stderr →
244
- * "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
245
- * thrown logComplete can be retried by a later terminal callback; the
246
- * FR's WHERE status='started' UPDATE guard remains the actual
247
- * idempotency mechanism for the common "retry succeeds, original
248
- * succeeded too" case.
249
- */
250
200
  writeFlightComplete(job, finalStatus, overrideErrorMessage) {
251
201
  if (!job.flightRecorderEntry)
252
- return; // never opted in
253
- // R2 Codex-Unit-B F1: only write when armed. Sync-inline requests are
254
- // NOT armed at startJob — the sync handler owns the rich-metadata
255
- // safeFlightComplete write. Pure async + sync-deferred ARE armed.
202
+ return;
256
203
  if (!job.flightCompleteArmed)
257
204
  return;
258
205
  if (job.flightRecorderComplete)
259
- return; // already wrote successfully
206
+ return;
260
207
  const durationMs = Math.max(0, Date.now() - new Date(job.startedAt).getTime());
261
208
  const usage = finalStatus === "completed" && job.extractUsage ? this.safeExtractUsage(job) : {};
262
209
  const isFailure = finalStatus === "failed";
@@ -281,11 +228,7 @@ export class AsyncJobManager {
281
228
  cacheCreationTokens: usage.cacheCreationTokens,
282
229
  costUsd: usage.costUsd,
283
230
  });
284
- // Only mark complete on successful write so a thrown logComplete
285
- // can be retried by the next terminal callback.
286
231
  job.flightRecorderComplete = true;
287
- // Clear retained references so the GC can reclaim anything the
288
- // extractUsage closure captured.
289
232
  job.flightRecorderEntry = undefined;
290
233
  job.extractUsage = undefined;
291
234
  }
@@ -302,27 +245,15 @@ export class AsyncJobManager {
302
245
  return {};
303
246
  }
304
247
  }
305
- /**
306
- * R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
307
- * deferred response. From this point on the sync handler will not write
308
- * its own safeFlightComplete, so the manager takes over.
309
- *
310
- * Race mitigation: if the job already terminated between the sync
311
- * deadline expiring and this method firing, write logComplete
312
- * synchronously here so the previously-skipped terminal callback's
313
- * write isn't lost.
314
- */
315
248
  armFlightCompleteForDeferral(jobId) {
316
249
  const job = this.jobs.get(jobId);
317
250
  if (!job)
318
251
  return;
319
252
  if (job.flightCompleteArmed)
320
- return; // pure async already armed
253
+ return;
321
254
  job.flightCompleteArmed = true;
322
255
  if (job.status === "running")
323
256
  return;
324
- // Job already terminal — the close handler's writeFlightComplete
325
- // saw flightCompleteArmed=false and skipped. Write now to recover.
326
257
  const finalStatus = job.status === "completed" ? "completed" : "failed";
327
258
  const override = job.canceled ? "canceled by caller" : undefined;
328
259
  this.writeFlightComplete(job, finalStatus, override);
@@ -337,11 +268,6 @@ export class AsyncJobManager {
337
268
  this.logger.error(`JobStore.${label} failed`, err);
338
269
  }
339
270
  }
340
- /**
341
- * Flush in-memory stdout/stderr to the durable store if anything changed
342
- * since the last flush. Throttled by OUTPUT_FLUSH_INTERVAL_MS to avoid
343
- * pounding sqlite on every chunk of streaming output.
344
- */
345
271
  maybeFlushOutput(job, force = false) {
346
272
  if (!this.store)
347
273
  return;
@@ -361,7 +287,6 @@ export class AsyncJobManager {
361
287
  return;
362
288
  if (!job.finishedAt)
363
289
  return;
364
- // Make sure the latest output is captured in the same row update.
365
290
  job.outputDirty = false;
366
291
  this.safeStoreCall("recordComplete", () => this.store.recordComplete({
367
292
  id: job.id,
@@ -374,11 +299,6 @@ export class AsyncJobManager {
374
299
  finishedAt: job.finishedAt,
375
300
  }));
376
301
  }
377
- /**
378
- * Reconstitute an in-memory AsyncJobRecord from a durable row, so subsequent
379
- * getJobSnapshot/getJobResult calls hit the in-memory cache.
380
- * The reconstituted record has process=null — it represents historical data only.
381
- */
382
302
  hydrateFromStore(jobId) {
383
303
  if (!this.store)
384
304
  return null;
@@ -426,11 +346,6 @@ export class AsyncJobManager {
426
346
  this.jobs.set(jobId, reconstituted);
427
347
  return reconstituted;
428
348
  }
429
- /**
430
- * Backwards-compatible entry point. Equivalent to startJobWithDedup({...}).snapshot.
431
- * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
432
- * optional param for the dedup-aware path.
433
- */
434
349
  startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, stdin) {
435
350
  return this.startJobWithDedup(cli, args, correlationId, {
436
351
  cwd,
@@ -445,14 +360,6 @@ export class AsyncJobManager {
445
360
  writeFlightStart,
446
361
  }).snapshot;
447
362
  }
448
- /**
449
- * Start a job, with optional dedup against recent identical requests.
450
- * Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
451
- *
452
- * Dedup is keyed on (cli, args). If a job with the same key was started within
453
- * the dedup window (default 1h) and is still running or completed, its snapshot
454
- * is returned without spawning a new process. forceRefresh skips dedup entirely.
455
- */
456
363
  startJobWithDedup(cli, args, correlationId, opts = {}) {
457
364
  const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, stdin, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
458
365
  const requestKey = this.buildRequestKey(cli, args, extraEnv, stdin, cwd);
@@ -460,7 +367,6 @@ export class AsyncJobManager {
460
367
  try {
461
368
  const existing = this.store.findByRequestKey(requestKey);
462
369
  if (existing) {
463
- // Prefer the in-memory record if we still have it (live process, idle timers, etc).
464
370
  let record = this.jobs.get(existing.id);
465
371
  if (!record) {
466
372
  record = this.hydrateFromStore(existing.id) ?? undefined;
@@ -471,11 +377,6 @@ export class AsyncJobManager {
471
377
  originalCorrelationId: record.correlationId,
472
378
  status: record.status,
473
379
  });
474
- // U26 fix: the caller's per-request resources (e.g. outputSchema temp
475
- // file) are NOT consumed by the deduped job, which reuses its own
476
- // original resources. Release the new request's cleanup immediately
477
- // to avoid an orphaned temp file. The original job's onComplete (if
478
- // any) remains attached to that original job record.
479
380
  if (onComplete) {
480
381
  try {
481
382
  onComplete();
@@ -498,8 +399,6 @@ export class AsyncJobManager {
498
399
  }
499
400
  const id = randomUUID();
500
401
  const startedAt = new Date().toISOString();
501
- // Mistral Vibe ships as the `vibe` binary; the gateway uses `mistral` as the
502
- // provider key but spawns `vibe` on the shell.
503
402
  const command = cli === "mistral" ? "vibe" : cli;
504
403
  const baseEnv = envWithExtendedPath(process.env, getExtendedPath());
505
404
  const child = spawnCliProcess(command, args, {
@@ -516,7 +415,6 @@ export class AsyncJobManager {
516
415
  }
517
416
  child.stdin.end();
518
417
  }
519
- // Single cleanup flag to prevent double-unregister
520
418
  let groupCleaned = false;
521
419
  const cleanupGroup = () => {
522
420
  if (groupCleaned)
@@ -552,10 +450,6 @@ export class AsyncJobManager {
552
450
  flightRecorderEntry,
553
451
  extractUsage,
554
452
  flightRecorderComplete: false,
555
- // R2 Codex-Unit-B F1: pure async path arms now (writeFlightStart=true
556
- // means the manager is the only FR writer). Sync-deferred path
557
- // arrives with writeFlightStart=false and arms later via
558
- // armFlightCompleteForDeferral when awaitJobOrDefer decides to defer.
559
453
  flightCompleteArmed: writeFlightStart === true,
560
454
  };
561
455
  this.jobs.set(id, job);
@@ -569,10 +463,6 @@ export class AsyncJobManager {
569
463
  startedAt,
570
464
  pid: child.pid ?? null,
571
465
  }));
572
- // Slice 1.5: only opt-in callers (pure async handlers) write logStart
573
- // here. The sync-deferred path passes writeFlightStart=false because
574
- // the upstream sync handler already wrote a logStart row keyed on the
575
- // same correlationId; a duplicate INSERT would crash on the PK.
576
466
  if (writeFlightStart && flightRecorderEntry) {
577
467
  try {
578
468
  this.flightRecorder.logStart({
@@ -592,7 +482,6 @@ export class AsyncJobManager {
592
482
  }
593
483
  }
594
484
  this.logger.info(`Job ${id} started for ${cli}`, { correlationId });
595
- // Idle timeout: kill process if no output activity for idleTimeoutMs
596
485
  let idleTimerId;
597
486
  const resetIdleTimer = () => {
598
487
  if (!idleTimeoutMs || idleTimeoutMs <= 0)
@@ -655,7 +544,6 @@ export class AsyncJobManager {
655
544
  child.on("close", (code) => {
656
545
  job.exited = true;
657
546
  job.clearIdleTimer?.();
658
- // Unregister process group on clean exit (no kill was issued)
659
547
  if (!job.canceled && job.status === "running") {
660
548
  job.cleanupGroup?.();
661
549
  }
@@ -664,11 +552,7 @@ export class AsyncJobManager {
664
552
  if (!job.finishedAt) {
665
553
  job.finishedAt = new Date().toISOString();
666
554
  }
667
- // Ensure terminal state reaches the durable store (idle-timeout/output-overflow already persisted).
668
555
  this.persistComplete(job);
669
- // Slice 1.5: retry the FR complete write iff the earlier terminal
670
- // callback's logComplete threw. The single-shot guard in
671
- // writeFlightComplete makes this a no-op in the common case.
672
556
  const fallbackFlightStatus = job.status === "completed" ? "completed" : "failed";
673
557
  const fallbackOverride = job.status === "canceled" ? "canceled by caller" : undefined;
674
558
  this.writeFlightComplete(job, fallbackFlightStatus, fallbackOverride);
@@ -736,7 +620,6 @@ export class AsyncJobManager {
736
620
  if (job.status !== "running") {
737
621
  return { canceled: false, reason: `Job is already ${job.status}` };
738
622
  }
739
- // Reconstituted (orphaned) jobs have no live process to signal — refuse cancel.
740
623
  if (!job.process) {
741
624
  return {
742
625
  canceled: false,
@@ -777,7 +660,6 @@ export class AsyncJobManager {
777
660
  getJobHealth() {
778
661
  const running = this.getRunningJobs();
779
662
  const health = this.processMonitor.checkJobHealth(running);
780
- // Clean up stale CPU samples for PIDs that are no longer running
781
663
  const activePids = new Set(running.map(j => j.pid).filter((p) => p !== null));
782
664
  this.processMonitor.cleanupSamples(activePids);
783
665
  return {