llm-cli-gateway 1.0.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +153 -9
- package/dist/approval-manager.d.ts +1 -1
- package/dist/approval-manager.js +7 -4
- package/dist/async-job-manager.d.ts +53 -4
- package/dist/async-job-manager.js +254 -27
- package/dist/claude-mcp-config.js +7 -4
- package/dist/cli-updater.d.ts +38 -0
- package/dist/cli-updater.js +145 -0
- package/dist/config.js +15 -9
- package/dist/db.js +4 -4
- package/dist/executor.js +20 -13
- package/dist/flight-recorder.d.ts +48 -0
- package/dist/flight-recorder.js +220 -0
- package/dist/health.js +3 -3
- package/dist/index.d.ts +28 -0
- package/dist/index.js +1456 -278
- package/dist/job-store.d.ts +84 -0
- package/dist/job-store.js +251 -0
- package/dist/logger.js +1 -1
- package/dist/metrics.js +9 -12
- package/dist/migrate-sessions.js +2 -2
- package/dist/model-registry.d.ts +14 -0
- package/dist/model-registry.js +448 -140
- package/dist/optimizer.js +9 -9
- package/dist/process-monitor.js +24 -8
- package/dist/request-helpers.d.ts +48 -0
- package/dist/request-helpers.js +64 -2
- package/dist/resources.js +76 -32
- package/dist/retry.js +6 -4
- package/dist/review-integrity.d.ts +6 -38
- package/dist/review-integrity.js +41 -275
- package/dist/session-manager-pg.js +7 -4
- package/dist/session-manager.d.ts +1 -1
- package/dist/session-manager.js +9 -5
- package/dist/stream-json-parser.js +8 -6
- package/package.json +7 -4
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
import { spawn } from "child_process";
|
|
2
2
|
import { randomUUID } from "crypto";
|
|
3
|
-
import { getExtendedPath, killProcessGroup, registerProcessGroup, unregisterProcessGroup } from "./executor.js";
|
|
3
|
+
import { getExtendedPath, killProcessGroup, registerProcessGroup, unregisterProcessGroup, } from "./executor.js";
|
|
4
4
|
import { noopLogger } from "./logger.js";
|
|
5
5
|
import { ProcessMonitor } from "./process-monitor.js";
|
|
6
|
+
import { computeRequestKey } from "./job-store.js";
|
|
6
7
|
const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
|
|
7
|
-
const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
8
|
+
const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
|
|
8
9
|
const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
|
|
10
|
+
const OUTPUT_FLUSH_INTERVAL_MS = 1000; // Throttle DB writes for streaming stdout/stderr
|
|
9
11
|
/**
 * Limit `value` to at most `maxChars` characters, keeping the tail of the string.
 *
 * @param {string} value - Text to limit.
 * @param {number} maxChars - Maximum number of characters to keep.
 * @returns {{ text: string, truncated: boolean }} The (possibly shortened) text and
 *   a flag that is true when leading characters were dropped.
 */
function truncateText(value, maxChars) {
    const fits = value.length <= maxChars;
    if (!fits) {
        // Slice from the front so the end of the text is what survives.
        const start = value.length - maxChars;
        return { text: value.slice(start), truncated: true };
    }
    return { text: value, truncated: false };
}
|
|
18
20
|
export class AsyncJobManager {
|
|
@@ -21,10 +23,23 @@ export class AsyncJobManager {
|
|
|
21
23
|
jobs = new Map();
|
|
22
24
|
evictionTimer = null;
|
|
23
25
|
processMonitor;
|
|
24
|
-
|
|
26
|
+
store;
|
|
27
|
+
constructor(logger = noopLogger, onJobComplete, store = null) {
|
|
25
28
|
this.logger = logger;
|
|
26
29
|
this.onJobComplete = onJobComplete;
|
|
27
30
|
this.processMonitor = new ProcessMonitor(logger);
|
|
31
|
+
this.store = store;
|
|
32
|
+
if (this.store) {
|
|
33
|
+
try {
|
|
34
|
+
const orphaned = this.store.markOrphanedOnStartup();
|
|
35
|
+
if (orphaned > 0) {
|
|
36
|
+
this.logger.info(`Marked ${orphaned} in-flight job(s) as orphaned after gateway restart`);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
catch (err) {
|
|
40
|
+
this.logger.error("markOrphanedOnStartup failed", err);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
28
43
|
this.evictionTimer = setInterval(() => this.evictCompletedJobs(), EVICTION_INTERVAL_MS);
|
|
29
44
|
// Allow the process to exit even if the timer is active
|
|
30
45
|
if (this.evictionTimer.unref) {
|
|
@@ -52,7 +67,7 @@ export class AsyncJobManager {
|
|
|
52
67
|
let evicted = 0;
|
|
53
68
|
// Dead process auto-recovery: check for running jobs whose process no longer exists
|
|
54
69
|
for (const [id, job] of this.jobs) {
|
|
55
|
-
if (job.status === "running" && job.process.pid) {
|
|
70
|
+
if (job.status === "running" && job.process && job.process.pid) {
|
|
56
71
|
try {
|
|
57
72
|
process.kill(job.process.pid, 0);
|
|
58
73
|
}
|
|
@@ -66,6 +81,7 @@ export class AsyncJobManager {
|
|
|
66
81
|
unregisterProcessGroup(job.process.pid);
|
|
67
82
|
this.logger.error(`Job ${id} process ${job.process.pid} no longer exists, marking as failed`);
|
|
68
83
|
this.emitMetrics(job);
|
|
84
|
+
this.persistComplete(job);
|
|
69
85
|
}
|
|
70
86
|
// EPERM: process exists but we can't signal it — ignore
|
|
71
87
|
}
|
|
@@ -75,10 +91,11 @@ export class AsyncJobManager {
|
|
|
75
91
|
job.status = "failed";
|
|
76
92
|
job.error = "Process exited without proper status transition";
|
|
77
93
|
job.finishedAt = job.finishedAt || new Date().toISOString();
|
|
78
|
-
if (job.process.pid)
|
|
94
|
+
if (job.process && job.process.pid)
|
|
79
95
|
unregisterProcessGroup(job.process.pid);
|
|
80
96
|
this.logger.error(`Job ${id} has exited flag but was still in running state, marking as failed`);
|
|
81
97
|
this.emitMetrics(job);
|
|
98
|
+
this.persistComplete(job);
|
|
82
99
|
}
|
|
83
100
|
}
|
|
84
101
|
for (const [id, job] of this.jobs) {
|
|
@@ -91,17 +108,185 @@ export class AsyncJobManager {
|
|
|
91
108
|
}
|
|
92
109
|
}
|
|
93
110
|
if (evicted > 0) {
|
|
94
|
-
this.logger.debug(`Evicted ${evicted} completed jobs`);
|
|
111
|
+
this.logger.debug(`Evicted ${evicted} completed jobs from memory (durable store retains them)`);
|
|
112
|
+
}
|
|
113
|
+
// Sweep the durable store, too. Errors are non-fatal — the job rows just stay until next sweep.
|
|
114
|
+
if (this.store) {
|
|
115
|
+
try {
|
|
116
|
+
const removed = this.store.evictExpired();
|
|
117
|
+
if (removed > 0) {
|
|
118
|
+
this.logger.debug(`Evicted ${removed} expired jobs from durable store`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
catch (err) {
|
|
122
|
+
this.logger.error("durable store eviction failed", err);
|
|
123
|
+
}
|
|
95
124
|
}
|
|
96
125
|
}
|
|
97
|
-
|
|
126
|
+
/**
|
|
127
|
+
* Compute the dedup key for a job. Stable across re-issues of the same request,
|
|
128
|
+
* which is exactly what allows agents to safely retry without restarting the run.
|
|
129
|
+
*/
|
|
130
|
+
buildRequestKey(cli, args) {
|
|
131
|
+
return computeRequestKey(cli, args);
|
|
132
|
+
}
|
|
133
|
+
safeStoreCall(label, fn) {
|
|
134
|
+
if (!this.store)
|
|
135
|
+
return;
|
|
136
|
+
try {
|
|
137
|
+
fn();
|
|
138
|
+
}
|
|
139
|
+
catch (err) {
|
|
140
|
+
this.logger.error(`JobStore.${label} failed`, err);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Flush in-memory stdout/stderr to the durable store if anything changed
|
|
145
|
+
* since the last flush. Throttled by OUTPUT_FLUSH_INTERVAL_MS to avoid
|
|
146
|
+
* pounding sqlite on every chunk of streaming output.
|
|
147
|
+
*/
|
|
148
|
+
maybeFlushOutput(job, force = false) {
|
|
149
|
+
if (!this.store)
|
|
150
|
+
return;
|
|
151
|
+
if (!job.outputDirty)
|
|
152
|
+
return;
|
|
153
|
+
const now = Date.now();
|
|
154
|
+
if (!force && now - job.lastOutputFlushAt < OUTPUT_FLUSH_INTERVAL_MS)
|
|
155
|
+
return;
|
|
156
|
+
job.outputDirty = false;
|
|
157
|
+
job.lastOutputFlushAt = now;
|
|
158
|
+
this.safeStoreCall("recordOutput", () => this.store.recordOutput(job.id, job.stdout, job.stderr, job.outputTruncated));
|
|
159
|
+
}
|
|
160
|
+
persistComplete(job) {
|
|
161
|
+
if (!this.store)
|
|
162
|
+
return;
|
|
163
|
+
if (job.status === "running")
|
|
164
|
+
return;
|
|
165
|
+
if (!job.finishedAt)
|
|
166
|
+
return;
|
|
167
|
+
// Make sure the latest output is captured in the same row update.
|
|
168
|
+
job.outputDirty = false;
|
|
169
|
+
this.safeStoreCall("recordComplete", () => this.store.recordComplete({
|
|
170
|
+
id: job.id,
|
|
171
|
+
status: job.status === "running" ? "failed" : job.status,
|
|
172
|
+
exitCode: job.exitCode,
|
|
173
|
+
stdout: job.stdout,
|
|
174
|
+
stderr: job.stderr,
|
|
175
|
+
outputTruncated: job.outputTruncated,
|
|
176
|
+
error: job.error,
|
|
177
|
+
finishedAt: job.finishedAt,
|
|
178
|
+
}));
|
|
179
|
+
}
|
|
180
|
+
/**
|
|
181
|
+
* Reconstitute an in-memory AsyncJobRecord from a durable row, so subsequent
|
|
182
|
+
* getJobSnapshot/getJobResult calls hit the in-memory cache.
|
|
183
|
+
* The reconstituted record has process=null — it represents historical data only.
|
|
184
|
+
*/
|
|
185
|
+
hydrateFromStore(jobId) {
|
|
186
|
+
if (!this.store)
|
|
187
|
+
return null;
|
|
188
|
+
let row;
|
|
189
|
+
try {
|
|
190
|
+
row = this.store.getById(jobId);
|
|
191
|
+
}
|
|
192
|
+
catch (err) {
|
|
193
|
+
this.logger.error("JobStore.getById failed", err);
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
196
|
+
if (!row)
|
|
197
|
+
return null;
|
|
198
|
+
const args = (() => {
|
|
199
|
+
try {
|
|
200
|
+
const parsed = JSON.parse(row.argsJson);
|
|
201
|
+
return Array.isArray(parsed) ? parsed.map(String) : [];
|
|
202
|
+
}
|
|
203
|
+
catch {
|
|
204
|
+
return [];
|
|
205
|
+
}
|
|
206
|
+
})();
|
|
207
|
+
const reconstituted = {
|
|
208
|
+
id: row.id,
|
|
209
|
+
cli: row.cli,
|
|
210
|
+
args,
|
|
211
|
+
requestKey: row.requestKey,
|
|
212
|
+
correlationId: row.correlationId,
|
|
213
|
+
status: row.status,
|
|
214
|
+
startedAt: row.startedAt,
|
|
215
|
+
finishedAt: row.finishedAt,
|
|
216
|
+
exitCode: row.exitCode,
|
|
217
|
+
stdout: row.stdout,
|
|
218
|
+
stderr: row.stderr,
|
|
219
|
+
outputTruncated: row.outputTruncated,
|
|
220
|
+
canceled: row.status === "canceled",
|
|
221
|
+
error: row.error,
|
|
222
|
+
process: null,
|
|
223
|
+
exited: row.status !== "running",
|
|
224
|
+
metricsRecorded: true,
|
|
225
|
+
outputFormat: row.outputFormat ?? undefined,
|
|
226
|
+
outputDirty: false,
|
|
227
|
+
lastOutputFlushAt: Date.now(),
|
|
228
|
+
};
|
|
229
|
+
this.jobs.set(jobId, reconstituted);
|
|
230
|
+
return reconstituted;
|
|
231
|
+
}
|
|
232
|
+
/**
|
|
233
|
+
* Backwards-compatible entry point. Equivalent to startJobWithDedup({...}).snapshot.
|
|
234
|
+
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
235
|
+
* optional param for the dedup-aware path.
|
|
236
|
+
*/
|
|
237
|
+
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh) {
|
|
238
|
+
return this.startJobWithDedup(cli, args, correlationId, {
|
|
239
|
+
cwd,
|
|
240
|
+
idleTimeoutMs,
|
|
241
|
+
outputFormat,
|
|
242
|
+
forceRefresh,
|
|
243
|
+
}).snapshot;
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Start a job, with optional dedup against recent identical requests.
|
|
247
|
+
* Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
|
|
248
|
+
*
|
|
249
|
+
* Dedup is keyed on (cli, args). If a job with the same key was started within
|
|
250
|
+
* the dedup window (default 1h) and is still running or completed, its snapshot
|
|
251
|
+
* is returned without spawning a new process. forceRefresh skips dedup entirely.
|
|
252
|
+
*/
|
|
253
|
+
startJobWithDedup(cli, args, correlationId, opts = {}) {
|
|
254
|
+
const { cwd, idleTimeoutMs, outputFormat, forceRefresh } = opts;
|
|
255
|
+
const requestKey = this.buildRequestKey(cli, args);
|
|
256
|
+
if (!forceRefresh && this.store) {
|
|
257
|
+
try {
|
|
258
|
+
const existing = this.store.findByRequestKey(requestKey);
|
|
259
|
+
if (existing) {
|
|
260
|
+
// Prefer the in-memory record if we still have it (live process, idle timers, etc).
|
|
261
|
+
let record = this.jobs.get(existing.id);
|
|
262
|
+
if (!record) {
|
|
263
|
+
record = this.hydrateFromStore(existing.id) ?? undefined;
|
|
264
|
+
}
|
|
265
|
+
if (record) {
|
|
266
|
+
this.logger.info(`Job ${existing.id} reused via dedup for ${cli}`, {
|
|
267
|
+
correlationId,
|
|
268
|
+
originalCorrelationId: record.correlationId,
|
|
269
|
+
status: record.status,
|
|
270
|
+
});
|
|
271
|
+
return {
|
|
272
|
+
snapshot: this.snapshot(record),
|
|
273
|
+
deduped: true,
|
|
274
|
+
originalCorrelationId: record.correlationId,
|
|
275
|
+
};
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
catch (err) {
|
|
280
|
+
this.logger.error("dedup lookup failed; proceeding with fresh run", err);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
98
283
|
const id = randomUUID();
|
|
99
284
|
const startedAt = new Date().toISOString();
|
|
100
285
|
const child = spawn(cli, args, {
|
|
101
286
|
cwd,
|
|
102
287
|
detached: true,
|
|
103
288
|
stdio: ["ignore", "pipe", "pipe"],
|
|
104
|
-
env: { ...process.env, PATH: getExtendedPath() }
|
|
289
|
+
env: { ...process.env, PATH: getExtendedPath() },
|
|
105
290
|
});
|
|
106
291
|
if (child.pid)
|
|
107
292
|
registerProcessGroup(child.pid);
|
|
@@ -119,6 +304,7 @@ export class AsyncJobManager {
|
|
|
119
304
|
id,
|
|
120
305
|
cli,
|
|
121
306
|
args: [...args],
|
|
307
|
+
requestKey,
|
|
122
308
|
correlationId,
|
|
123
309
|
status: "running",
|
|
124
310
|
startedAt,
|
|
@@ -133,9 +319,21 @@ export class AsyncJobManager {
|
|
|
133
319
|
exited: false,
|
|
134
320
|
metricsRecorded: false,
|
|
135
321
|
outputFormat,
|
|
136
|
-
cleanupGroup
|
|
322
|
+
cleanupGroup,
|
|
323
|
+
outputDirty: false,
|
|
324
|
+
lastOutputFlushAt: Date.now(),
|
|
137
325
|
};
|
|
138
326
|
this.jobs.set(id, job);
|
|
327
|
+
this.safeStoreCall("recordStart", () => this.store.recordStart({
|
|
328
|
+
id,
|
|
329
|
+
correlationId,
|
|
330
|
+
requestKey,
|
|
331
|
+
cli,
|
|
332
|
+
args: [...args],
|
|
333
|
+
outputFormat,
|
|
334
|
+
startedAt,
|
|
335
|
+
pid: child.pid ?? null,
|
|
336
|
+
}));
|
|
139
337
|
this.logger.info(`Job ${id} started for ${cli}`, { correlationId });
|
|
140
338
|
// Idle timeout: kill process if no output activity for idleTimeoutMs
|
|
141
339
|
let idleTimerId;
|
|
@@ -151,11 +349,15 @@ export class AsyncJobManager {
|
|
|
151
349
|
job.exitCode = 125;
|
|
152
350
|
job.error = `Process killed after ${idleTimeoutMs}ms of inactivity`;
|
|
153
351
|
job.finishedAt = new Date().toISOString();
|
|
154
|
-
|
|
155
|
-
|
|
352
|
+
if (job.process)
|
|
353
|
+
killProcessGroup(job.process, "SIGTERM");
|
|
354
|
+
this.logger.info(`Job ${id} killed due to inactivity (${idleTimeoutMs}ms)`, {
|
|
355
|
+
correlationId,
|
|
356
|
+
});
|
|
156
357
|
this.emitMetrics(job);
|
|
358
|
+
this.persistComplete(job);
|
|
157
359
|
setTimeout(() => {
|
|
158
|
-
if (!job.exited)
|
|
360
|
+
if (!job.exited && job.process)
|
|
159
361
|
killProcessGroup(job.process, "SIGKILL");
|
|
160
362
|
job.cleanupGroup?.();
|
|
161
363
|
}, 5000);
|
|
@@ -183,6 +385,7 @@ export class AsyncJobManager {
|
|
|
183
385
|
job.finishedAt = new Date().toISOString();
|
|
184
386
|
this.logger.error(`Job ${id} error: ${error.message}`, { correlationId });
|
|
185
387
|
this.emitMetrics(job);
|
|
388
|
+
this.persistComplete(job);
|
|
186
389
|
}
|
|
187
390
|
});
|
|
188
391
|
child.on("close", (code) => {
|
|
@@ -197,6 +400,8 @@ export class AsyncJobManager {
|
|
|
197
400
|
if (!job.finishedAt) {
|
|
198
401
|
job.finishedAt = new Date().toISOString();
|
|
199
402
|
}
|
|
403
|
+
// Ensure terminal state reaches the durable store (idle-timeout/output-overflow already persisted).
|
|
404
|
+
this.persistComplete(job);
|
|
200
405
|
return;
|
|
201
406
|
}
|
|
202
407
|
job.exitCode = code ?? 0;
|
|
@@ -211,20 +416,25 @@ export class AsyncJobManager {
|
|
|
211
416
|
job.status = "failed";
|
|
212
417
|
}
|
|
213
418
|
this.emitMetrics(job);
|
|
419
|
+
this.persistComplete(job);
|
|
214
420
|
});
|
|
215
|
-
return this.snapshot(job);
|
|
421
|
+
return { snapshot: this.snapshot(job), deduped: false };
|
|
216
422
|
}
|
|
217
423
|
getJobSnapshot(jobId) {
|
|
218
|
-
|
|
424
|
+
let job = this.jobs.get(jobId);
|
|
219
425
|
if (!job) {
|
|
220
|
-
|
|
426
|
+
job = this.hydrateFromStore(jobId) ?? undefined;
|
|
427
|
+
if (!job)
|
|
428
|
+
return null;
|
|
221
429
|
}
|
|
222
430
|
return this.snapshot(job);
|
|
223
431
|
}
|
|
224
432
|
getJobResult(jobId, maxChars = 200000) {
|
|
225
|
-
|
|
433
|
+
let job = this.jobs.get(jobId);
|
|
226
434
|
if (!job) {
|
|
227
|
-
|
|
435
|
+
job = this.hydrateFromStore(jobId) ?? undefined;
|
|
436
|
+
if (!job)
|
|
437
|
+
return null;
|
|
228
438
|
}
|
|
229
439
|
const stdout = truncateText(job.stdout, maxChars);
|
|
230
440
|
const stderr = truncateText(job.stderr, maxChars);
|
|
@@ -233,7 +443,7 @@ export class AsyncJobManager {
|
|
|
233
443
|
stdout: stdout.text,
|
|
234
444
|
stderr: stderr.text,
|
|
235
445
|
stdoutTruncated: stdout.truncated,
|
|
236
|
-
stderrTruncated: stderr.truncated
|
|
446
|
+
stderrTruncated: stderr.truncated,
|
|
237
447
|
};
|
|
238
448
|
}
|
|
239
449
|
cancelJob(jobId) {
|
|
@@ -244,14 +454,22 @@ export class AsyncJobManager {
|
|
|
244
454
|
if (job.status !== "running") {
|
|
245
455
|
return { canceled: false, reason: `Job is already ${job.status}` };
|
|
246
456
|
}
|
|
457
|
+
// Reconstituted (orphaned) jobs have no live process to signal — refuse cancel.
|
|
458
|
+
if (!job.process) {
|
|
459
|
+
return {
|
|
460
|
+
canceled: false,
|
|
461
|
+
reason: "Job has no live process (orphaned from prior gateway run)",
|
|
462
|
+
};
|
|
463
|
+
}
|
|
247
464
|
job.canceled = true;
|
|
248
465
|
job.status = "canceled";
|
|
249
466
|
job.finishedAt = new Date().toISOString();
|
|
250
467
|
job.clearIdleTimer?.();
|
|
251
468
|
killProcessGroup(job.process, "SIGTERM");
|
|
252
469
|
this.logger.info(`Job ${jobId} canceled`, { correlationId: job.correlationId });
|
|
470
|
+
this.persistComplete(job);
|
|
253
471
|
setTimeout(() => {
|
|
254
|
-
if (!job.exited)
|
|
472
|
+
if (!job.exited && job.process)
|
|
255
473
|
killProcessGroup(job.process, "SIGKILL");
|
|
256
474
|
job.cleanupGroup?.();
|
|
257
475
|
}, 5000);
|
|
@@ -262,8 +480,11 @@ export class AsyncJobManager {
|
|
|
262
480
|
for (const [id, job] of this.jobs) {
|
|
263
481
|
if (job.status === "running") {
|
|
264
482
|
result.push({
|
|
265
|
-
jobId: id,
|
|
266
|
-
|
|
483
|
+
jobId: id,
|
|
484
|
+
cli: job.cli,
|
|
485
|
+
status: job.status,
|
|
486
|
+
pid: job.process?.pid ?? null,
|
|
487
|
+
startedAt: job.startedAt,
|
|
267
488
|
});
|
|
268
489
|
}
|
|
269
490
|
}
|
|
@@ -279,7 +500,7 @@ export class AsyncJobManager {
|
|
|
279
500
|
runningJobs: running.length,
|
|
280
501
|
deadJobs: health.filter(h => h.isDead).length,
|
|
281
502
|
zombieJobs: health.filter(h => h.isZombie).length,
|
|
282
|
-
jobs: health
|
|
503
|
+
jobs: health,
|
|
283
504
|
};
|
|
284
505
|
}
|
|
285
506
|
getJobOutputFormat(jobId) {
|
|
@@ -298,7 +519,7 @@ export class AsyncJobManager {
|
|
|
298
519
|
stdoutBytes: Buffer.byteLength(job.stdout),
|
|
299
520
|
stderrBytes: Buffer.byteLength(job.stderr),
|
|
300
521
|
error: job.error,
|
|
301
|
-
exited: job.exited
|
|
522
|
+
exited: job.exited,
|
|
302
523
|
};
|
|
303
524
|
}
|
|
304
525
|
appendOutput(job, stream, chunk) {
|
|
@@ -311,11 +532,15 @@ export class AsyncJobManager {
|
|
|
311
532
|
job.error = "Output exceeded maximum size (50MB)";
|
|
312
533
|
job.finishedAt = new Date().toISOString();
|
|
313
534
|
job.clearIdleTimer?.();
|
|
314
|
-
|
|
315
|
-
|
|
535
|
+
if (job.process)
|
|
536
|
+
killProcessGroup(job.process, "SIGTERM");
|
|
537
|
+
this.logger.info(`Job ${job.id} killed due to output overflow`, {
|
|
538
|
+
correlationId: job.correlationId,
|
|
539
|
+
});
|
|
316
540
|
this.emitMetrics(job);
|
|
541
|
+
this.persistComplete(job);
|
|
317
542
|
setTimeout(() => {
|
|
318
|
-
if (!job.exited)
|
|
543
|
+
if (!job.exited && job.process)
|
|
319
544
|
killProcessGroup(job.process, "SIGKILL");
|
|
320
545
|
job.cleanupGroup?.();
|
|
321
546
|
}, 5000);
|
|
@@ -330,5 +555,7 @@ export class AsyncJobManager {
|
|
|
330
555
|
else {
|
|
331
556
|
job.stderr += text;
|
|
332
557
|
}
|
|
558
|
+
job.outputDirty = true;
|
|
559
|
+
this.maybeFlushOutput(job);
|
|
333
560
|
}
|
|
334
561
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, renameSync, openSync, fsyncSync, closeSync, chmodSync } from "fs";
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, renameSync, openSync, fsyncSync, closeSync, chmodSync, } from "fs";
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { dirname, join } from "path";
|
|
4
4
|
import { parse as parseToml } from "toml";
|
|
@@ -45,7 +45,7 @@ function readCodexServerConfig(server) {
|
|
|
45
45
|
return {
|
|
46
46
|
command,
|
|
47
47
|
args,
|
|
48
|
-
env
|
|
48
|
+
env,
|
|
49
49
|
};
|
|
50
50
|
}
|
|
51
51
|
catch {
|
|
@@ -120,7 +120,7 @@ function toClaudeServerDef(server) {
|
|
|
120
120
|
return {
|
|
121
121
|
command,
|
|
122
122
|
args,
|
|
123
|
-
...(Object.keys(env).length > 0 ? { env } : {})
|
|
123
|
+
...(Object.keys(env).length > 0 ? { env } : {}),
|
|
124
124
|
};
|
|
125
125
|
}
|
|
126
126
|
export function buildClaudeMcpConfig(servers) {
|
|
@@ -142,7 +142,10 @@ export function buildClaudeMcpConfig(servers) {
|
|
|
142
142
|
try {
|
|
143
143
|
mkdirSync(configDir, { recursive: true });
|
|
144
144
|
const tempPath = `${configPath}.tmp.${process.pid}`;
|
|
145
|
-
writeFileSync(tempPath, JSON.stringify({ mcpServers }, null, 2), {
|
|
145
|
+
writeFileSync(tempPath, JSON.stringify({ mcpServers }, null, 2), {
|
|
146
|
+
encoding: "utf-8",
|
|
147
|
+
mode: 0o600,
|
|
148
|
+
});
|
|
146
149
|
const fd = openSync(tempPath, "r+");
|
|
147
150
|
try {
|
|
148
151
|
fsyncSync(fd);
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { Logger } from "./logger.js";
|
|
2
|
+
import type { CliType } from "./session-manager.js";
|
|
3
|
+
export interface CliVersionInfo {
|
|
4
|
+
cli: CliType;
|
|
5
|
+
command: string;
|
|
6
|
+
args: string[];
|
|
7
|
+
installed: boolean;
|
|
8
|
+
version?: string;
|
|
9
|
+
stdout: string;
|
|
10
|
+
stderr: string;
|
|
11
|
+
error?: string;
|
|
12
|
+
}
|
|
13
|
+
export interface CliUpgradePlan {
|
|
14
|
+
cli: CliType;
|
|
15
|
+
target: string;
|
|
16
|
+
command: string;
|
|
17
|
+
args: string[];
|
|
18
|
+
strategy: "self-update" | "npm-global-install";
|
|
19
|
+
requiresNetwork: boolean;
|
|
20
|
+
note?: string;
|
|
21
|
+
}
|
|
22
|
+
export interface CliUpgradeResult {
|
|
23
|
+
dryRun: boolean;
|
|
24
|
+
plan: CliUpgradePlan;
|
|
25
|
+
stdout?: string;
|
|
26
|
+
stderr?: string;
|
|
27
|
+
exitCode?: number;
|
|
28
|
+
}
|
|
29
|
+
export declare function buildCliUpgradePlan(cli: CliType, target?: string): CliUpgradePlan;
|
|
30
|
+
export declare function getCliVersion(cli: CliType): Promise<CliVersionInfo>;
|
|
31
|
+
export declare function getCliVersions(cli?: CliType): Promise<CliVersionInfo[]>;
|
|
32
|
+
export declare function runCliUpgrade(params: {
|
|
33
|
+
cli: CliType;
|
|
34
|
+
target?: string;
|
|
35
|
+
dryRun: boolean;
|
|
36
|
+
timeoutMs?: number;
|
|
37
|
+
logger?: Logger;
|
|
38
|
+
}): Promise<CliUpgradeResult>;
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { executeCli } from "./executor.js";
|
|
2
|
+
const VERSION_ARGS = {
|
|
3
|
+
claude: ["--version"],
|
|
4
|
+
codex: ["--version"],
|
|
5
|
+
gemini: ["--version"],
|
|
6
|
+
grok: ["--version"],
|
|
7
|
+
};
|
|
8
|
+
const NPM_PACKAGES = {
|
|
9
|
+
codex: "@openai/codex",
|
|
10
|
+
gemini: "@google/gemini-cli",
|
|
11
|
+
};
|
|
12
|
+
/**
 * Describe the command that would upgrade `cli` to `target`.
 *
 * - claude: `claude update` for "latest", otherwise `claude install <target>`.
 * - grok: `grok update` for "latest", otherwise `grok update --version <target>`.
 * - codex: `codex update` for "latest", otherwise a global npm install.
 * - anything else: a global npm install of the Gemini CLI package.
 *
 * @param cli - CLI identifier.
 * @param target - Package tag or version; validated by normalizeTarget. Defaults to "latest".
 * @returns A plan object with cli, target, command, args, strategy, requiresNetwork and,
 *   for some plans, a note.
 * @throws {Error} When normalizeTarget rejects the target.
 */
export function buildCliUpgradePlan(cli, target = "latest") {
    const version = normalizeTarget(target);
    const wantsLatest = version === "latest";
    // Plan that invokes the CLI's own binary; `note` is attached only when provided.
    const selfUpdate = (args, note) => ({
        cli,
        target: version,
        command: cli,
        args,
        strategy: "self-update",
        requiresNetwork: true,
        ...(note === undefined ? {} : { note }),
    });
    switch (cli) {
        case "claude":
            return wantsLatest
                ? selfUpdate(["update"])
                : selfUpdate(["install", version], "Claude Code supports explicit install targets through 'claude install <target>'.");
        case "grok":
            return wantsLatest
                ? selfUpdate(["update"])
                : selfUpdate(["update", "--version", version], "Grok CLI supports explicit version targets via 'grok update --version <target>'.");
        case "codex":
            if (wantsLatest) {
                return selfUpdate(["update"]);
            }
            break;
        default:
            break;
    }
    // Remaining cases are installed through npm.
    const isCodex = cli === "codex";
    const packageName = isCodex ? NPM_PACKAGES.codex : NPM_PACKAGES.gemini;
    const note = isCodex
        ? "Explicit Codex targets use the documented npm package path; latest can use 'codex update'."
        : "Gemini CLI does not expose a self-update command in the gateway-supported CLI surface, so upgrades use npm.";
    return {
        cli,
        target: version,
        command: "npm",
        args: ["install", "-g", `${packageName}@${version}`],
        strategy: "npm-global-install",
        requiresNetwork: true,
        note,
    };
}
|
|
79
|
+
/**
 * Probe a CLI for its version by running it with the arguments listed in
 * VERSION_ARGS, using a 15 second timeout.
 *
 * Never rejects on an executeCli failure: any error thrown while running the
 * command is reported as `installed: false` with the error message attached.
 *
 * @param cli - CLI identifier; also used as the command name.
 * @returns Version info with the raw stdout/stderr on success, or empty
 *   output plus `error` on failure.
 */
export async function getCliVersion(cli) {
    const args = VERSION_ARGS[cli];
    const common = { cli, command: cli, args };
    let result;
    try {
        result = await executeCli(cli, args, { timeout: 15_000 });
        return {
            ...common,
            installed: true,
            version: extractVersion(result.stdout, result.stderr),
            stdout: result.stdout,
            stderr: result.stderr,
        };
    }
    catch (error) {
        return {
            ...common,
            installed: false,
            stdout: "",
            stderr: "",
            error: error instanceof Error ? error.message : String(error),
        };
    }
}
|
|
106
|
+
/**
 * Probe one CLI, or all four known CLIs when `cli` is omitted, concurrently.
 *
 * @param cli - Optional single CLI to probe.
 * @returns Promise of an array of getCliVersion results, in request order.
 */
export async function getCliVersions(cli) {
    const targets = cli ? [cli] : ["claude", "codex", "gemini", "grok"];
    const probes = targets.map(item => getCliVersion(item));
    return Promise.all(probes);
}
|
|
110
|
+
/**
 * Build the upgrade plan for a CLI and, unless this is a dry run, execute it.
 *
 * @param params - `{ cli, target, dryRun, timeoutMs, logger }`; the timeout
 *   defaults to 600000 ms when not supplied.
 * @returns `{ dryRun: true, plan }` for a dry run, otherwise the plan plus the
 *   command's stdout, stderr and exit code.
 */
export async function runCliUpgrade(params) {
    const plan = buildCliUpgradePlan(params.cli, params.target);
    if (params.dryRun) {
        return { dryRun: true, plan };
    }
    const { command, args } = plan;
    params.logger?.info(`Upgrading ${params.cli} CLI`, {
        target: plan.target,
        command,
        args,
    });
    const execOptions = {
        timeout: params.timeoutMs ?? 600_000,
        logger: params.logger,
    };
    const result = await executeCli(command, args, execOptions);
    return {
        dryRun: false,
        plan,
        stdout: result.stdout,
        stderr: result.stderr,
        exitCode: result.code,
    };
}
|
|
132
|
+
/**
 * Validate and trim an upgrade target (package tag or version).
 *
 * Rejects anything that is not a string, is empty after trimming, starts
 * with '-' (so it cannot be read as a command-line option), or contains
 * whitespace or control characters.
 *
 * @param {string} target - Requested tag or version.
 * @returns {string} The trimmed target.
 * @throws {Error} When the target fails validation.
 */
function normalizeTarget(target) {
    // Non-string input (e.g. null) is treated as invalid rather than crashing on .trim().
    const normalized = typeof target === "string" ? target.trim() : "";
    if (!normalized || normalized.startsWith("-") || /[\u0000-\u001f\u007f\s]/.test(normalized)) {
        throw new Error("Upgrade target must be a non-empty package tag or version without whitespace and cannot start with '-'");
    }
    return normalized;
}
|
|
139
|
+
/**
 * Pick the first non-blank line from stdout followed by stderr.
 *
 * @param {string} stdout - Captured standard output.
 * @param {string} stderr - Captured standard error.
 * @returns {string|undefined} The first line with content, trimmed, or
 *   undefined when both streams are blank.
 */
function extractVersion(stdout, stderr) {
    const combined = `${stdout}\n${stderr}`;
    for (const rawLine of combined.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (line.length > 0) {
            return line;
        }
    }
    return undefined;
}
|