labgate 0.5.43 → 0.5.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +7 -0
- package/dist/cli.js.map +1 -1
- package/dist/lib/container.d.ts +13 -1
- package/dist/lib/container.js +404 -33
- package/dist/lib/container.js.map +1 -1
- package/dist/lib/image-pull-lock.d.ts +5 -0
- package/dist/lib/image-pull-lock.js +4 -0
- package/dist/lib/image-pull-lock.js.map +1 -1
- package/package.json +1 -1
package/dist/lib/container.js
CHANGED
|
@@ -42,7 +42,9 @@ exports.imageToSifName = imageToSifName;
|
|
|
42
42
|
exports.isUsableApptainerSif = isUsableApptainerSif;
|
|
43
43
|
exports.ensureSifImage = ensureSifImage;
|
|
44
44
|
exports.resolveSlurmProxyPathToHost = resolveSlurmProxyPathToHost;
|
|
45
|
+
exports.listenOnUnixSocket = listenOnUnixSocket;
|
|
45
46
|
exports.buildCodexOauthPublishSpec = buildCodexOauthPublishSpec;
|
|
47
|
+
exports.describeCliStartupPhase = describeCliStartupPhase;
|
|
46
48
|
exports.buildEntrypoint = buildEntrypoint;
|
|
47
49
|
exports.setupBrowserHook = setupBrowserHook;
|
|
48
50
|
exports.getAgentTokenEnv = getAgentTokenEnv;
|
|
@@ -713,6 +715,8 @@ function imageToSifName(image) {
|
|
|
713
715
|
return `${readable}-${hash}.sif`;
|
|
714
716
|
}
|
|
715
717
|
const APPTAINER_SIF_INSPECT_TIMEOUT_MS = 15_000;
|
|
718
|
+
const SLOW_APPTAINER_SIF_INSPECT_WARN_MS = 5_000;
|
|
719
|
+
const SLOW_IMAGE_PULL_LOCK_WAIT_WARN_MS = 5_000;
|
|
716
720
|
function isUsableApptainerSif(runtime, sifPath) {
|
|
717
721
|
if (!(0, fs_1.existsSync)(sifPath))
|
|
718
722
|
return false;
|
|
@@ -731,19 +735,36 @@ function isUsableApptainerSif(runtime, sifPath) {
|
|
|
731
735
|
* Ensure a SIF image exists in the cache directory.
|
|
732
736
|
* Pulls from docker:// URI if not already cached.
|
|
733
737
|
*/
|
|
734
|
-
async function ensureSifImage(runtime, image) {
|
|
738
|
+
async function ensureSifImage(runtime, image, hooks = {}) {
|
|
739
|
+
const recordTiming = (label, startedAt) => {
|
|
740
|
+
hooks.onTiming?.(label, Math.max(0, Date.now() - startedAt));
|
|
741
|
+
};
|
|
735
742
|
const imagesDir = (0, config_js_1.getImagesDir)();
|
|
736
743
|
(0, fs_1.mkdirSync)(imagesDir, { recursive: true });
|
|
737
744
|
const sifPath = (0, path_1.join)(imagesDir, imageToSifName(image));
|
|
738
745
|
const pullLockPath = `${sifPath}.pull.lock`;
|
|
739
|
-
|
|
746
|
+
const initialInspectStartedAt = Date.now();
|
|
747
|
+
const initialCacheHit = isUsableApptainerSif(runtime, sifPath);
|
|
748
|
+
recordTiming('image_prepare_sif_cache_inspect', initialInspectStartedAt);
|
|
749
|
+
if (Math.max(0, Date.now() - initialInspectStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
|
|
750
|
+
hooks.onWarning?.('slow_sif_cache_inspect');
|
|
751
|
+
}
|
|
752
|
+
if (initialCacheHit && !(0, fs_1.existsSync)(pullLockPath)) {
|
|
740
753
|
return sifPath;
|
|
741
754
|
}
|
|
755
|
+
let pullLockTimingRecorded = false;
|
|
742
756
|
try {
|
|
743
757
|
await (0, image_pull_lock_js_1.withImagePullFileLock)(pullLockPath, image, async () => {
|
|
744
|
-
|
|
758
|
+
const reInspectStartedAt = Date.now();
|
|
759
|
+
const cacheHitAfterLock = isUsableApptainerSif(runtime, sifPath);
|
|
760
|
+
recordTiming('image_prepare_sif_cache_reinspect', reInspectStartedAt);
|
|
761
|
+
if (Math.max(0, Date.now() - reInspectStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
|
|
762
|
+
hooks.onWarning?.('slow_sif_cache_reinspect');
|
|
763
|
+
}
|
|
764
|
+
if (cacheHitAfterLock)
|
|
745
765
|
return;
|
|
746
766
|
if ((0, fs_1.existsSync)(sifPath)) {
|
|
767
|
+
hooks.onWarning?.('cached_sif_failed_validation');
|
|
747
768
|
log.warn(`Cached SIF for ${log.dim(image)} failed validation. Re-pulling image.`);
|
|
748
769
|
try {
|
|
749
770
|
(0, fs_1.rmSync)(sifPath, { force: true });
|
|
@@ -755,10 +776,19 @@ async function ensureSifImage(runtime, image) {
|
|
|
755
776
|
const tempSifPath = `${sifPath}.tmp-${process.pid}-${(0, crypto_1.randomBytes)(6).toString('hex')}`;
|
|
756
777
|
log.info(`Pulling image ${log.dim(image)}`);
|
|
757
778
|
try {
|
|
779
|
+
hooks.onHint?.('sif_pull_required');
|
|
780
|
+
const pullStartedAt = Date.now();
|
|
758
781
|
(0, child_process_1.execFileSync)(runtime, ['pull', tempSifPath, `docker://${image}`], {
|
|
759
782
|
stdio: 'inherit',
|
|
760
783
|
});
|
|
761
|
-
|
|
784
|
+
recordTiming('image_prepare_sif_pull', pullStartedAt);
|
|
785
|
+
const validatePulledStartedAt = Date.now();
|
|
786
|
+
const pulledSifUsable = isUsableApptainerSif(runtime, tempSifPath);
|
|
787
|
+
recordTiming('image_prepare_sif_pull_validate', validatePulledStartedAt);
|
|
788
|
+
if (Math.max(0, Date.now() - validatePulledStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
|
|
789
|
+
hooks.onWarning?.('slow_sif_pull_validate');
|
|
790
|
+
}
|
|
791
|
+
if (!pulledSifUsable) {
|
|
762
792
|
throw new Error(`Pulled SIF failed validation: ${tempSifPath}`);
|
|
763
793
|
}
|
|
764
794
|
(0, fs_1.renameSync)(tempSifPath, sifPath);
|
|
@@ -772,6 +802,13 @@ async function ensureSifImage(runtime, image) {
|
|
|
772
802
|
}
|
|
773
803
|
}
|
|
774
804
|
}, {
|
|
805
|
+
onAcquired: ({ waitedMs }) => {
|
|
806
|
+
pullLockTimingRecorded = true;
|
|
807
|
+
hooks.onTiming?.('image_prepare_pull_lock_wait', waitedMs);
|
|
808
|
+
if (waitedMs >= SLOW_IMAGE_PULL_LOCK_WAIT_WARN_MS) {
|
|
809
|
+
hooks.onWarning?.('slow_image_pull_lock_wait');
|
|
810
|
+
}
|
|
811
|
+
},
|
|
775
812
|
onWait: ({ owner }) => {
|
|
776
813
|
const ownerLabel = owner && owner !== 'unknown' ? ` (owner: ${owner})` : '';
|
|
777
814
|
log.step(`Waiting for shared image pull lock for ${log.dim(image)}${ownerLabel}...`);
|
|
@@ -789,6 +826,9 @@ async function ensureSifImage(runtime, image) {
|
|
|
789
826
|
console.error(String(msg).trim().slice(0, 500));
|
|
790
827
|
process.exit(1);
|
|
791
828
|
}
|
|
829
|
+
if (!pullLockTimingRecorded) {
|
|
830
|
+
hooks.onTiming?.('image_prepare_pull_lock_wait', 0);
|
|
831
|
+
}
|
|
792
832
|
return sifPath;
|
|
793
833
|
}
|
|
794
834
|
function ensurePodmanImage(runtime, image) {
|
|
@@ -1018,7 +1058,32 @@ function mapSlurmProxyArgsToHost(session, args, containerCwd) {
|
|
|
1018
1058
|
}
|
|
1019
1059
|
return { ok: true, args: mappedArgs };
|
|
1020
1060
|
}
|
|
1021
|
-
function
|
|
1061
|
+
/**
 * Ask `server` to listen on `socketHostPath` and wait for the outcome.
 *
 * The returned promise resolves (with no value) once the server emits
 * 'listening'. It rejects with the emitted error if 'error' fires first, or
 * with the thrown error if `server.listen()` throws synchronously. In every
 * case the two temporary listeners installed here are detached again.
 *
 * @param {object} server - Emitter exposing `once`, `off` and `listen`.
 * @param {string} socketHostPath - Path handed to `server.listen()`.
 * @returns {Promise<void>}
 */
async function listenOnUnixSocket(server, socketHostPath) {
    await new Promise((resolve, reject) => {
        // Function declarations are hoisted, so each handler can refer to the
        // other through detachHandlers() regardless of declaration order.
        function detachHandlers() {
            server.off('error', handleError);
            server.off('listening', handleListening);
        }
        function handleError(err) {
            detachHandlers();
            reject(err);
        }
        function handleListening() {
            detachHandlers();
            resolve();
        }
        server.once('error', handleError);
        server.once('listening', handleListening);
        try {
            server.listen(socketHostPath);
        }
        catch (err) {
            // listen() failed before any event could fire: clean up and
            // surface the synchronous failure through the same promise.
            detachHandlers();
            reject(err);
        }
    });
}
|
|
1086
|
+
async function startSlurmHostProxy(session) {
|
|
1022
1087
|
const sandboxHome = (0, config_js_1.getSandboxHome)();
|
|
1023
1088
|
const socketHostDir = (0, path_1.join)(sandboxHome, '.labgate', 'slurm', 'host-proxy');
|
|
1024
1089
|
const socketHostPath = (0, path_1.join)(socketHostDir, 'slurm.sock');
|
|
@@ -1141,10 +1206,14 @@ function startSlurmHostProxy(session) {
|
|
|
1141
1206
|
log.warn(`SLURM host proxy server error: ${err?.message ?? String(err)}`);
|
|
1142
1207
|
});
|
|
1143
1208
|
try {
|
|
1144
|
-
server
|
|
1209
|
+
await listenOnUnixSocket(server, socketHostPath);
|
|
1145
1210
|
}
|
|
1146
1211
|
catch (err) {
|
|
1147
1212
|
log.warn(`Could not start SLURM host proxy: ${err?.message ?? String(err)}`);
|
|
1213
|
+
try {
|
|
1214
|
+
server.close();
|
|
1215
|
+
}
|
|
1216
|
+
catch { /* ignore */ }
|
|
1148
1217
|
try {
|
|
1149
1218
|
(0, fs_1.unlinkSync)(socketHostPath);
|
|
1150
1219
|
}
|
|
@@ -1276,7 +1345,9 @@ const DEFAULT_CODEX_OAUTH_CALLBACK_PORT = 1455;
|
|
|
1276
1345
|
const CODEX_OAUTH_STARTUP_HEARTBEAT_MS = 10_000;
|
|
1277
1346
|
const CODEX_OAUTH_STARTUP_HEARTBEAT_MAX_MS = 50_000;
|
|
1278
1347
|
const CODEX_OAUTH_STARTUP_SPINNER_MS = 125;
|
|
1279
|
-
const
|
|
1348
|
+
const CLI_STARTUP_SPINNER_FRAMES = ['|', '/', '-', '\\'];
|
|
1349
|
+
const CLI_STARTUP_SPINNER_MS = 120;
|
|
1350
|
+
const CLI_STARTUP_HEARTBEAT_MS = 5_000;
|
|
1280
1351
|
const DEFERRED_SLURM_PASSTHROUGH_DELAY_MS = 1_500;
|
|
1281
1352
|
function getCodexOauthCallbackPort() {
|
|
1282
1353
|
const raw = (process.env.LABGATE_CODEX_OAUTH_CALLBACK_PORT || '').trim();
|
|
@@ -1305,6 +1376,64 @@ function buildCodexOauthPublishSpec(port) {
|
|
|
1305
1376
|
// Publishing without an explicit host IP gives dual-stack localhost reachability.
|
|
1306
1377
|
return `${port}:${port}`;
|
|
1307
1378
|
}
|
|
1379
|
+
/**
 * Build the human-readable status line for a CLI startup phase.
 *
 * The agent label is 'Codex' only when `agent` (trimmed, case-insensitive)
 * equals 'codex'; any other value, including empty or missing, yields 'Claude'.
 * Any phase other than 'image' is described as the launch phase.
 *
 * @param {string} agent - Agent identifier.
 * @param {string} phase - 'image' for sandbox preparation; anything else is launch.
 * @param {string} runtime - Runtime name; only 'apptainer' and 'podman' get a
 *   runtime-specific wording.
 * @returns {string} Message without trailing punctuation.
 */
function describeCliStartupPhase(agent, phase, runtime) {
    const normalizedAgent = String(agent || '').trim().toLowerCase();
    const agentLabel = normalizedAgent === 'codex' ? 'Codex' : 'Claude';
    if (phase !== 'image') {
        return `Launching ${agentLabel} inside sandbox`;
    }
    switch (runtime) {
        case 'apptainer':
            return `Preparing ${agentLabel} Apptainer sandbox`;
        case 'podman':
            return `Preparing ${agentLabel} container sandbox`;
        default:
            return `Preparing ${agentLabel} sandbox`;
    }
}
|
|
1390
|
+
/**
 * Show progress feedback while a CLI startup phase is running.
 *
 * When both stderr and stdout are TTYs, an in-place spinner line is redrawn on
 * stderr every CLI_STARTUP_SPINNER_MS. Otherwise a `log.step` line is emitted
 * every CLI_STARTUP_HEARTBEAT_MS. One line is rendered immediately on start.
 *
 * @param {string} agent - Agent identifier passed to describeCliStartupPhase.
 * @param {string} phase - Startup phase passed to describeCliStartupPhase.
 * @param {string} runtime - Runtime name passed to describeCliStartupPhase.
 * @returns {{noteOutput: function(*): void, stop: function(): void}}
 *   `noteOutput(data)` stops the heartbeat on the first non-empty output;
 *   `stop()` stops it unconditionally and is safe to call repeatedly.
 */
function startCliStartupHeartbeat(agent, phase, runtime) {
    const beganAt = Date.now();
    const useSpinner = Boolean(process.stderr.isTTY && process.stdout.isTTY);
    const tickMs = useSpinner ? CLI_STARTUP_SPINNER_MS : CLI_STARTUP_HEARTBEAT_MS;
    let halted = false;
    let outputSeen = false;
    let timer = null;
    let frameIndex = 0;
    function render() {
        // Never report less than 1s, even on the immediate first render.
        const elapsedSeconds = Math.max(1, Math.floor((Date.now() - beganAt) / 1000));
        const message = describeCliStartupPhase(agent, phase, runtime);
        if (!useSpinner) {
            log.step(`${message}... (${elapsedSeconds}s elapsed)`);
            return;
        }
        const frame = CLI_STARTUP_SPINNER_FRAMES[frameIndex];
        frameIndex = (frameIndex + 1) % CLI_STARTUP_SPINNER_FRAMES.length;
        // "\r" + erase-line redraws the spinner in place.
        process.stderr.write(`\r\x1b[2K${log.dim('›')} ${message}... ${frame} ${elapsedSeconds}s`);
    }
    function stop() {
        if (halted) {
            return;
        }
        halted = true;
        if (timer) {
            clearInterval(timer);
            timer = null;
        }
        if (useSpinner) {
            // Wipe the spinner line so later output starts on a clean line.
            process.stderr.write('\r\x1b[2K');
        }
    }
    function noteOutput(data) {
        if (halted || outputSeen || String(data || '').length === 0) {
            return;
        }
        outputSeen = true;
        stop();
    }
    render();
    timer = setInterval(() => {
        if (!halted && !outputSeen) {
            render();
        }
    }, tickMs);
    // The heartbeat timer alone should not keep the process alive.
    timer.unref();
    return { noteOutput, stop };
}
|
|
1308
1437
|
function startCodexOauthStartupHeartbeat() {
|
|
1309
1438
|
const startedAt = Date.now();
|
|
1310
1439
|
let stopped = false;
|
|
@@ -1316,8 +1445,8 @@ function startCodexOauthStartupHeartbeat() {
|
|
|
1316
1445
|
const useSpinner = !!(process.stderr.isTTY && process.stdout.isTTY);
|
|
1317
1446
|
const renderSpinner = () => {
|
|
1318
1447
|
const elapsedSeconds = Math.max(1, Math.floor((Date.now() - startedAt) / 1000));
|
|
1319
|
-
const frame =
|
|
1320
|
-
spinnerFrame = (spinnerFrame + 1) %
|
|
1448
|
+
const frame = CLI_STARTUP_SPINNER_FRAMES[spinnerFrame];
|
|
1449
|
+
spinnerFrame = (spinnerFrame + 1) % CLI_STARTUP_SPINNER_FRAMES.length;
|
|
1321
1450
|
process.stderr.write(`\r\x1b[2K${log.dim('›')} Waiting for Codex login output... ${frame} ${elapsedSeconds}s`);
|
|
1322
1451
|
spinnerActive = true;
|
|
1323
1452
|
};
|
|
@@ -2040,6 +2169,48 @@ function logStartupTimings(entries, totalMs) {
|
|
|
2040
2169
|
const prefix = summary ? `${summary}, ` : '';
|
|
2041
2170
|
log.step(`[labgate] startup timings: ${prefix}total=${formatStartupDuration(totalMs)}`);
|
|
2042
2171
|
}
|
|
2172
|
+
/**
 * Create the initial startup-report record for a session.
 *
 * Timing, completion, exit-code and SLURM detail fields start out as
 * null/zero/empty. `generated_at` and `updated_at` share one timestamp taken
 * when this function is called.
 *
 * @param {object} session - Session descriptor; reads `uiDetected`, `agent`,
 *   `workdir`, `dryRun` and `config.slurm.enabled`.
 * @param {string} runtime - Runtime name recorded verbatim.
 * @param {string} image - Image reference recorded verbatim.
 * @returns {object} Report object with `schema_version` 2.
 */
function createStartupReportData(session, runtime, image) {
    const timestamp = new Date().toISOString();
    // Only an explicit `false` counts as "UI not detected"; undefined/null do not.
    const initialWarnings = session.uiDetected === false ? ['ui_not_detected'] : [];
    const isDryRun = Boolean(session.dryRun);
    return {
        schema_version: 2,
        kind: 'labgate-startup-report',
        generated_at: timestamp,
        updated_at: timestamp,
        startup_completed_at: null,
        startup_completion: null,
        pid: process.pid,
        node: (0, os_1.hostname)(),
        agent: session.agent,
        runtime,
        workdir: session.workdir,
        image,
        dry_run: session.dryRun,
        status: isDryRun ? 'dry-run' : 'starting',
        launch_mode: isDryRun ? 'dry-run' : null,
        ui_detected: session.uiDetected ?? null,
        total_ms: 0,
        session_total_ms: 0,
        first_output_ms: null,
        timings_ms: {},
        cold_start_hints: [],
        warnings: initialWarnings,
        slurm: {
            enabled: session.config.slurm.enabled,
            host_proxy_enabled: null,
            passthrough_mode: null,
            host_commands_found: null,
            staged_commands: null,
            reused_stage: null,
        },
        exit_code: null,
    };
}
|
|
2211
|
+
/**
 * Serialize `report` as 2-space-indented JSON with a trailing newline and hand
 * it to `writeTextFileAtomic`, requesting file mode 0o600.
 *
 * @param {string} path - Destination path of the report file.
 * @param {object} report - Startup report data to serialize.
 */
function writeStartupReportFile(path, report) {
    const serialized = `${JSON.stringify(report, null, 2)}\n`;
    (0, startup_stage_lock_js_1.writeTextFileAtomic)(path, serialized, { mode: 0o600 });
}
|
|
2043
2214
|
// ── Shared session helpers ─────────────────────────────────
|
|
2044
2215
|
function logSessionStart(session, sessionId) {
|
|
2045
2216
|
if (!session.config.audit.enabled)
|
|
@@ -2137,15 +2308,107 @@ function printSessionInfo(session, sessionId, runtime) {
|
|
|
2137
2308
|
async function startSession(session) {
|
|
2138
2309
|
const startupStartedAt = Date.now();
|
|
2139
2310
|
const startupTimings = [];
|
|
2140
|
-
const recordStartupTiming = (label, startedAt) => {
|
|
2141
|
-
startupTimings.push([label, Math.max(0, Date.now() - startedAt)]);
|
|
2142
|
-
};
|
|
2143
2311
|
const preferred = session.config.runtime;
|
|
2144
2312
|
const runtime = session.dryRun ? getDryRunRuntime(preferred) : (0, runtime_js_1.getRuntime)(preferred);
|
|
2145
2313
|
const image = session.imageOverride ?? session.config.image;
|
|
2146
2314
|
const sessionId = (0, crypto_1.randomBytes)(4).toString('hex');
|
|
2147
2315
|
const footerMode = session.footerMode ?? 'sticky';
|
|
2148
2316
|
const footerLine = formatStatusFooter(session, runtime, sessionId, image);
|
|
2317
|
+
const startupReport = session.startupReportPath
|
|
2318
|
+
? createStartupReportData(session, runtime, image)
|
|
2319
|
+
: null;
|
|
2320
|
+
let startupReportWriteFailed = false;
|
|
2321
|
+
const flushStartupReport = () => {
|
|
2322
|
+
if (!startupReport || !session.startupReportPath || startupReportWriteFailed)
|
|
2323
|
+
return;
|
|
2324
|
+
const elapsedMs = Math.max(0, Date.now() - startupStartedAt);
|
|
2325
|
+
startupReport.updated_at = new Date().toISOString();
|
|
2326
|
+
startupReport.session_total_ms = elapsedMs;
|
|
2327
|
+
if (startupReport.startup_completed_at === null) {
|
|
2328
|
+
startupReport.total_ms = elapsedMs;
|
|
2329
|
+
}
|
|
2330
|
+
try {
|
|
2331
|
+
writeStartupReportFile(session.startupReportPath, startupReport);
|
|
2332
|
+
}
|
|
2333
|
+
catch (err) {
|
|
2334
|
+
startupReportWriteFailed = true;
|
|
2335
|
+
log.warn(`Could not write startup report to ${session.startupReportPath}: ${err?.message ?? String(err)}`);
|
|
2336
|
+
}
|
|
2337
|
+
};
|
|
2338
|
+
const noteStartupWarning = (warning) => {
|
|
2339
|
+
if (!startupReport)
|
|
2340
|
+
return;
|
|
2341
|
+
if (!startupReport.warnings.includes(warning)) {
|
|
2342
|
+
startupReport.warnings.push(warning);
|
|
2343
|
+
flushStartupReport();
|
|
2344
|
+
}
|
|
2345
|
+
};
|
|
2346
|
+
const noteColdStartHint = (hint) => {
|
|
2347
|
+
if (!startupReport)
|
|
2348
|
+
return;
|
|
2349
|
+
if (!startupReport.cold_start_hints.includes(hint)) {
|
|
2350
|
+
startupReport.cold_start_hints.push(hint);
|
|
2351
|
+
flushStartupReport();
|
|
2352
|
+
}
|
|
2353
|
+
};
|
|
2354
|
+
const setStartupStatus = (status) => {
|
|
2355
|
+
if (!startupReport)
|
|
2356
|
+
return;
|
|
2357
|
+
startupReport.status = status;
|
|
2358
|
+
flushStartupReport();
|
|
2359
|
+
};
|
|
2360
|
+
const setStartupLaunchMode = (mode) => {
|
|
2361
|
+
if (!startupReport)
|
|
2362
|
+
return;
|
|
2363
|
+
startupReport.launch_mode = mode;
|
|
2364
|
+
flushStartupReport();
|
|
2365
|
+
};
|
|
2366
|
+
const completeStartup = (completion) => {
|
|
2367
|
+
if (!startupReport || startupReport.startup_completed_at !== null)
|
|
2368
|
+
return;
|
|
2369
|
+
startupReport.startup_completed_at = new Date().toISOString();
|
|
2370
|
+
startupReport.startup_completion = completion;
|
|
2371
|
+
startupReport.total_ms = Math.max(0, Date.now() - startupStartedAt);
|
|
2372
|
+
flushStartupReport();
|
|
2373
|
+
};
|
|
2374
|
+
const noteStartupFirstOutput = (data) => {
|
|
2375
|
+
if (!startupReport)
|
|
2376
|
+
return;
|
|
2377
|
+
if (startupReport.first_output_ms !== null)
|
|
2378
|
+
return;
|
|
2379
|
+
if (String(data || '').length === 0)
|
|
2380
|
+
return;
|
|
2381
|
+
const firstOutputMs = Math.max(0, Date.now() - startupStartedAt);
|
|
2382
|
+
startupReport.first_output_ms = firstOutputMs;
|
|
2383
|
+
startupReport.timings_ms.launch_first_output = firstOutputMs;
|
|
2384
|
+
startupReport.status = 'running';
|
|
2385
|
+
completeStartup('first-output');
|
|
2386
|
+
flushStartupReport();
|
|
2387
|
+
};
|
|
2388
|
+
const updateSlurmStartupReport = (payload) => {
|
|
2389
|
+
if (!startupReport)
|
|
2390
|
+
return;
|
|
2391
|
+
if (payload.hostProxyEnabled !== undefined)
|
|
2392
|
+
startupReport.slurm.host_proxy_enabled = payload.hostProxyEnabled;
|
|
2393
|
+
if (payload.passthroughMode !== undefined)
|
|
2394
|
+
startupReport.slurm.passthrough_mode = payload.passthroughMode;
|
|
2395
|
+
if (payload.hostCommandsFound !== undefined)
|
|
2396
|
+
startupReport.slurm.host_commands_found = payload.hostCommandsFound;
|
|
2397
|
+
if (payload.stagedCommands !== undefined)
|
|
2398
|
+
startupReport.slurm.staged_commands = payload.stagedCommands;
|
|
2399
|
+
if (payload.reusedStage !== undefined)
|
|
2400
|
+
startupReport.slurm.reused_stage = payload.reusedStage;
|
|
2401
|
+
flushStartupReport();
|
|
2402
|
+
};
|
|
2403
|
+
const recordStartupTiming = (label, startedAt) => {
|
|
2404
|
+
const elapsedMs = Math.max(0, Date.now() - startedAt);
|
|
2405
|
+
startupTimings.push([label, elapsedMs]);
|
|
2406
|
+
if (startupReport) {
|
|
2407
|
+
startupReport.timings_ms[label] = elapsedMs;
|
|
2408
|
+
flushStartupReport();
|
|
2409
|
+
}
|
|
2410
|
+
};
|
|
2411
|
+
flushStartupReport();
|
|
2149
2412
|
// Extract agent auth token (CLI flag → env var)
|
|
2150
2413
|
const tokenEnv = session.dryRun ? [] : getAgentTokenEnv(session.agent, session.apiKey);
|
|
2151
2414
|
const bridgeCodexOauthForPodman = runtime === 'podman' && shouldBridgeCodexOauthForPodman(session.agent, session.config.network.mode);
|
|
@@ -2181,6 +2444,7 @@ async function startSession(session) {
|
|
|
2181
2444
|
let cleanupSlurmHostProxy = () => { };
|
|
2182
2445
|
let cleanupDeferredSlurmPassthrough = () => { };
|
|
2183
2446
|
let startDeferredSlurmPassthrough = () => { };
|
|
2447
|
+
let startupHeartbeat = null;
|
|
2184
2448
|
// If the agent isn't installed in the persistent sandbox home yet, warn that first run can be slow.
|
|
2185
2449
|
if (!session.dryRun) {
|
|
2186
2450
|
const sandboxHome = (0, config_js_1.getSandboxHome)();
|
|
@@ -2188,18 +2452,28 @@ async function startSession(session) {
|
|
|
2188
2452
|
const installedBin = (0, path_1.join)(sandboxHome, '.npm-global', 'bin', agentBin);
|
|
2189
2453
|
try {
|
|
2190
2454
|
if (!(0, fs_1.existsSync)(installedBin)) {
|
|
2455
|
+
noteColdStartHint('agent_missing_in_sandbox');
|
|
2191
2456
|
log.step('First run: preparing sandbox (pulling image + installing agent). This can take a minute...');
|
|
2192
2457
|
}
|
|
2193
2458
|
}
|
|
2194
2459
|
catch { /* ignore */ }
|
|
2460
|
+
if (runtime === 'apptainer') {
|
|
2461
|
+
try {
|
|
2462
|
+
const cachedSifPath = (0, path_1.join)((0, config_js_1.getImagesDir)(), imageToSifName(image));
|
|
2463
|
+
if (!(0, fs_1.existsSync)(cachedSifPath))
|
|
2464
|
+
noteColdStartHint('sif_cache_missing');
|
|
2465
|
+
}
|
|
2466
|
+
catch { /* ignore */ }
|
|
2467
|
+
}
|
|
2195
2468
|
}
|
|
2196
2469
|
// For Apptainer sessions, always try SLURM CLI passthrough so sbatch/squeue
|
|
2197
2470
|
// can work out of the box when present on the host. Full SLURM tracking/MCP
|
|
2198
2471
|
// remains controlled by slurm.enabled.
|
|
2199
2472
|
if (!session.dryRun && runtime === 'apptainer') {
|
|
2200
2473
|
const hostProxyStartedAt = Date.now();
|
|
2201
|
-
const hostProxy = startSlurmHostProxy(session);
|
|
2474
|
+
const hostProxy = await startSlurmHostProxy(session);
|
|
2202
2475
|
recordStartupTiming('slurm_host_proxy', hostProxyStartedAt);
|
|
2476
|
+
updateSlurmStartupReport({ hostProxyEnabled: !!hostProxy });
|
|
2203
2477
|
const trackingEnabled = session.config.slurm.enabled;
|
|
2204
2478
|
const runSlurmPassthroughStage = (mode) => {
|
|
2205
2479
|
const stageStartedAt = Date.now();
|
|
@@ -2212,8 +2486,15 @@ async function startSession(session) {
|
|
|
2212
2486
|
preferHostProxy: false,
|
|
2213
2487
|
});
|
|
2214
2488
|
recordStartupTiming(mode === 'startup' ? 'slurm_passthrough_stage' : 'slurm_passthrough_stage_background', stageStartedAt);
|
|
2489
|
+
updateSlurmStartupReport({
|
|
2490
|
+
passthroughMode: staged.ok ? 'staged' : 'unavailable',
|
|
2491
|
+
hostCommandsFound: staged.hostCommands.length,
|
|
2492
|
+
stagedCommands: staged.staged.length,
|
|
2493
|
+
reusedStage: staged.reused,
|
|
2494
|
+
});
|
|
2215
2495
|
if (trackingEnabled) {
|
|
2216
2496
|
if (!staged.ok) {
|
|
2497
|
+
noteStartupWarning('slurm_commands_unavailable');
|
|
2217
2498
|
log.warn('SLURM is enabled but no SLURM commands were found on the host PATH. ' +
|
|
2218
2499
|
'Inside-sandbox SLURM commands (squeue/sbatch/...) will be unavailable.');
|
|
2219
2500
|
}
|
|
@@ -2236,6 +2517,13 @@ async function startSession(session) {
|
|
|
2236
2517
|
}
|
|
2237
2518
|
catch (err) {
|
|
2238
2519
|
recordStartupTiming(mode === 'startup' ? 'slurm_passthrough_stage' : 'slurm_passthrough_stage_background', stageStartedAt);
|
|
2520
|
+
updateSlurmStartupReport({
|
|
2521
|
+
passthroughMode: 'unavailable',
|
|
2522
|
+
reusedStage: false,
|
|
2523
|
+
});
|
|
2524
|
+
noteStartupWarning(mode === 'startup'
|
|
2525
|
+
? 'slurm_passthrough_stage_failed'
|
|
2526
|
+
: 'slurm_passthrough_stage_background_failed');
|
|
2239
2527
|
log.warn(mode === 'startup'
|
|
2240
2528
|
? `SLURM passthrough staging failed: ${err?.message ?? String(err)}`
|
|
2241
2529
|
: `Deferred SLURM passthrough staging failed: ${err?.message ?? String(err)}`);
|
|
@@ -2253,8 +2541,17 @@ async function startSession(session) {
|
|
|
2253
2541
|
preferHostProxy: true,
|
|
2254
2542
|
});
|
|
2255
2543
|
recordStartupTiming('slurm_passthrough_stage', stageStartedAt);
|
|
2544
|
+
updateSlurmStartupReport({
|
|
2545
|
+
passthroughMode: staged.ok
|
|
2546
|
+
? (staged.mode === 'proxy-only' ? 'proxy-only' : 'staged')
|
|
2547
|
+
: 'unavailable',
|
|
2548
|
+
hostCommandsFound: staged.hostCommands.length,
|
|
2549
|
+
stagedCommands: staged.staged.length,
|
|
2550
|
+
reusedStage: staged.reused,
|
|
2551
|
+
});
|
|
2256
2552
|
if (trackingEnabled) {
|
|
2257
2553
|
if (!staged.ok) {
|
|
2554
|
+
noteStartupWarning('slurm_commands_unavailable');
|
|
2258
2555
|
log.warn('SLURM is enabled but no SLURM commands were found on the host PATH. ' +
|
|
2259
2556
|
'Inside-sandbox SLURM commands (squeue/sbatch/...) will be unavailable.');
|
|
2260
2557
|
}
|
|
@@ -2275,12 +2572,19 @@ async function startSession(session) {
|
|
|
2275
2572
|
}
|
|
2276
2573
|
}
|
|
2277
2574
|
else {
|
|
2575
|
+
noteStartupWarning('slurm_host_proxy_unavailable');
|
|
2278
2576
|
if (trackingEnabled) {
|
|
2279
2577
|
log.step('SLURM host proxy unavailable. Staging SLURM passthrough before startup.');
|
|
2280
2578
|
runSlurmPassthroughStage('startup');
|
|
2281
2579
|
}
|
|
2282
2580
|
else {
|
|
2283
2581
|
log.step('SLURM host proxy unavailable. Deferring SLURM passthrough staging until after startup.');
|
|
2582
|
+
updateSlurmStartupReport({
|
|
2583
|
+
passthroughMode: 'deferred-stage',
|
|
2584
|
+
hostCommandsFound: null,
|
|
2585
|
+
stagedCommands: null,
|
|
2586
|
+
reusedStage: null,
|
|
2587
|
+
});
|
|
2284
2588
|
let cancelled = false;
|
|
2285
2589
|
let timer = null;
|
|
2286
2590
|
let started = false;
|
|
@@ -2308,32 +2612,58 @@ async function startSession(session) {
|
|
|
2308
2612
|
}
|
|
2309
2613
|
}
|
|
2310
2614
|
let args;
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
if (
|
|
2314
|
-
|
|
2615
|
+
const imagePrepareStartedAt = Date.now();
|
|
2616
|
+
try {
|
|
2617
|
+
if (runtime === 'apptainer') {
|
|
2618
|
+
startupHeartbeat = startCliStartupHeartbeat(session.agent, 'image', runtime);
|
|
2619
|
+
let sifPath;
|
|
2620
|
+
if (session.dryRun) {
|
|
2621
|
+
sifPath = (0, path_1.join)((0, config_js_1.getImagesDir)(), imageToSifName(image));
|
|
2622
|
+
}
|
|
2623
|
+
else {
|
|
2624
|
+
sifPath = await ensureSifImage(runtime, image, {
|
|
2625
|
+
onTiming: (label, elapsedMs) => {
|
|
2626
|
+
if (startupReport) {
|
|
2627
|
+
startupReport.timings_ms[label] = elapsedMs;
|
|
2628
|
+
flushStartupReport();
|
|
2629
|
+
}
|
|
2630
|
+
startupTimings.push([label, elapsedMs]);
|
|
2631
|
+
},
|
|
2632
|
+
onWarning: noteStartupWarning,
|
|
2633
|
+
onHint: noteColdStartHint,
|
|
2634
|
+
});
|
|
2635
|
+
}
|
|
2636
|
+
const buildArgsStartedAt = Date.now();
|
|
2637
|
+
args = buildApptainerArgs(session, sifPath, sessionId, runtimeEnvArgs);
|
|
2638
|
+
recordStartupTiming('image_prepare_build_args', buildArgsStartedAt);
|
|
2315
2639
|
}
|
|
2316
2640
|
else {
|
|
2317
|
-
|
|
2318
|
-
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
log.step(`Codex OAuth callback bridge enabled on localhost:${port} ` +
|
|
2325
|
-
'(Podman macOS uses bridge networking for callback compatibility).');
|
|
2326
|
-
if (needsCodexOauthStartupHeartbeat) {
|
|
2327
|
-
log.step('Launching Codex OAuth flow. Login output may take ~30s to appear.');
|
|
2641
|
+
if (bridgeCodexOauthForPodman) {
|
|
2642
|
+
const port = getCodexOauthCallbackPort();
|
|
2643
|
+
log.step(`Codex OAuth callback bridge enabled on localhost:${port} ` +
|
|
2644
|
+
'(Podman macOS uses bridge networking for callback compatibility).');
|
|
2645
|
+
if (needsCodexOauthStartupHeartbeat) {
|
|
2646
|
+
log.step('Launching Codex OAuth flow. Login output may take ~30s to appear.');
|
|
2647
|
+
}
|
|
2328
2648
|
}
|
|
2649
|
+
startupHeartbeat = startCliStartupHeartbeat(session.agent, 'image', runtime);
|
|
2650
|
+
if (!session.dryRun) {
|
|
2651
|
+
ensurePodmanImage(runtime, image);
|
|
2652
|
+
}
|
|
2653
|
+
args = buildPodmanArgs(session, image, sessionId, runtimeEnvArgs, { tty: !!(process.stdout.isTTY && process.stdin.isTTY) });
|
|
2329
2654
|
}
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2655
|
+
}
|
|
2656
|
+
finally {
|
|
2657
|
+
recordStartupTiming('image_prepare', imagePrepareStartedAt);
|
|
2658
|
+
startupHeartbeat?.stop();
|
|
2659
|
+
startupHeartbeat = null;
|
|
2334
2660
|
}
|
|
2335
2661
|
if (session.dryRun) {
|
|
2662
|
+
setStartupLaunchMode('dry-run');
|
|
2663
|
+
setStartupStatus('dry-run');
|
|
2664
|
+
completeStartup('dry-run');
|
|
2336
2665
|
prettyPrintCommand(runtime, args);
|
|
2666
|
+
flushStartupReport();
|
|
2337
2667
|
return;
|
|
2338
2668
|
}
|
|
2339
2669
|
// Create OAuth URL interceptor as a fallback when BROWSER hook does not fire.
|
|
@@ -2372,6 +2702,7 @@ async function startSession(session) {
|
|
|
2372
2702
|
sessionSlurmPoller.start();
|
|
2373
2703
|
}
|
|
2374
2704
|
catch (err) {
|
|
2705
|
+
noteStartupWarning('slurm_tracking_unavailable');
|
|
2375
2706
|
log.warn(`SLURM tracking unavailable: ${err?.message ?? String(err)}`);
|
|
2376
2707
|
cleanupSlurm();
|
|
2377
2708
|
}
|
|
@@ -2389,7 +2720,11 @@ async function startSession(session) {
|
|
|
2389
2720
|
const wantsSticky = footerMode === 'sticky';
|
|
2390
2721
|
const needsOAuthPtyFallback = !!oauthInterceptor;
|
|
2391
2722
|
const hasTty = !!(process.stdout.isTTY && process.stdin.isTTY);
|
|
2392
|
-
const
|
|
2723
|
+
const wantsStartupOutputCapture = !!startupReport;
|
|
2724
|
+
const shouldUsePty = hasTty && (wantsSticky
|
|
2725
|
+
|| needsOAuthPtyFallback
|
|
2726
|
+
|| needsCodexOauthStartupHeartbeat
|
|
2727
|
+
|| wantsStartupOutputCapture);
|
|
2393
2728
|
if (shouldUsePty) {
|
|
2394
2729
|
const pty = await loadPty();
|
|
2395
2730
|
if (!pty) {
|
|
@@ -2402,6 +2737,10 @@ async function startSession(session) {
|
|
|
2402
2737
|
else if (needsCodexOauthStartupHeartbeat) {
|
|
2403
2738
|
log.step('Codex startup heartbeat unavailable (node-pty missing).');
|
|
2404
2739
|
}
|
|
2740
|
+
else if (wantsStartupOutputCapture) {
|
|
2741
|
+
noteStartupWarning('startup_output_capture_unavailable');
|
|
2742
|
+
log.step('Startup output capture unavailable (node-pty missing).');
|
|
2743
|
+
}
|
|
2405
2744
|
}
|
|
2406
2745
|
else {
|
|
2407
2746
|
let runtimePath;
|
|
@@ -2419,6 +2758,9 @@ async function startSession(session) {
|
|
|
2419
2758
|
const cols = process.stdout.columns || 80;
|
|
2420
2759
|
const rows = process.stdout.rows || 24;
|
|
2421
2760
|
let child;
|
|
2761
|
+
const launchHeartbeat = startCliStartupHeartbeat(session.agent, 'launch', runtime);
|
|
2762
|
+
setStartupStatus('launching');
|
|
2763
|
+
setStartupLaunchMode('pty');
|
|
2422
2764
|
try {
|
|
2423
2765
|
child = pty.spawn(runtimePath, args, {
|
|
2424
2766
|
name: 'xterm-256color',
|
|
@@ -2429,6 +2771,8 @@ async function startSession(session) {
|
|
|
2429
2771
|
});
|
|
2430
2772
|
}
|
|
2431
2773
|
catch (err) {
|
|
2774
|
+
launchHeartbeat.stop();
|
|
2775
|
+
noteStartupWarning('pty_spawn_failed');
|
|
2432
2776
|
log.step(`PTY spawn failed (${err?.message ?? String(err)}). Falling back to standard spawn.`);
|
|
2433
2777
|
// Fall through to standard spawn path below.
|
|
2434
2778
|
child = null;
|
|
@@ -2452,7 +2796,9 @@ async function startSession(session) {
|
|
|
2452
2796
|
renderStickyFooter(footerLine);
|
|
2453
2797
|
}
|
|
2454
2798
|
child.onData((data) => {
|
|
2799
|
+
launchHeartbeat.noteOutput(data);
|
|
2455
2800
|
codexOauthHeartbeat?.noteOutput(data);
|
|
2801
|
+
noteStartupFirstOutput(data);
|
|
2456
2802
|
if (oauthInterceptor)
|
|
2457
2803
|
oauthInterceptor.feed(data);
|
|
2458
2804
|
process.stdout.write(data);
|
|
@@ -2475,7 +2821,14 @@ async function startSession(session) {
|
|
|
2475
2821
|
const timeoutHandle = setupSessionTimeout(session, sessionId, runtime, () => exited, () => child.kill('SIGTERM'));
|
|
2476
2822
|
child.onExit((event) => {
|
|
2477
2823
|
exited = true;
|
|
2824
|
+
launchHeartbeat.stop();
|
|
2478
2825
|
codexOauthHeartbeat?.stop();
|
|
2826
|
+
if (startupReport) {
|
|
2827
|
+
completeStartup('process-exit');
|
|
2828
|
+
startupReport.exit_code = event.exitCode ?? 0;
|
|
2829
|
+
startupReport.status = 'exited';
|
|
2830
|
+
flushStartupReport();
|
|
2831
|
+
}
|
|
2479
2832
|
if (timeoutHandle)
|
|
2480
2833
|
clearTimeout(timeoutHandle);
|
|
2481
2834
|
browserHook?.cleanup();
|
|
@@ -2505,17 +2858,35 @@ async function startSession(session) {
|
|
|
2505
2858
|
else if (needsOAuthPtyFallback && !hasTty) {
|
|
2506
2859
|
log.step('OAuth URL fallback interceptor requires a TTY; relying on BROWSER hook only.');
|
|
2507
2860
|
}
|
|
2861
|
+
else if (wantsStartupOutputCapture && !hasTty) {
|
|
2862
|
+
noteStartupWarning('startup_output_capture_unavailable');
|
|
2863
|
+
log.step('Startup output capture requires a TTY; report will omit first-output timing.');
|
|
2864
|
+
}
|
|
2508
2865
|
if (footerMode === 'sticky') {
|
|
2509
2866
|
console.log(footerLine);
|
|
2510
2867
|
}
|
|
2868
|
+
log.step(`${describeCliStartupPhase(session.agent, 'launch', runtime)}...`);
|
|
2511
2869
|
console.log('');
|
|
2512
2870
|
// Spawn path: must use stdio:'inherit' to preserve TTY for Claude Code.
|
|
2513
2871
|
// OAuth interception relies on the BROWSER hook + file watcher here
|
|
2514
2872
|
// (the PTY path above uses onData for direct interception).
|
|
2873
|
+
setStartupStatus('launching');
|
|
2874
|
+
setStartupLaunchMode('inherit');
|
|
2515
2875
|
const child = (0, child_process_1.spawn)(runtime, args, { stdio: 'inherit' });
|
|
2876
|
+
if (startupReport && startupReport.first_output_ms === null) {
|
|
2877
|
+
startupReport.status = 'running';
|
|
2878
|
+
completeStartup('spawn');
|
|
2879
|
+
flushStartupReport();
|
|
2880
|
+
}
|
|
2516
2881
|
startDeferredSlurmPassthrough();
|
|
2517
2882
|
const timeoutHandle = setupSessionTimeout(session, sessionId, runtime, () => child.exitCode !== null, () => child.kill('SIGTERM'));
|
|
2518
2883
|
child.on('close', (code) => {
|
|
2884
|
+
if (startupReport) {
|
|
2885
|
+
completeStartup('process-exit');
|
|
2886
|
+
startupReport.exit_code = code ?? 0;
|
|
2887
|
+
startupReport.status = 'exited';
|
|
2888
|
+
flushStartupReport();
|
|
2889
|
+
}
|
|
2519
2890
|
if (timeoutHandle)
|
|
2520
2891
|
clearTimeout(timeoutHandle);
|
|
2521
2892
|
browserHook?.cleanup();
|