labgate 0.5.43 → 0.5.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,9 @@ exports.imageToSifName = imageToSifName;
42
42
  exports.isUsableApptainerSif = isUsableApptainerSif;
43
43
  exports.ensureSifImage = ensureSifImage;
44
44
  exports.resolveSlurmProxyPathToHost = resolveSlurmProxyPathToHost;
45
+ exports.listenOnUnixSocket = listenOnUnixSocket;
45
46
  exports.buildCodexOauthPublishSpec = buildCodexOauthPublishSpec;
47
+ exports.describeCliStartupPhase = describeCliStartupPhase;
46
48
  exports.buildEntrypoint = buildEntrypoint;
47
49
  exports.setupBrowserHook = setupBrowserHook;
48
50
  exports.getAgentTokenEnv = getAgentTokenEnv;
@@ -713,6 +715,8 @@ function imageToSifName(image) {
713
715
  return `${readable}-${hash}.sif`;
714
716
  }
715
717
  const APPTAINER_SIF_INSPECT_TIMEOUT_MS = 15_000;
718
+ const SLOW_APPTAINER_SIF_INSPECT_WARN_MS = 5_000;
719
+ const SLOW_IMAGE_PULL_LOCK_WAIT_WARN_MS = 5_000;
716
720
  function isUsableApptainerSif(runtime, sifPath) {
717
721
  if (!(0, fs_1.existsSync)(sifPath))
718
722
  return false;
@@ -731,19 +735,36 @@ function isUsableApptainerSif(runtime, sifPath) {
731
735
  * Ensure a SIF image exists in the cache directory.
732
736
  * Pulls from docker:// URI if not already cached.
733
737
  */
734
- async function ensureSifImage(runtime, image) {
738
+ async function ensureSifImage(runtime, image, hooks = {}) {
739
+ const recordTiming = (label, startedAt) => {
740
+ hooks.onTiming?.(label, Math.max(0, Date.now() - startedAt));
741
+ };
735
742
  const imagesDir = (0, config_js_1.getImagesDir)();
736
743
  (0, fs_1.mkdirSync)(imagesDir, { recursive: true });
737
744
  const sifPath = (0, path_1.join)(imagesDir, imageToSifName(image));
738
745
  const pullLockPath = `${sifPath}.pull.lock`;
739
- if (isUsableApptainerSif(runtime, sifPath) && !(0, fs_1.existsSync)(pullLockPath)) {
746
+ const initialInspectStartedAt = Date.now();
747
+ const initialCacheHit = isUsableApptainerSif(runtime, sifPath);
748
+ recordTiming('image_prepare_sif_cache_inspect', initialInspectStartedAt);
749
+ if (Math.max(0, Date.now() - initialInspectStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
750
+ hooks.onWarning?.('slow_sif_cache_inspect');
751
+ }
752
+ if (initialCacheHit && !(0, fs_1.existsSync)(pullLockPath)) {
740
753
  return sifPath;
741
754
  }
755
+ let pullLockTimingRecorded = false;
742
756
  try {
743
757
  await (0, image_pull_lock_js_1.withImagePullFileLock)(pullLockPath, image, async () => {
744
- if (isUsableApptainerSif(runtime, sifPath))
758
+ const reInspectStartedAt = Date.now();
759
+ const cacheHitAfterLock = isUsableApptainerSif(runtime, sifPath);
760
+ recordTiming('image_prepare_sif_cache_reinspect', reInspectStartedAt);
761
+ if (Math.max(0, Date.now() - reInspectStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
762
+ hooks.onWarning?.('slow_sif_cache_reinspect');
763
+ }
764
+ if (cacheHitAfterLock)
745
765
  return;
746
766
  if ((0, fs_1.existsSync)(sifPath)) {
767
+ hooks.onWarning?.('cached_sif_failed_validation');
747
768
  log.warn(`Cached SIF for ${log.dim(image)} failed validation. Re-pulling image.`);
748
769
  try {
749
770
  (0, fs_1.rmSync)(sifPath, { force: true });
@@ -755,10 +776,19 @@ async function ensureSifImage(runtime, image) {
755
776
  const tempSifPath = `${sifPath}.tmp-${process.pid}-${(0, crypto_1.randomBytes)(6).toString('hex')}`;
756
777
  log.info(`Pulling image ${log.dim(image)}`);
757
778
  try {
779
+ hooks.onHint?.('sif_pull_required');
780
+ const pullStartedAt = Date.now();
758
781
  (0, child_process_1.execFileSync)(runtime, ['pull', tempSifPath, `docker://${image}`], {
759
782
  stdio: 'inherit',
760
783
  });
761
- if (!isUsableApptainerSif(runtime, tempSifPath)) {
784
+ recordTiming('image_prepare_sif_pull', pullStartedAt);
785
+ const validatePulledStartedAt = Date.now();
786
+ const pulledSifUsable = isUsableApptainerSif(runtime, tempSifPath);
787
+ recordTiming('image_prepare_sif_pull_validate', validatePulledStartedAt);
788
+ if (Math.max(0, Date.now() - validatePulledStartedAt) >= SLOW_APPTAINER_SIF_INSPECT_WARN_MS) {
789
+ hooks.onWarning?.('slow_sif_pull_validate');
790
+ }
791
+ if (!pulledSifUsable) {
762
792
  throw new Error(`Pulled SIF failed validation: ${tempSifPath}`);
763
793
  }
764
794
  (0, fs_1.renameSync)(tempSifPath, sifPath);
@@ -772,6 +802,13 @@ async function ensureSifImage(runtime, image) {
772
802
  }
773
803
  }
774
804
  }, {
805
+ onAcquired: ({ waitedMs }) => {
806
+ pullLockTimingRecorded = true;
807
+ hooks.onTiming?.('image_prepare_pull_lock_wait', waitedMs);
808
+ if (waitedMs >= SLOW_IMAGE_PULL_LOCK_WAIT_WARN_MS) {
809
+ hooks.onWarning?.('slow_image_pull_lock_wait');
810
+ }
811
+ },
775
812
  onWait: ({ owner }) => {
776
813
  const ownerLabel = owner && owner !== 'unknown' ? ` (owner: ${owner})` : '';
777
814
  log.step(`Waiting for shared image pull lock for ${log.dim(image)}${ownerLabel}...`);
@@ -789,6 +826,9 @@ async function ensureSifImage(runtime, image) {
789
826
  console.error(String(msg).trim().slice(0, 500));
790
827
  process.exit(1);
791
828
  }
829
+ if (!pullLockTimingRecorded) {
830
+ hooks.onTiming?.('image_prepare_pull_lock_wait', 0);
831
+ }
792
832
  return sifPath;
793
833
  }
794
834
  function ensurePodmanImage(runtime, image) {
@@ -1018,7 +1058,32 @@ function mapSlurmProxyArgsToHost(session, args, containerCwd) {
1018
1058
  }
1019
1059
  return { ok: true, args: mappedArgs };
1020
1060
  }
1021
- function startSlurmHostProxy(session) {
1061
/**
 * Start `server` listening on a Unix socket path and wait for the outcome.
 *
 * Temporary `error` and `listening` handlers are attached before calling
 * `server.listen(socketHostPath)`. Whichever event fires first settles the
 * promise, and both temporary handlers are detached at that point so they
 * do not linger on the server.
 *
 * @param {object} server - Object exposing `once`, `off` and `listen`
 *   (an event-emitter style server).
 * @param {string} socketHostPath - Path handed to `server.listen`.
 * @returns {Promise<void>} Resolves once `listening` fires; rejects with the
 *   emitted `error`, or with whatever `server.listen` throws synchronously.
 */
async function listenOnUnixSocket(server, socketHostPath) {
    await new Promise((resolve, reject) => {
        // Function declarations are hoisted, so the handlers can refer to
        // each other regardless of the order they are written in.
        function detachHandlers() {
            server.off('error', handleError);
            server.off('listening', handleListening);
        }
        function handleError(failure) {
            detachHandlers();
            reject(failure);
        }
        function handleListening() {
            detachHandlers();
            resolve();
        }
        server.once('error', handleError);
        server.once('listening', handleListening);
        try {
            server.listen(socketHostPath);
        }
        catch (failure) {
            // listen() itself threw, so no event will arrive for this call.
            detachHandlers();
            reject(failure);
        }
    });
}
1086
+ async function startSlurmHostProxy(session) {
1022
1087
  const sandboxHome = (0, config_js_1.getSandboxHome)();
1023
1088
  const socketHostDir = (0, path_1.join)(sandboxHome, '.labgate', 'slurm', 'host-proxy');
1024
1089
  const socketHostPath = (0, path_1.join)(socketHostDir, 'slurm.sock');
@@ -1141,10 +1206,14 @@ function startSlurmHostProxy(session) {
1141
1206
  log.warn(`SLURM host proxy server error: ${err?.message ?? String(err)}`);
1142
1207
  });
1143
1208
  try {
1144
- server.listen(socketHostPath);
1209
+ await listenOnUnixSocket(server, socketHostPath);
1145
1210
  }
1146
1211
  catch (err) {
1147
1212
  log.warn(`Could not start SLURM host proxy: ${err?.message ?? String(err)}`);
1213
+ try {
1214
+ server.close();
1215
+ }
1216
+ catch { /* ignore */ }
1148
1217
  try {
1149
1218
  (0, fs_1.unlinkSync)(socketHostPath);
1150
1219
  }
@@ -1276,7 +1345,9 @@ const DEFAULT_CODEX_OAUTH_CALLBACK_PORT = 1455;
1276
1345
  const CODEX_OAUTH_STARTUP_HEARTBEAT_MS = 10_000;
1277
1346
  const CODEX_OAUTH_STARTUP_HEARTBEAT_MAX_MS = 50_000;
1278
1347
  const CODEX_OAUTH_STARTUP_SPINNER_MS = 125;
1279
- const CODEX_OAUTH_STARTUP_SPINNER_FRAMES = ['|', '/', '-', '\\'];
1348
+ const CLI_STARTUP_SPINNER_FRAMES = ['|', '/', '-', '\\'];
1349
+ const CLI_STARTUP_SPINNER_MS = 120;
1350
+ const CLI_STARTUP_HEARTBEAT_MS = 5_000;
1280
1351
  const DEFERRED_SLURM_PASSTHROUGH_DELAY_MS = 1_500;
1281
1352
  function getCodexOauthCallbackPort() {
1282
1353
  const raw = (process.env.LABGATE_CODEX_OAUTH_CALLBACK_PORT || '').trim();
@@ -1305,6 +1376,64 @@ function buildCodexOauthPublishSpec(port) {
1305
1376
  // Publishing without an explicit host IP gives dual-stack localhost reachability.
1306
1377
  return `${port}:${port}`;
1307
1378
  }
1379
/**
 * Build the progress message shown while the CLI is starting up.
 *
 * Both `agent` and `runtime` are matched case-insensitively with surrounding
 * whitespace ignored, so values such as `' Codex '` or `'Apptainer'` select
 * the same wording as their canonical lower-case forms.
 *
 * @param {string} agent - Agent name; `codex` yields the label "Codex",
 *   anything else (including empty/undefined) yields "Claude".
 * @param {string} phase - `'image'` selects the "Preparing ..." wording;
 *   any other value selects the "Launching ..." wording.
 * @param {string} runtime - Container runtime name; only consulted for the
 *   `'image'` phase (`apptainer` and `podman` get dedicated wording).
 * @returns {string} The message, without trailing ellipsis.
 */
function describeCliStartupPhase(agent, phase, runtime) {
    const normalize = (value) => String(value || '').trim().toLowerCase();
    const agentLabel = normalize(agent) === 'codex' ? 'Codex' : 'Claude';
    if (phase !== 'image') {
        return `Launching ${agentLabel} inside sandbox`;
    }
    // Normalize runtime the same way as agent so differently-cased or padded
    // runtime names do not silently fall back to the generic message.
    switch (normalize(runtime)) {
        case 'apptainer':
            return `Preparing ${agentLabel} Apptainer sandbox`;
        case 'podman':
            return `Preparing ${agentLabel} container sandbox`;
        default:
            return `Preparing ${agentLabel} sandbox`;
    }
}
1390
/**
 * Show startup progress until the child produces output or `stop` is called.
 *
 * When both stderr and stdout are TTYs, a single spinner line is redrawn on
 * stderr every `CLI_STARTUP_SPINNER_MS`; otherwise a plain `log.step` line is
 * emitted every `CLI_STARTUP_HEARTBEAT_MS`. One message is rendered
 * immediately on start.
 *
 * @param {string} agent - Forwarded to `describeCliStartupPhase`.
 * @param {string} phase - Forwarded to `describeCliStartupPhase`.
 * @param {string} runtime - Forwarded to `describeCliStartupPhase`.
 * @returns {{ noteOutput: (data: unknown) => void, stop: () => void }}
 *   `noteOutput` stops the heartbeat on the first non-empty chunk;
 *   `stop` halts it unconditionally (idempotent).
 */
function startCliStartupHeartbeat(agent, phase, runtime) {
    const beganAt = Date.now();
    const interactive = Boolean(process.stderr.isTTY && process.stdout.isTTY);
    let finished = false;
    let outputSeen = false;
    let timer = null;
    let frameIndex = 0;
    const draw = () => {
        // Elapsed time is clamped so the display never shows less than 1s.
        const elapsedSeconds = Math.max(1, Math.floor((Date.now() - beganAt) / 1000));
        const message = describeCliStartupPhase(agent, phase, runtime);
        if (!interactive) {
            log.step(`${message}... (${elapsedSeconds}s elapsed)`);
            return;
        }
        const frame = CLI_STARTUP_SPINNER_FRAMES[frameIndex];
        frameIndex = (frameIndex + 1) % CLI_STARTUP_SPINNER_FRAMES.length;
        // "\r\x1b[2K" returns to column 0 and erases the line before redrawing.
        process.stderr.write(`\r\x1b[2K${log.dim('›')} ${message}... ${frame} ${elapsedSeconds}s`);
    };
    const stop = () => {
        if (finished) {
            return;
        }
        finished = true;
        if (timer) {
            clearInterval(timer);
            timer = null;
        }
        if (interactive) {
            // Wipe the spinner line so later output starts on a clean line.
            process.stderr.write('\r\x1b[2K');
        }
    };
    const noteOutput = (data) => {
        const alreadySettled = finished || outputSeen;
        if (alreadySettled || String(data || '').length === 0) {
            return;
        }
        outputSeen = true;
        stop();
    };
    draw();
    const periodMs = interactive ? CLI_STARTUP_SPINNER_MS : CLI_STARTUP_HEARTBEAT_MS;
    timer = setInterval(() => {
        if (!finished && !outputSeen) {
            draw();
        }
    }, periodMs);
    // Do not let the heartbeat timer keep the event loop alive on its own.
    timer.unref();
    return { noteOutput, stop };
}
1308
1437
  function startCodexOauthStartupHeartbeat() {
1309
1438
  const startedAt = Date.now();
1310
1439
  let stopped = false;
@@ -1316,8 +1445,8 @@ function startCodexOauthStartupHeartbeat() {
1316
1445
  const useSpinner = !!(process.stderr.isTTY && process.stdout.isTTY);
1317
1446
  const renderSpinner = () => {
1318
1447
  const elapsedSeconds = Math.max(1, Math.floor((Date.now() - startedAt) / 1000));
1319
- const frame = CODEX_OAUTH_STARTUP_SPINNER_FRAMES[spinnerFrame];
1320
- spinnerFrame = (spinnerFrame + 1) % CODEX_OAUTH_STARTUP_SPINNER_FRAMES.length;
1448
+ const frame = CLI_STARTUP_SPINNER_FRAMES[spinnerFrame];
1449
+ spinnerFrame = (spinnerFrame + 1) % CLI_STARTUP_SPINNER_FRAMES.length;
1321
1450
  process.stderr.write(`\r\x1b[2K${log.dim('›')} Waiting for Codex login output... ${frame} ${elapsedSeconds}s`);
1322
1451
  spinnerActive = true;
1323
1452
  };
@@ -2040,6 +2169,48 @@ function logStartupTimings(entries, totalMs) {
2040
2169
  const prefix = summary ? `${summary}, ` : '';
2041
2170
  log.step(`[labgate] startup timings: ${prefix}total=${formatStartupDuration(totalMs)}`);
2042
2171
  }
2172
/**
 * Build the initial in-memory startup report for a session.
 *
 * All timing, completion, launch and exit fields start out empty (`null`,
 * `0`, `{}` or `[]`); callers are expected to fill them in as startup
 * progresses. The only warning that can be present initially is
 * `ui_not_detected`, added when `session.uiDetected` is exactly `false`.
 *
 * @param {object} session - Session descriptor; reads `agent`, `workdir`,
 *   `dryRun`, `uiDetected` and `config.slurm.enabled`.
 * @param {string} runtime - Runtime name recorded verbatim in the report.
 * @param {string} image - Image reference recorded verbatim in the report.
 * @returns {object} A fresh report object (schema_version 2).
 */
function createStartupReportData(session, runtime, image) {
    const createdAt = new Date().toISOString();
    const isDryRun = session.dryRun;
    const initialWarnings = session.uiDetected === false ? ['ui_not_detected'] : [];
    // SLURM details are unknown at creation time apart from the config flag.
    const slurmSection = {
        enabled: session.config.slurm.enabled,
        host_proxy_enabled: null,
        passthrough_mode: null,
        host_commands_found: null,
        staged_commands: null,
        reused_stage: null,
    };
    // Key order below determines the order of keys in the serialized report.
    const report = {
        schema_version: 2,
        kind: 'labgate-startup-report',
        generated_at: createdAt,
        updated_at: createdAt,
        startup_completed_at: null,
        startup_completion: null,
        pid: process.pid,
        node: (0, os_1.hostname)(),
        agent: session.agent,
        runtime,
        workdir: session.workdir,
        image,
        dry_run: isDryRun,
        status: isDryRun ? 'dry-run' : 'starting',
        launch_mode: isDryRun ? 'dry-run' : null,
        ui_detected: session.uiDetected ?? null,
        total_ms: 0,
        session_total_ms: 0,
        first_output_ms: null,
        timings_ms: {},
        cold_start_hints: [],
        warnings: initialWarnings,
        slurm: slurmSection,
        exit_code: null,
    };
    return report;
}
2211
/**
 * Persist a startup report as pretty-printed JSON.
 *
 * The report is serialized with 2-space indentation plus a trailing newline
 * and handed to the `writeTextFileAtomic` helper with file mode 0o600.
 *
 * @param {string} path - Destination file path.
 * @param {object} report - Report object to serialize.
 */
function writeStartupReportFile(path, report) {
    const serialized = `${JSON.stringify(report, null, 2)}\n`;
    const writeOptions = { mode: 0o600 };
    (0, startup_stage_lock_js_1.writeTextFileAtomic)(path, serialized, writeOptions);
}
2043
2214
  // ── Shared session helpers ─────────────────────────────────
2044
2215
  function logSessionStart(session, sessionId) {
2045
2216
  if (!session.config.audit.enabled)
@@ -2137,15 +2308,107 @@ function printSessionInfo(session, sessionId, runtime) {
2137
2308
  async function startSession(session) {
2138
2309
  const startupStartedAt = Date.now();
2139
2310
  const startupTimings = [];
2140
- const recordStartupTiming = (label, startedAt) => {
2141
- startupTimings.push([label, Math.max(0, Date.now() - startedAt)]);
2142
- };
2143
2311
  const preferred = session.config.runtime;
2144
2312
  const runtime = session.dryRun ? getDryRunRuntime(preferred) : (0, runtime_js_1.getRuntime)(preferred);
2145
2313
  const image = session.imageOverride ?? session.config.image;
2146
2314
  const sessionId = (0, crypto_1.randomBytes)(4).toString('hex');
2147
2315
  const footerMode = session.footerMode ?? 'sticky';
2148
2316
  const footerLine = formatStatusFooter(session, runtime, sessionId, image);
2317
+ const startupReport = session.startupReportPath
2318
+ ? createStartupReportData(session, runtime, image)
2319
+ : null;
2320
+ let startupReportWriteFailed = false;
2321
+ const flushStartupReport = () => {
2322
+ if (!startupReport || !session.startupReportPath || startupReportWriteFailed)
2323
+ return;
2324
+ const elapsedMs = Math.max(0, Date.now() - startupStartedAt);
2325
+ startupReport.updated_at = new Date().toISOString();
2326
+ startupReport.session_total_ms = elapsedMs;
2327
+ if (startupReport.startup_completed_at === null) {
2328
+ startupReport.total_ms = elapsedMs;
2329
+ }
2330
+ try {
2331
+ writeStartupReportFile(session.startupReportPath, startupReport);
2332
+ }
2333
+ catch (err) {
2334
+ startupReportWriteFailed = true;
2335
+ log.warn(`Could not write startup report to ${session.startupReportPath}: ${err?.message ?? String(err)}`);
2336
+ }
2337
+ };
2338
+ const noteStartupWarning = (warning) => {
2339
+ if (!startupReport)
2340
+ return;
2341
+ if (!startupReport.warnings.includes(warning)) {
2342
+ startupReport.warnings.push(warning);
2343
+ flushStartupReport();
2344
+ }
2345
+ };
2346
+ const noteColdStartHint = (hint) => {
2347
+ if (!startupReport)
2348
+ return;
2349
+ if (!startupReport.cold_start_hints.includes(hint)) {
2350
+ startupReport.cold_start_hints.push(hint);
2351
+ flushStartupReport();
2352
+ }
2353
+ };
2354
+ const setStartupStatus = (status) => {
2355
+ if (!startupReport)
2356
+ return;
2357
+ startupReport.status = status;
2358
+ flushStartupReport();
2359
+ };
2360
+ const setStartupLaunchMode = (mode) => {
2361
+ if (!startupReport)
2362
+ return;
2363
+ startupReport.launch_mode = mode;
2364
+ flushStartupReport();
2365
+ };
2366
+ const completeStartup = (completion) => {
2367
+ if (!startupReport || startupReport.startup_completed_at !== null)
2368
+ return;
2369
+ startupReport.startup_completed_at = new Date().toISOString();
2370
+ startupReport.startup_completion = completion;
2371
+ startupReport.total_ms = Math.max(0, Date.now() - startupStartedAt);
2372
+ flushStartupReport();
2373
+ };
2374
+ const noteStartupFirstOutput = (data) => {
2375
+ if (!startupReport)
2376
+ return;
2377
+ if (startupReport.first_output_ms !== null)
2378
+ return;
2379
+ if (String(data || '').length === 0)
2380
+ return;
2381
+ const firstOutputMs = Math.max(0, Date.now() - startupStartedAt);
2382
+ startupReport.first_output_ms = firstOutputMs;
2383
+ startupReport.timings_ms.launch_first_output = firstOutputMs;
2384
+ startupReport.status = 'running';
2385
+ completeStartup('first-output');
2386
+ flushStartupReport();
2387
+ };
2388
+ const updateSlurmStartupReport = (payload) => {
2389
+ if (!startupReport)
2390
+ return;
2391
+ if (payload.hostProxyEnabled !== undefined)
2392
+ startupReport.slurm.host_proxy_enabled = payload.hostProxyEnabled;
2393
+ if (payload.passthroughMode !== undefined)
2394
+ startupReport.slurm.passthrough_mode = payload.passthroughMode;
2395
+ if (payload.hostCommandsFound !== undefined)
2396
+ startupReport.slurm.host_commands_found = payload.hostCommandsFound;
2397
+ if (payload.stagedCommands !== undefined)
2398
+ startupReport.slurm.staged_commands = payload.stagedCommands;
2399
+ if (payload.reusedStage !== undefined)
2400
+ startupReport.slurm.reused_stage = payload.reusedStage;
2401
+ flushStartupReport();
2402
+ };
2403
+ const recordStartupTiming = (label, startedAt) => {
2404
+ const elapsedMs = Math.max(0, Date.now() - startedAt);
2405
+ startupTimings.push([label, elapsedMs]);
2406
+ if (startupReport) {
2407
+ startupReport.timings_ms[label] = elapsedMs;
2408
+ flushStartupReport();
2409
+ }
2410
+ };
2411
+ flushStartupReport();
2149
2412
  // Extract agent auth token (CLI flag → env var)
2150
2413
  const tokenEnv = session.dryRun ? [] : getAgentTokenEnv(session.agent, session.apiKey);
2151
2414
  const bridgeCodexOauthForPodman = runtime === 'podman' && shouldBridgeCodexOauthForPodman(session.agent, session.config.network.mode);
@@ -2181,6 +2444,7 @@ async function startSession(session) {
2181
2444
  let cleanupSlurmHostProxy = () => { };
2182
2445
  let cleanupDeferredSlurmPassthrough = () => { };
2183
2446
  let startDeferredSlurmPassthrough = () => { };
2447
+ let startupHeartbeat = null;
2184
2448
  // If the agent isn't installed in the persistent sandbox home yet, warn that first run can be slow.
2185
2449
  if (!session.dryRun) {
2186
2450
  const sandboxHome = (0, config_js_1.getSandboxHome)();
@@ -2188,18 +2452,28 @@ async function startSession(session) {
2188
2452
  const installedBin = (0, path_1.join)(sandboxHome, '.npm-global', 'bin', agentBin);
2189
2453
  try {
2190
2454
  if (!(0, fs_1.existsSync)(installedBin)) {
2455
+ noteColdStartHint('agent_missing_in_sandbox');
2191
2456
  log.step('First run: preparing sandbox (pulling image + installing agent). This can take a minute...');
2192
2457
  }
2193
2458
  }
2194
2459
  catch { /* ignore */ }
2460
+ if (runtime === 'apptainer') {
2461
+ try {
2462
+ const cachedSifPath = (0, path_1.join)((0, config_js_1.getImagesDir)(), imageToSifName(image));
2463
+ if (!(0, fs_1.existsSync)(cachedSifPath))
2464
+ noteColdStartHint('sif_cache_missing');
2465
+ }
2466
+ catch { /* ignore */ }
2467
+ }
2195
2468
  }
2196
2469
  // For Apptainer sessions, always try SLURM CLI passthrough so sbatch/squeue
2197
2470
  // can work out of the box when present on the host. Full SLURM tracking/MCP
2198
2471
  // remains controlled by slurm.enabled.
2199
2472
  if (!session.dryRun && runtime === 'apptainer') {
2200
2473
  const hostProxyStartedAt = Date.now();
2201
- const hostProxy = startSlurmHostProxy(session);
2474
+ const hostProxy = await startSlurmHostProxy(session);
2202
2475
  recordStartupTiming('slurm_host_proxy', hostProxyStartedAt);
2476
+ updateSlurmStartupReport({ hostProxyEnabled: !!hostProxy });
2203
2477
  const trackingEnabled = session.config.slurm.enabled;
2204
2478
  const runSlurmPassthroughStage = (mode) => {
2205
2479
  const stageStartedAt = Date.now();
@@ -2212,8 +2486,15 @@ async function startSession(session) {
2212
2486
  preferHostProxy: false,
2213
2487
  });
2214
2488
  recordStartupTiming(mode === 'startup' ? 'slurm_passthrough_stage' : 'slurm_passthrough_stage_background', stageStartedAt);
2489
+ updateSlurmStartupReport({
2490
+ passthroughMode: staged.ok ? 'staged' : 'unavailable',
2491
+ hostCommandsFound: staged.hostCommands.length,
2492
+ stagedCommands: staged.staged.length,
2493
+ reusedStage: staged.reused,
2494
+ });
2215
2495
  if (trackingEnabled) {
2216
2496
  if (!staged.ok) {
2497
+ noteStartupWarning('slurm_commands_unavailable');
2217
2498
  log.warn('SLURM is enabled but no SLURM commands were found on the host PATH. ' +
2218
2499
  'Inside-sandbox SLURM commands (squeue/sbatch/...) will be unavailable.');
2219
2500
  }
@@ -2236,6 +2517,13 @@ async function startSession(session) {
2236
2517
  }
2237
2518
  catch (err) {
2238
2519
  recordStartupTiming(mode === 'startup' ? 'slurm_passthrough_stage' : 'slurm_passthrough_stage_background', stageStartedAt);
2520
+ updateSlurmStartupReport({
2521
+ passthroughMode: 'unavailable',
2522
+ reusedStage: false,
2523
+ });
2524
+ noteStartupWarning(mode === 'startup'
2525
+ ? 'slurm_passthrough_stage_failed'
2526
+ : 'slurm_passthrough_stage_background_failed');
2239
2527
  log.warn(mode === 'startup'
2240
2528
  ? `SLURM passthrough staging failed: ${err?.message ?? String(err)}`
2241
2529
  : `Deferred SLURM passthrough staging failed: ${err?.message ?? String(err)}`);
@@ -2253,8 +2541,17 @@ async function startSession(session) {
2253
2541
  preferHostProxy: true,
2254
2542
  });
2255
2543
  recordStartupTiming('slurm_passthrough_stage', stageStartedAt);
2544
+ updateSlurmStartupReport({
2545
+ passthroughMode: staged.ok
2546
+ ? (staged.mode === 'proxy-only' ? 'proxy-only' : 'staged')
2547
+ : 'unavailable',
2548
+ hostCommandsFound: staged.hostCommands.length,
2549
+ stagedCommands: staged.staged.length,
2550
+ reusedStage: staged.reused,
2551
+ });
2256
2552
  if (trackingEnabled) {
2257
2553
  if (!staged.ok) {
2554
+ noteStartupWarning('slurm_commands_unavailable');
2258
2555
  log.warn('SLURM is enabled but no SLURM commands were found on the host PATH. ' +
2259
2556
  'Inside-sandbox SLURM commands (squeue/sbatch/...) will be unavailable.');
2260
2557
  }
@@ -2275,12 +2572,19 @@ async function startSession(session) {
2275
2572
  }
2276
2573
  }
2277
2574
  else {
2575
+ noteStartupWarning('slurm_host_proxy_unavailable');
2278
2576
  if (trackingEnabled) {
2279
2577
  log.step('SLURM host proxy unavailable. Staging SLURM passthrough before startup.');
2280
2578
  runSlurmPassthroughStage('startup');
2281
2579
  }
2282
2580
  else {
2283
2581
  log.step('SLURM host proxy unavailable. Deferring SLURM passthrough staging until after startup.');
2582
+ updateSlurmStartupReport({
2583
+ passthroughMode: 'deferred-stage',
2584
+ hostCommandsFound: null,
2585
+ stagedCommands: null,
2586
+ reusedStage: null,
2587
+ });
2284
2588
  let cancelled = false;
2285
2589
  let timer = null;
2286
2590
  let started = false;
@@ -2308,32 +2612,58 @@ async function startSession(session) {
2308
2612
  }
2309
2613
  }
2310
2614
  let args;
2311
- if (runtime === 'apptainer') {
2312
- let sifPath;
2313
- if (session.dryRun) {
2314
- sifPath = (0, path_1.join)((0, config_js_1.getImagesDir)(), imageToSifName(image));
2615
+ const imagePrepareStartedAt = Date.now();
2616
+ try {
2617
+ if (runtime === 'apptainer') {
2618
+ startupHeartbeat = startCliStartupHeartbeat(session.agent, 'image', runtime);
2619
+ let sifPath;
2620
+ if (session.dryRun) {
2621
+ sifPath = (0, path_1.join)((0, config_js_1.getImagesDir)(), imageToSifName(image));
2622
+ }
2623
+ else {
2624
+ sifPath = await ensureSifImage(runtime, image, {
2625
+ onTiming: (label, elapsedMs) => {
2626
+ if (startupReport) {
2627
+ startupReport.timings_ms[label] = elapsedMs;
2628
+ flushStartupReport();
2629
+ }
2630
+ startupTimings.push([label, elapsedMs]);
2631
+ },
2632
+ onWarning: noteStartupWarning,
2633
+ onHint: noteColdStartHint,
2634
+ });
2635
+ }
2636
+ const buildArgsStartedAt = Date.now();
2637
+ args = buildApptainerArgs(session, sifPath, sessionId, runtimeEnvArgs);
2638
+ recordStartupTiming('image_prepare_build_args', buildArgsStartedAt);
2315
2639
  }
2316
2640
  else {
2317
- sifPath = await ensureSifImage(runtime, image);
2318
- }
2319
- args = buildApptainerArgs(session, sifPath, sessionId, runtimeEnvArgs);
2320
- }
2321
- else {
2322
- if (bridgeCodexOauthForPodman) {
2323
- const port = getCodexOauthCallbackPort();
2324
- log.step(`Codex OAuth callback bridge enabled on localhost:${port} ` +
2325
- '(Podman macOS uses bridge networking for callback compatibility).');
2326
- if (needsCodexOauthStartupHeartbeat) {
2327
- log.step('Launching Codex OAuth flow. Login output may take ~30s to appear.');
2641
+ if (bridgeCodexOauthForPodman) {
2642
+ const port = getCodexOauthCallbackPort();
2643
+ log.step(`Codex OAuth callback bridge enabled on localhost:${port} ` +
2644
+ '(Podman macOS uses bridge networking for callback compatibility).');
2645
+ if (needsCodexOauthStartupHeartbeat) {
2646
+ log.step('Launching Codex OAuth flow. Login output may take ~30s to appear.');
2647
+ }
2328
2648
  }
2649
+ startupHeartbeat = startCliStartupHeartbeat(session.agent, 'image', runtime);
2650
+ if (!session.dryRun) {
2651
+ ensurePodmanImage(runtime, image);
2652
+ }
2653
+ args = buildPodmanArgs(session, image, sessionId, runtimeEnvArgs, { tty: !!(process.stdout.isTTY && process.stdin.isTTY) });
2329
2654
  }
2330
- if (!session.dryRun) {
2331
- ensurePodmanImage(runtime, image);
2332
- }
2333
- args = buildPodmanArgs(session, image, sessionId, runtimeEnvArgs, { tty: !!(process.stdout.isTTY && process.stdin.isTTY) });
2655
+ }
2656
+ finally {
2657
+ recordStartupTiming('image_prepare', imagePrepareStartedAt);
2658
+ startupHeartbeat?.stop();
2659
+ startupHeartbeat = null;
2334
2660
  }
2335
2661
  if (session.dryRun) {
2662
+ setStartupLaunchMode('dry-run');
2663
+ setStartupStatus('dry-run');
2664
+ completeStartup('dry-run');
2336
2665
  prettyPrintCommand(runtime, args);
2666
+ flushStartupReport();
2337
2667
  return;
2338
2668
  }
2339
2669
  // Create OAuth URL interceptor as a fallback when BROWSER hook does not fire.
@@ -2372,6 +2702,7 @@ async function startSession(session) {
2372
2702
  sessionSlurmPoller.start();
2373
2703
  }
2374
2704
  catch (err) {
2705
+ noteStartupWarning('slurm_tracking_unavailable');
2375
2706
  log.warn(`SLURM tracking unavailable: ${err?.message ?? String(err)}`);
2376
2707
  cleanupSlurm();
2377
2708
  }
@@ -2389,7 +2720,11 @@ async function startSession(session) {
2389
2720
  const wantsSticky = footerMode === 'sticky';
2390
2721
  const needsOAuthPtyFallback = !!oauthInterceptor;
2391
2722
  const hasTty = !!(process.stdout.isTTY && process.stdin.isTTY);
2392
- const shouldUsePty = hasTty && (wantsSticky || needsOAuthPtyFallback || needsCodexOauthStartupHeartbeat);
2723
+ const wantsStartupOutputCapture = !!startupReport;
2724
+ const shouldUsePty = hasTty && (wantsSticky
2725
+ || needsOAuthPtyFallback
2726
+ || needsCodexOauthStartupHeartbeat
2727
+ || wantsStartupOutputCapture);
2393
2728
  if (shouldUsePty) {
2394
2729
  const pty = await loadPty();
2395
2730
  if (!pty) {
@@ -2402,6 +2737,10 @@ async function startSession(session) {
2402
2737
  else if (needsCodexOauthStartupHeartbeat) {
2403
2738
  log.step('Codex startup heartbeat unavailable (node-pty missing).');
2404
2739
  }
2740
+ else if (wantsStartupOutputCapture) {
2741
+ noteStartupWarning('startup_output_capture_unavailable');
2742
+ log.step('Startup output capture unavailable (node-pty missing).');
2743
+ }
2405
2744
  }
2406
2745
  else {
2407
2746
  let runtimePath;
@@ -2419,6 +2758,9 @@ async function startSession(session) {
2419
2758
  const cols = process.stdout.columns || 80;
2420
2759
  const rows = process.stdout.rows || 24;
2421
2760
  let child;
2761
+ const launchHeartbeat = startCliStartupHeartbeat(session.agent, 'launch', runtime);
2762
+ setStartupStatus('launching');
2763
+ setStartupLaunchMode('pty');
2422
2764
  try {
2423
2765
  child = pty.spawn(runtimePath, args, {
2424
2766
  name: 'xterm-256color',
@@ -2429,6 +2771,8 @@ async function startSession(session) {
2429
2771
  });
2430
2772
  }
2431
2773
  catch (err) {
2774
+ launchHeartbeat.stop();
2775
+ noteStartupWarning('pty_spawn_failed');
2432
2776
  log.step(`PTY spawn failed (${err?.message ?? String(err)}). Falling back to standard spawn.`);
2433
2777
  // Fall through to standard spawn path below.
2434
2778
  child = null;
@@ -2452,7 +2796,9 @@ async function startSession(session) {
2452
2796
  renderStickyFooter(footerLine);
2453
2797
  }
2454
2798
  child.onData((data) => {
2799
+ launchHeartbeat.noteOutput(data);
2455
2800
  codexOauthHeartbeat?.noteOutput(data);
2801
+ noteStartupFirstOutput(data);
2456
2802
  if (oauthInterceptor)
2457
2803
  oauthInterceptor.feed(data);
2458
2804
  process.stdout.write(data);
@@ -2475,7 +2821,14 @@ async function startSession(session) {
2475
2821
  const timeoutHandle = setupSessionTimeout(session, sessionId, runtime, () => exited, () => child.kill('SIGTERM'));
2476
2822
  child.onExit((event) => {
2477
2823
  exited = true;
2824
+ launchHeartbeat.stop();
2478
2825
  codexOauthHeartbeat?.stop();
2826
+ if (startupReport) {
2827
+ completeStartup('process-exit');
2828
+ startupReport.exit_code = event.exitCode ?? 0;
2829
+ startupReport.status = 'exited';
2830
+ flushStartupReport();
2831
+ }
2479
2832
  if (timeoutHandle)
2480
2833
  clearTimeout(timeoutHandle);
2481
2834
  browserHook?.cleanup();
@@ -2505,17 +2858,35 @@ async function startSession(session) {
2505
2858
  else if (needsOAuthPtyFallback && !hasTty) {
2506
2859
  log.step('OAuth URL fallback interceptor requires a TTY; relying on BROWSER hook only.');
2507
2860
  }
2861
+ else if (wantsStartupOutputCapture && !hasTty) {
2862
+ noteStartupWarning('startup_output_capture_unavailable');
2863
+ log.step('Startup output capture requires a TTY; report will omit first-output timing.');
2864
+ }
2508
2865
  if (footerMode === 'sticky') {
2509
2866
  console.log(footerLine);
2510
2867
  }
2868
+ log.step(`${describeCliStartupPhase(session.agent, 'launch', runtime)}...`);
2511
2869
  console.log('');
2512
2870
  // Spawn path: must use stdio:'inherit' to preserve TTY for Claude Code.
2513
2871
  // OAuth interception relies on the BROWSER hook + file watcher here
2514
2872
  // (the PTY path above uses onData for direct interception).
2873
+ setStartupStatus('launching');
2874
+ setStartupLaunchMode('inherit');
2515
2875
  const child = (0, child_process_1.spawn)(runtime, args, { stdio: 'inherit' });
2876
+ if (startupReport && startupReport.first_output_ms === null) {
2877
+ startupReport.status = 'running';
2878
+ completeStartup('spawn');
2879
+ flushStartupReport();
2880
+ }
2516
2881
  startDeferredSlurmPassthrough();
2517
2882
  const timeoutHandle = setupSessionTimeout(session, sessionId, runtime, () => child.exitCode !== null, () => child.kill('SIGTERM'));
2518
2883
  child.on('close', (code) => {
2884
+ if (startupReport) {
2885
+ completeStartup('process-exit');
2886
+ startupReport.exit_code = code ?? 0;
2887
+ startupReport.status = 'exited';
2888
+ flushStartupReport();
2889
+ }
2519
2890
  if (timeoutHandle)
2520
2891
  clearTimeout(timeoutHandle);
2521
2892
  browserHook?.cleanup();