@vm0/runner 3.13.0 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +258 -97
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -10539,9 +10539,20 @@ import { Command as Command2 } from "commander";
10539
10539
  import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
10540
10540
  import { execSync as execSync3 } from "child_process";
10541
10541
 
10542
- // src/lib/firecracker/process.ts
10543
- import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync4 } from "fs";
10542
+ // src/lib/process.ts
10543
+ import { readdirSync, readFileSync as readFileSync2, readlinkSync, existsSync as existsSync4 } from "fs";
10544
10544
  import path8 from "path";
10545
+ function isOrphanProcess(pid) {
10546
+ try {
10547
+ const stat = readFileSync2(`/proc/${pid}/stat`, "utf-8");
10548
+ const lastParen = stat.lastIndexOf(")");
10549
+ if (lastParen === -1) return false;
10550
+ const fields = stat.slice(lastParen + 1).trim().split(/\s+/);
10551
+ return fields[1] === "1";
10552
+ } catch {
10553
+ return false;
10554
+ }
10555
+ }
10545
10556
  function parseFirecrackerCmdline(cmdline) {
10546
10557
  const args = cmdline.split("\0");
10547
10558
  if (!args[0]?.includes("firecracker")) return null;
@@ -10576,6 +10587,27 @@ function parseMitmproxyCmdline(cmdline) {
10576
10587
  }
10577
10588
  return null;
10578
10589
  }
10590
+ function parseRunnerCmdline(cmdline) {
10591
+ const args = cmdline.split("\0").filter((a) => a !== "");
10592
+ const startIdx = args.indexOf("start");
10593
+ const benchmarkIdx = args.indexOf("benchmark");
10594
+ let mode;
10595
+ let modeIdx;
10596
+ if (startIdx !== -1 && (benchmarkIdx === -1 || startIdx < benchmarkIdx)) {
10597
+ mode = "start";
10598
+ modeIdx = startIdx;
10599
+ } else if (benchmarkIdx !== -1) {
10600
+ mode = "benchmark";
10601
+ modeIdx = benchmarkIdx;
10602
+ } else {
10603
+ return null;
10604
+ }
10605
+ const configIdx = args.indexOf("--config", modeIdx + 1);
10606
+ if (configIdx === -1 || configIdx >= args.length - 1) return null;
10607
+ const configPath = args[configIdx + 1];
10608
+ if (!configPath?.match(/\.ya?ml$/)) return null;
10609
+ return { configPath, mode };
10610
+ }
10579
10611
  function findFirecrackerProcesses() {
10580
10612
  const processes = [];
10581
10613
  const procDir = "/proc";
@@ -10594,7 +10626,12 @@ function findFirecrackerProcesses() {
10594
10626
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
10595
10627
  const parsed = parseFirecrackerCmdline(cmdline);
10596
10628
  if (parsed) {
10597
- processes.push({ pid, vmId: parsed.vmId, baseDir: parsed.baseDir });
10629
+ processes.push({
10630
+ pid,
10631
+ vmId: parsed.vmId,
10632
+ baseDir: parsed.baseDir,
10633
+ isOrphan: isOrphanProcess(pid)
10634
+ });
10598
10635
  }
10599
10636
  } catch {
10600
10637
  continue;
@@ -10625,7 +10662,57 @@ function findMitmproxyProcesses() {
10625
10662
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
10626
10663
  const baseDir = parseMitmproxyCmdline(cmdline);
10627
10664
  if (baseDir) {
10628
- processes.push({ pid, baseDir });
10665
+ processes.push({ pid, baseDir, isOrphan: isOrphanProcess(pid) });
10666
+ }
10667
+ } catch {
10668
+ continue;
10669
+ }
10670
+ }
10671
+ return processes;
10672
+ }
10673
+ function isNodeIndexJs(cmdline) {
10674
+ const args = cmdline.split("\0").filter((a) => a !== "");
10675
+ if (args.length < 2) return false;
10676
+ if (!args[0]?.includes("node")) return false;
10677
+ return args[1]?.endsWith("index.js") ?? false;
10678
+ }
10679
+ function findRunnerProcesses() {
10680
+ const processes = [];
10681
+ const procDir = "/proc";
10682
+ let entries;
10683
+ try {
10684
+ entries = readdirSync(procDir);
10685
+ } catch {
10686
+ return [];
10687
+ }
10688
+ for (const entry of entries) {
10689
+ if (!/^\d+$/.test(entry)) continue;
10690
+ const pid = parseInt(entry, 10);
10691
+ const cmdlinePath = path8.join(procDir, entry, "cmdline");
10692
+ if (!existsSync4(cmdlinePath)) continue;
10693
+ try {
10694
+ const cmdline = readFileSync2(cmdlinePath, "utf-8");
10695
+ const parsed = parseRunnerCmdline(cmdline);
10696
+ if (parsed) {
10697
+ processes.push({
10698
+ pid,
10699
+ configPath: parsed.configPath,
10700
+ mode: parsed.mode
10701
+ });
10702
+ continue;
10703
+ }
10704
+ if (isNodeIndexJs(cmdline)) {
10705
+ const cwdPath = path8.join(procDir, entry, "cwd");
10706
+ const cwd = readlinkSync(cwdPath);
10707
+ const configPath = path8.join(cwd, "runner.yaml");
10708
+ if (existsSync4(configPath)) {
10709
+ processes.push({
10710
+ pid,
10711
+ configPath,
10712
+ mode: "start"
10713
+ // Default to start mode (cannot determine from cmdline)
10714
+ });
10715
+ }
10629
10716
  }
10630
10717
  } catch {
10631
10718
  continue;
@@ -10646,68 +10733,39 @@ var RunnerStatusSchema = z31.object({
10646
10733
  });
10647
10734
 
10648
10735
  // src/commands/doctor.ts
10649
- function displayRunnerStatus(statusFilePath, warnings) {
10736
+ function getRunnerStatus(statusFilePath, warnings) {
10650
10737
  if (!existsSync5(statusFilePath)) {
10651
- console.log("Mode: unknown (no status.json)");
10652
10738
  return null;
10653
10739
  }
10654
10740
  try {
10655
- const status = RunnerStatusSchema.parse(
10741
+ return RunnerStatusSchema.parse(
10656
10742
  JSON.parse(readFileSync3(statusFilePath, "utf-8"))
10657
10743
  );
10658
- console.log(`Mode: ${status.mode}`);
10659
- if (status.started_at) {
10660
- const started = new Date(status.started_at);
10661
- const uptime = formatUptime(Date.now() - started.getTime());
10662
- console.log(`Started: ${started.toLocaleString()} (uptime: ${uptime})`);
10663
- }
10664
- return status;
10665
10744
  } catch {
10666
- console.log("Mode: unknown (status.json unreadable)");
10667
10745
  warnings.push({ message: "status.json exists but cannot be parsed" });
10668
10746
  return null;
10669
10747
  }
10670
10748
  }
10671
10749
  async function checkApiConnectivity(config, warnings) {
10672
- console.log("API Connectivity:");
10673
10750
  try {
10674
10751
  await pollForJob(config.server, config.group);
10675
- console.log(` \u2713 Connected to ${config.server.url}`);
10676
- console.log(" \u2713 Authentication: OK");
10752
+ return true;
10677
10753
  } catch (error) {
10678
- console.log(` \u2717 Cannot connect to ${config.server.url}`);
10679
- console.log(
10680
- ` Error: ${error instanceof Error ? error.message : "Unknown error"}`
10681
- );
10682
10754
  warnings.push({
10683
10755
  message: `Cannot connect to API: ${error instanceof Error ? error.message : "Unknown error"}`
10684
10756
  });
10757
+ return false;
10685
10758
  }
10686
10759
  }
10687
- function checkNetwork(config, warnings) {
10688
- console.log("Network:");
10689
- const mitmProcesses = findMitmproxyProcesses();
10690
- const mitmProc = mitmProcesses.find((p) => p.baseDir === config.base_dir);
10691
- if (mitmProc) {
10692
- console.log(
10693
- ` \u2713 Proxy mitmproxy (PID ${mitmProc.pid}) on :${config.proxy.port}`
10694
- );
10695
- } else {
10696
- console.log(` \u2717 Proxy mitmproxy not running`);
10697
- warnings.push({ message: "Proxy mitmproxy is not running" });
10698
- }
10699
- console.log(
10700
- ` \u2139 Namespaces: each VM runs in isolated namespace with IP ${SNAPSHOT_NETWORK.guestIp}`
10701
- );
10702
- }
10703
- function buildJobInfo(status, processes) {
10760
+ function buildJobInfo(status, processes, baseDir) {
10704
10761
  const jobs = [];
10705
10762
  const statusVmIds = /* @__PURE__ */ new Set();
10763
+ const runnerProcesses = processes.filter((p) => p.baseDir === baseDir);
10706
10764
  if (status?.active_run_ids) {
10707
10765
  for (const runId of status.active_run_ids) {
10708
10766
  const vmId = createVmId(runId);
10709
10767
  statusVmIds.add(vmId);
10710
- const proc = processes.find((p) => p.vmId === vmId);
10768
+ const proc = runnerProcesses.find((p) => p.vmId === vmId);
10711
10769
  jobs.push({
10712
10770
  runId,
10713
10771
  vmId,
@@ -10717,18 +10775,6 @@ function buildJobInfo(status, processes) {
10717
10775
  }
10718
10776
  return { jobs, statusVmIds };
10719
10777
  }
10720
- function displayRuns(jobs, maxConcurrent) {
10721
- console.log(`Runs (${jobs.length} active, max ${maxConcurrent}):`);
10722
- if (jobs.length === 0) {
10723
- console.log(" No active runs");
10724
- return;
10725
- }
10726
- console.log(" Run ID VM ID Status");
10727
- for (const job of jobs) {
10728
- const statusText = job.firecrackerPid ? `\u2713 Running (PID ${job.firecrackerPid})` : "\u26A0\uFE0F No process";
10729
- console.log(` ${job.runId} ${job.vmId} ${statusText}`);
10730
- }
10731
- }
10732
10778
  async function findOrphanNetworkNamespaces(warnings) {
10733
10779
  let allNamespaces = [];
10734
10780
  try {
@@ -10777,12 +10823,12 @@ async function findOrphanNetworkNamespaces(warnings) {
10777
10823
  return [];
10778
10824
  }
10779
10825
  }
10780
- async function detectOrphanResources(jobs, allProcesses, workspaces, statusVmIds, baseDir, warnings) {
10826
+ function detectRunnerOrphanResources(jobs, allProcesses, workspaces, statusVmIds, baseDir, warnings) {
10781
10827
  const processes = allProcesses.filter((p) => p.baseDir === baseDir);
10782
10828
  for (const job of jobs) {
10783
10829
  if (!job.firecrackerPid) {
10784
10830
  warnings.push({
10785
- message: `Run ${job.vmId} in status.json but no Firecracker process running`
10831
+ message: `Run ${job.vmId} in status.json but no Firecracker process`
10786
10832
  });
10787
10833
  }
10788
10834
  }
@@ -10794,31 +10840,15 @@ async function detectOrphanResources(jobs, allProcesses, workspaces, statusVmIds
10794
10840
  });
10795
10841
  }
10796
10842
  }
10797
- const orphanNetns = await findOrphanNetworkNamespaces(warnings);
10798
- for (const ns of orphanNetns) {
10799
- warnings.push({
10800
- message: `Orphan network namespace: ${ns} (runner process not running)`
10801
- });
10802
- }
10803
10843
  for (const ws of workspaces) {
10804
10844
  const vmId = runnerPaths.extractVmId(ws);
10805
10845
  if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
10806
10846
  warnings.push({
10807
- message: `Orphan workspace: ${ws} (no matching job or process)`
10847
+ message: `Orphan workspace: ${ws}`
10808
10848
  });
10809
10849
  }
10810
10850
  }
10811
10851
  }
10812
- function displayWarnings(warnings) {
10813
- console.log("Warnings:");
10814
- if (warnings.length === 0) {
10815
- console.log(" None");
10816
- } else {
10817
- for (const w of warnings) {
10818
- console.log(` - ${w.message}`);
10819
- }
10820
- }
10821
- }
10822
10852
  function formatUptime(ms) {
10823
10853
  const seconds = Math.floor(ms / 1e3);
10824
10854
  const minutes = Math.floor(seconds / 60);
@@ -10829,34 +10859,165 @@ function formatUptime(ms) {
10829
10859
  if (minutes > 0) return `${minutes}m`;
10830
10860
  return `${seconds}s`;
10831
10861
  }
10832
- var doctorCommand = new Command2("doctor").description("Diagnose runner health, check network, and detect issues").option("--config <path>", "Config file path", "./runner.yaml").action(async (options) => {
10862
+ async function displayRunnerHealth(runner, index, allFirecrackerProcesses, allMitmproxyProcesses) {
10863
+ const warnings = [];
10864
+ const { config, pid, mode } = runner;
10865
+ const baseDir = config.base_dir;
10866
+ console.log(`[${index}] ${baseDir} (PID ${pid}) [${mode}]`);
10867
+ const statusFilePath = runnerPaths.statusFile(baseDir);
10868
+ const status = getRunnerStatus(statusFilePath, warnings);
10869
+ if (status) {
10870
+ let statusLine = ` Mode: ${status.mode}`;
10871
+ if (status.started_at) {
10872
+ const started = new Date(status.started_at);
10873
+ const uptime = formatUptime(Date.now() - started.getTime());
10874
+ statusLine += `, uptime: ${uptime}`;
10875
+ }
10876
+ console.log(statusLine);
10877
+ } else {
10878
+ console.log(" Mode: unknown (no status.json)");
10879
+ }
10880
+ const apiOk = await checkApiConnectivity(config, warnings);
10881
+ if (apiOk) {
10882
+ console.log(` API: \u2713 Connected to ${config.server.url}`);
10883
+ } else {
10884
+ console.log(` API: \u2717 Cannot connect to ${config.server.url}`);
10885
+ }
10886
+ const mitmProc = allMitmproxyProcesses.find((p) => p.baseDir === baseDir);
10887
+ if (mitmProc) {
10888
+ console.log(
10889
+ ` Proxy: \u2713 mitmproxy (PID ${mitmProc.pid}) on :${config.proxy.port}`
10890
+ );
10891
+ } else if (mode === "start") {
10892
+ console.log(" Proxy: \u2717 not running");
10893
+ warnings.push({ message: "Proxy mitmproxy is not running" });
10894
+ } else {
10895
+ console.log(" Proxy: - (not running)");
10896
+ }
10897
+ const { jobs, statusVmIds } = buildJobInfo(
10898
+ status,
10899
+ allFirecrackerProcesses,
10900
+ baseDir
10901
+ );
10902
+ console.log(
10903
+ ` Runs (${jobs.length} active, max ${config.sandbox.max_concurrent}):`
10904
+ );
10905
+ if (jobs.length === 0) {
10906
+ console.log(" No active runs");
10907
+ } else {
10908
+ for (const job of jobs) {
10909
+ const statusText = job.firecrackerPid ? `\u2713 Running (PID ${job.firecrackerPid})` : "\u26A0\uFE0F No process";
10910
+ console.log(` ${job.vmId} ${statusText}`);
10911
+ }
10912
+ }
10913
+ const workspacesDir = runnerPaths.workspacesDir(baseDir);
10914
+ const workspaces = existsSync5(workspacesDir) ? readdirSync2(workspacesDir).filter(runnerPaths.isVmWorkspace) : [];
10915
+ detectRunnerOrphanResources(
10916
+ jobs,
10917
+ allFirecrackerProcesses,
10918
+ workspaces,
10919
+ statusVmIds,
10920
+ baseDir,
10921
+ warnings
10922
+ );
10923
+ console.log(` Warnings:`);
10924
+ if (warnings.length === 0) {
10925
+ console.log(" None");
10926
+ } else {
10927
+ for (const w of warnings) {
10928
+ console.log(` - ${w.message}`);
10929
+ }
10930
+ }
10931
+ return warnings;
10932
+ }
10933
+ async function detectGlobalOrphans(discoveredRunners, allFirecrackerProcesses, allMitmproxyProcesses, globalWarnings) {
10934
+ const runnerBaseDirs = new Set(
10935
+ discoveredRunners.map((r) => r.config.base_dir)
10936
+ );
10937
+ for (const mitm of allMitmproxyProcesses) {
10938
+ if (mitm.isOrphan) {
10939
+ globalWarnings.push({
10940
+ message: `Orphan mitmproxy: PID ${mitm.pid} (PPID=1, parent process dead)`
10941
+ });
10942
+ } else if (!runnerBaseDirs.has(mitm.baseDir)) {
10943
+ globalWarnings.push({
10944
+ message: `Orphan mitmproxy: PID ${mitm.pid} (baseDir ${mitm.baseDir}, runner not running)`
10945
+ });
10946
+ }
10947
+ }
10948
+ for (const fc of allFirecrackerProcesses) {
10949
+ if (fc.isOrphan) {
10950
+ globalWarnings.push({
10951
+ message: `Orphan Firecracker: PID ${fc.pid} (vmId ${fc.vmId}, PPID=1, parent process dead)`
10952
+ });
10953
+ } else if (!runnerBaseDirs.has(fc.baseDir)) {
10954
+ globalWarnings.push({
10955
+ message: `Orphan Firecracker: PID ${fc.pid} (vmId ${fc.vmId}, baseDir ${fc.baseDir}, runner not running)`
10956
+ });
10957
+ }
10958
+ }
10959
+ const orphanNetns = await findOrphanNetworkNamespaces(globalWarnings);
10960
+ for (const ns of orphanNetns) {
10961
+ globalWarnings.push({
10962
+ message: `Orphan namespace: ${ns} (runner process not running)`
10963
+ });
10964
+ }
10965
+ }
10966
+ var doctorCommand = new Command2("doctor").description("Diagnose health of all runners on this host").action(async () => {
10833
10967
  try {
10834
- const config = loadConfig(options.config);
10835
- const statusFilePath = runnerPaths.statusFile(config.base_dir);
10836
- const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
10837
- const warnings = [];
10838
- console.log(`Runner: ${config.name}`);
10839
- const status = displayRunnerStatus(statusFilePath, warnings);
10840
- console.log("");
10841
- await checkApiConnectivity(config, warnings);
10842
- console.log("");
10843
- checkNetwork(config, warnings);
10844
- console.log("");
10845
- const processes = findFirecrackerProcesses();
10846
- const workspaces = existsSync5(workspacesDir) ? readdirSync2(workspacesDir).filter(runnerPaths.isVmWorkspace) : [];
10847
- const { jobs, statusVmIds } = buildJobInfo(status, processes);
10848
- displayRuns(jobs, config.sandbox.max_concurrent);
10968
+ const globalWarnings = [];
10969
+ let totalWarnings = 0;
10970
+ const allFirecrackerProcesses = findFirecrackerProcesses();
10971
+ const allMitmproxyProcesses = findMitmproxyProcesses();
10972
+ const runnerProcesses = findRunnerProcesses();
10973
+ const discoveredRunners = [];
10974
+ for (const rp of runnerProcesses) {
10975
+ try {
10976
+ const config = loadConfig(rp.configPath);
10977
+ discoveredRunners.push({
10978
+ pid: rp.pid,
10979
+ config,
10980
+ mode: rp.mode
10981
+ });
10982
+ } catch (err) {
10983
+ globalWarnings.push({
10984
+ message: `Failed to load config ${rp.configPath}: ${err instanceof Error ? err.message : "Unknown error"}`
10985
+ });
10986
+ }
10987
+ }
10988
+ console.log(`Runners (${discoveredRunners.length} found):`);
10849
10989
  console.log("");
10850
- await detectOrphanResources(
10851
- jobs,
10852
- processes,
10853
- workspaces,
10854
- statusVmIds,
10855
- config.base_dir,
10856
- warnings
10990
+ if (discoveredRunners.length === 0) {
10991
+ console.log(" No runner processes found");
10992
+ console.log("");
10993
+ } else {
10994
+ for (let i = 0; i < discoveredRunners.length; i++) {
10995
+ const warnings = await displayRunnerHealth(
10996
+ discoveredRunners[i],
10997
+ i + 1,
10998
+ allFirecrackerProcesses,
10999
+ allMitmproxyProcesses
11000
+ );
11001
+ totalWarnings += warnings.length;
11002
+ console.log("");
11003
+ }
11004
+ }
11005
+ await detectGlobalOrphans(
11006
+ discoveredRunners,
11007
+ allFirecrackerProcesses,
11008
+ allMitmproxyProcesses,
11009
+ globalWarnings
10857
11010
  );
10858
- displayWarnings(warnings);
10859
- process.exit(warnings.length > 0 ? 1 : 0);
11011
+ console.log("Global:");
11012
+ if (globalWarnings.length === 0) {
11013
+ console.log(" No orphan resources");
11014
+ } else {
11015
+ for (const w of globalWarnings) {
11016
+ console.log(` ${w.message}`);
11017
+ }
11018
+ }
11019
+ totalWarnings += globalWarnings.length;
11020
+ process.exit(totalWarnings > 0 ? 1 : 0);
10860
11021
  } catch (error) {
10861
11022
  console.error(
10862
11023
  `Error: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -11345,7 +11506,7 @@ var snapshotCommand = new Command5("snapshot").description("Generate a Firecrack
11345
11506
  );
11346
11507
 
11347
11508
  // src/index.ts
11348
- var version = true ? "3.13.0" : "0.1.0";
11509
+ var version = true ? "3.14.0" : "0.1.0";
11349
11510
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11350
11511
  program.addCommand(startCommand);
11351
11512
  program.addCommand(doctorCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "3.13.0",
3
+ "version": "3.14.0",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",