@vm0/runner 3.9.1 → 3.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/index.js +189 -354
  2. package/package.json +2 -1
package/index.js CHANGED
@@ -5,7 +5,6 @@ import { program } from "commander";
5
5
 
6
6
  // src/commands/start.ts
7
7
  import { Command } from "commander";
8
- import { dirname, join } from "path";
9
8
 
10
9
  // src/lib/config.ts
11
10
  import { z } from "zod";
@@ -19,22 +18,35 @@ var VM0_TMP_PREFIX = "/tmp/vm0";
19
18
  var runtimePaths = {
20
19
  /** Runner PID file for single-instance lock */
21
20
  runnerPid: path.join(VM0_RUN_DIR, "runner.pid"),
22
- /** IP pool lock file */
23
- ipPoolLock: path.join(VM0_RUN_DIR, "ip-pool.lock.active"),
24
21
  /** IP allocation registry */
25
22
  ipRegistry: path.join(VM0_RUN_DIR, "ip-registry.json")
26
23
  };
27
- var dataPaths = {
24
+ var VM_WORKSPACE_PREFIX = "vm0-";
25
+ var runnerPaths = {
28
26
  /** Overlay pool directory for pre-warmed VM overlays */
29
- overlayPool: (dataDir) => path.join(dataDir, "overlay-pool")
27
+ overlayPool: (baseDir) => path.join(baseDir, "overlay-pool"),
28
+ /** Workspaces directory for VM work directories */
29
+ workspacesDir: (baseDir) => path.join(baseDir, "workspaces"),
30
+ /** VM work directory */
31
+ vmWorkDir: (baseDir, vmId) => path.join(baseDir, "workspaces", `${VM_WORKSPACE_PREFIX}${vmId}`),
32
+ /** Runner status file */
33
+ statusFile: (baseDir) => path.join(baseDir, "status.json"),
34
+ /** Check if a directory name is a VM workspace */
35
+ isVmWorkspace: (dirname) => dirname.startsWith(VM_WORKSPACE_PREFIX),
36
+ /** Extract vmId from workspace directory name */
37
+ extractVmId: (dirname) => dirname.replace(VM_WORKSPACE_PREFIX, "")
38
+ };
39
+ var vmPaths = {
40
+ /** Firecracker config file (used with --config-file --no-api) */
41
+ config: (workDir) => path.join(workDir, "config.json"),
42
+ /** Vsock UDS for host-guest communication */
43
+ vsock: (workDir) => path.join(workDir, "vsock.sock")
30
44
  };
31
45
  var tempPaths = {
32
46
  /** Default proxy CA directory */
33
47
  proxyDir: `${VM0_TMP_PREFIX}-proxy`,
34
48
  /** VM registry for proxy */
35
49
  vmRegistry: `${VM0_TMP_PREFIX}-vm-registry.json`,
36
- /** VM work directory (fallback when not using workspaces) */
37
- vmWorkDir: (vmId) => `${VM0_TMP_PREFIX}-vm-${vmId}`,
38
50
  /** Network log file for a run */
39
51
  networkLog: (runId) => `${VM0_TMP_PREFIX}-network-${runId}.jsonl`
40
52
  };
@@ -56,7 +68,7 @@ var runnerConfigSchema = z.object({
56
68
  /^[a-z0-9-]+\/[a-z0-9-]+$/,
57
69
  "Group must be in format 'scope/name' (lowercase, hyphens allowed)"
58
70
  ),
59
- data_dir: z.string().min(1, "Data directory is required"),
71
+ base_dir: z.string().min(1, "Base directory is required"),
60
72
  server: z.object({
61
73
  url: z.url({ message: "Server URL must be a valid URL" }),
62
74
  token: z.string().min(1, "Server token is required")
@@ -84,7 +96,7 @@ var DEBUG_SERVER_DEFAULTS = {
84
96
  var debugConfigSchema = z.object({
85
97
  name: z.string().default("debug-runner"),
86
98
  group: z.string().default("debug/local"),
87
- data_dir: z.string().min(1, "Data directory is required"),
99
+ base_dir: z.string().min(1, "Base directory is required"),
88
100
  server: z.object({
89
101
  url: z.url().default(DEBUG_SERVER_DEFAULTS.url),
90
102
  token: z.string().default(DEBUG_SERVER_DEFAULTS.token)
@@ -302,175 +314,11 @@ async function subscribeToJobs(server, group, onJob, onConnectionChange) {
302
314
  };
303
315
  }
304
316
 
305
- // src/lib/executor.ts
306
- import path6 from "path";
307
-
308
317
  // src/lib/firecracker/vm.ts
309
318
  import { spawn } from "child_process";
310
319
  import fs4 from "fs";
311
- import path4 from "path";
312
320
  import readline from "readline";
313
321
 
314
- // src/lib/firecracker/client.ts
315
- import http from "http";
316
- var FirecrackerClient = class {
317
- socketPath;
318
- constructor(socketPath) {
319
- this.socketPath = socketPath;
320
- }
321
- /**
322
- * Make HTTP request to Firecracker API
323
- */
324
- async request(method, path9, body) {
325
- return new Promise((resolve, reject) => {
326
- const bodyStr = body !== void 0 ? JSON.stringify(body) : void 0;
327
- const headers = {
328
- Accept: "application/json",
329
- Connection: "close"
330
- // Disable keep-alive to prevent request pipelining issues
331
- };
332
- if (bodyStr !== void 0) {
333
- headers["Content-Type"] = "application/json";
334
- headers["Content-Length"] = Buffer.byteLength(bodyStr);
335
- }
336
- console.log(
337
- `[FC API] ${method} ${path9}${bodyStr ? ` (${Buffer.byteLength(bodyStr)} bytes)` : ""}`
338
- );
339
- const options = {
340
- socketPath: this.socketPath,
341
- path: path9,
342
- method,
343
- headers,
344
- // Disable agent to ensure fresh connection for each request
345
- // Firecracker's single-threaded API can have issues with pipelined requests
346
- agent: false
347
- };
348
- const req = http.request(options, (res) => {
349
- let data = "";
350
- res.on("data", (chunk) => {
351
- data += chunk.toString();
352
- });
353
- res.on("end", () => {
354
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) {
355
- if (data) {
356
- try {
357
- resolve(JSON.parse(data));
358
- } catch {
359
- resolve(data);
360
- }
361
- } else {
362
- resolve(void 0);
363
- }
364
- } else {
365
- let errorMsg = `Firecracker API error: ${res.statusCode}`;
366
- if (data) {
367
- try {
368
- const errorBody = JSON.parse(data);
369
- if (errorBody.fault_message) {
370
- errorMsg = `${errorMsg} - ${errorBody.fault_message}`;
371
- }
372
- } catch {
373
- errorMsg = `${errorMsg} - ${data}`;
374
- }
375
- }
376
- reject(new Error(errorMsg));
377
- }
378
- });
379
- });
380
- req.on("error", (err) => {
381
- reject(new Error(`Failed to connect to Firecracker: ${err.message}`));
382
- });
383
- if (bodyStr !== void 0) {
384
- req.write(bodyStr);
385
- }
386
- req.end();
387
- });
388
- }
389
- /**
390
- * Configure machine settings (vCPUs, memory)
391
- */
392
- async setMachineConfig(config) {
393
- await this.request("PUT", "/machine-config", config);
394
- }
395
- /**
396
- * Configure boot source (kernel)
397
- */
398
- async setBootSource(config) {
399
- await this.request("PUT", "/boot-source", config);
400
- }
401
- /**
402
- * Add or update a drive (block device)
403
- */
404
- async setDrive(drive) {
405
- await this.request("PUT", `/drives/${drive.drive_id}`, drive);
406
- }
407
- /**
408
- * Add or update a network interface
409
- */
410
- async setNetworkInterface(iface) {
411
- await this.request("PUT", `/network-interfaces/${iface.iface_id}`, iface);
412
- }
413
- /**
414
- * Configure vsock device for host-guest communication
415
- */
416
- async setVsock(vsock) {
417
- await this.request("PUT", "/vsock", vsock);
418
- }
419
- /**
420
- * Perform an action (start, stop, etc.)
421
- */
422
- async performAction(actionType) {
423
- await this.request("PUT", "/actions", { action_type: actionType });
424
- }
425
- /**
426
- * Start the VM instance
427
- */
428
- async start() {
429
- await this.performAction("InstanceStart");
430
- }
431
- /**
432
- * Send Ctrl+Alt+Del to the VM (graceful shutdown request)
433
- */
434
- async sendCtrlAltDel() {
435
- await this.performAction("SendCtrlAltDel");
436
- }
437
- /**
438
- * Get machine configuration
439
- */
440
- async getMachineConfig() {
441
- return await this.request("GET", "/machine-config");
442
- }
443
- /**
444
- * Check if the Firecracker API is ready
445
- * Returns true if API is responding
446
- */
447
- async isReady() {
448
- try {
449
- await this.request("GET", "/");
450
- return true;
451
- } catch {
452
- return false;
453
- }
454
- }
455
- /**
456
- * Wait for Firecracker API to become ready
457
- * @param timeoutMs Maximum time to wait
458
- * @param intervalMs Polling interval
459
- */
460
- async waitUntilReady(timeoutMs = 5e3, intervalMs = 100) {
461
- const start = Date.now();
462
- while (Date.now() - start < timeoutMs) {
463
- if (await this.isReady()) {
464
- return;
465
- }
466
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
467
- }
468
- throw new Error(
469
- `Firecracker API not ready after ${timeoutMs}ms at ${this.socketPath}`
470
- );
471
- }
472
- };
473
-
474
322
  // src/lib/firecracker/network.ts
475
323
  import { execSync, exec } from "child_process";
476
324
  import { promisify } from "util";
@@ -1004,28 +852,47 @@ import { exec as exec3 } from "child_process";
1004
852
  import { promisify as promisify3 } from "util";
1005
853
  import * as fs3 from "fs";
1006
854
  import path3 from "path";
855
+
856
+ // src/lib/utils/file-lock.ts
857
+ import lockfile from "proper-lockfile";
858
+ var DEFAULT_OPTIONS = {
859
+ stale: 3e4,
860
+ // Consider lock stale after 30 seconds
861
+ retries: {
862
+ retries: 5,
863
+ minTimeout: 100,
864
+ maxTimeout: 1e3
865
+ }
866
+ };
867
+ async function withFileLock(path7, fn, options) {
868
+ const release = await lockfile.lock(path7, { ...DEFAULT_OPTIONS, ...options });
869
+ try {
870
+ return await fn();
871
+ } finally {
872
+ await release();
873
+ }
874
+ }
875
+
876
+ // src/lib/firecracker/ip-registry.ts
1007
877
  var execAsync3 = promisify3(exec3);
1008
878
  var logger3 = createLogger("IPRegistry");
1009
879
  var IP_PREFIX = "172.16.0.";
1010
880
  var IP_START = 2;
1011
881
  var IP_END = 254;
1012
- var LOCK_TIMEOUT_MS = 1e4;
1013
- var LOCK_RETRY_INTERVAL_MS = 100;
1014
- async function defaultEnsureRunDir(runDir) {
1015
- if (!fs3.existsSync(runDir)) {
1016
- await execAsync3(`sudo mkdir -p ${runDir}`);
1017
- await execAsync3(`sudo chmod 777 ${runDir}`);
882
+ async function defaultEnsureRegistryDir(registryPath) {
883
+ const dir = path3.dirname(registryPath);
884
+ if (!fs3.existsSync(dir)) {
885
+ await execAsync3(`sudo mkdir -p ${dir}`);
886
+ await execAsync3(`sudo chmod 777 ${dir}`);
1018
887
  }
1019
888
  }
1020
- async function defaultScanTapDevices() {
889
+ async function scanTapDevices() {
1021
890
  const tapDevices = /* @__PURE__ */ new Set();
1022
891
  try {
1023
- const { stdout } = await execAsync3(
1024
- `ip -o link show type tuntap 2>/dev/null || true`
1025
- );
892
+ const { stdout } = await execAsync3(`ip link show 2>/dev/null || true`);
1026
893
  const lines = stdout.split("\n");
1027
894
  for (const line of lines) {
1028
- const match = line.match(/^\d+:\s+([a-z0-9]+):/);
895
+ const match = line.match(/^\d+:\s+(vm0[a-z0-9]+):/);
1029
896
  if (match && match[1]) {
1030
897
  tapDevices.add(match[1]);
1031
898
  }
@@ -1056,13 +923,11 @@ function isProcessRunning(pid) {
1056
923
  var IPRegistry = class {
1057
924
  config;
1058
925
  constructor(config = {}) {
1059
- const runDir = config.runDir ?? VM0_RUN_DIR;
926
+ const registryPath = config.registryPath ?? runtimePaths.ipRegistry;
1060
927
  this.config = {
1061
- runDir,
1062
- lockPath: config.lockPath ?? path3.join(runDir, "ip-pool.lock.active"),
1063
- registryPath: config.registryPath ?? path3.join(runDir, "ip-registry.json"),
1064
- ensureRunDir: config.ensureRunDir ?? (() => defaultEnsureRunDir(runDir)),
1065
- scanTapDevices: config.scanTapDevices ?? defaultScanTapDevices,
928
+ registryPath,
929
+ ensureRegistryDir: config.ensureRegistryDir ?? (() => defaultEnsureRegistryDir(registryPath)),
930
+ scanTapDevices: config.scanTapDevices ?? scanTapDevices,
1066
931
  checkTapExists: config.checkTapExists ?? defaultCheckTapExists
1067
932
  };
1068
933
  }
@@ -1071,44 +936,21 @@ var IPRegistry = class {
1071
936
  * Execute a function while holding an exclusive lock on the IP pool
1072
937
  */
1073
938
  async withIPLock(fn) {
1074
- await this.config.ensureRunDir();
1075
- const startTime = Date.now();
1076
- let lockAcquired = false;
1077
- while (Date.now() - startTime < LOCK_TIMEOUT_MS) {
939
+ await this.config.ensureRegistryDir();
940
+ if (!fs3.existsSync(this.config.registryPath)) {
1078
941
  try {
1079
- fs3.writeFileSync(this.config.lockPath, process.pid.toString(), {
1080
- flag: "wx"
1081
- });
1082
- lockAcquired = true;
1083
- break;
1084
- } catch {
1085
- try {
1086
- const pidStr = fs3.readFileSync(this.config.lockPath, "utf-8");
1087
- const pid = parseInt(pidStr, 10);
1088
- if (!isProcessRunning(pid)) {
1089
- fs3.unlinkSync(this.config.lockPath);
1090
- continue;
1091
- }
1092
- } catch {
1093
- }
1094
- await new Promise(
1095
- (resolve) => setTimeout(resolve, LOCK_RETRY_INTERVAL_MS)
942
+ fs3.writeFileSync(
943
+ this.config.registryPath,
944
+ JSON.stringify({ allocations: {} }, null, 2),
945
+ { flag: "wx" }
1096
946
  );
947
+ } catch (err) {
948
+ if (err.code !== "EEXIST") {
949
+ throw err;
950
+ }
1097
951
  }
1098
952
  }
1099
- if (!lockAcquired) {
1100
- throw new Error(
1101
- `Failed to acquire IP pool lock after ${LOCK_TIMEOUT_MS}ms`
1102
- );
1103
- }
1104
- try {
1105
- return await fn();
1106
- } finally {
1107
- try {
1108
- fs3.unlinkSync(this.config.lockPath);
1109
- } catch {
1110
- }
1111
- }
953
+ return withFileLock(this.config.registryPath, fn);
1112
954
  }
1113
955
  // ============ Registry CRUD ============
1114
956
  /**
@@ -1441,22 +1283,8 @@ var TapPool = class {
1441
1283
  * Scan for orphaned TAP devices from previous runs (matching this pool's prefix)
1442
1284
  */
1443
1285
  async scanOrphanedTaps() {
1444
- try {
1445
- const { stdout } = await execAsync4(
1446
- `ip -o link show type tuntap 2>/dev/null || true`
1447
- );
1448
- const orphaned = [];
1449
- const lines = stdout.split("\n");
1450
- for (const line of lines) {
1451
- const match = line.match(/^\d+:\s+([a-z0-9]+):/);
1452
- if (match && match[1] && this.isOwnTap(match[1])) {
1453
- orphaned.push(match[1]);
1454
- }
1455
- }
1456
- return orphaned;
1457
- } catch {
1458
- return [];
1459
- }
1286
+ const allTaps = await scanTapDevices();
1287
+ return Array.from(allTaps).filter((tap) => this.isOwnTap(tap));
1460
1288
  }
1461
1289
  /**
1462
1290
  * Initialize the TAP pool
@@ -1674,20 +1502,20 @@ var logger5 = createLogger("VM");
1674
1502
  var FirecrackerVM = class {
1675
1503
  config;
1676
1504
  process = null;
1677
- client = null;
1678
1505
  networkConfig = null;
1679
1506
  state = "created";
1680
1507
  workDir;
1681
- socketPath;
1682
1508
  vmOverlayPath = null;
1683
1509
  // Set during start()
1684
1510
  vsockPath;
1685
1511
  // Vsock UDS path for host-guest communication
1512
+ configPath;
1513
+ // Firecracker config file path
1686
1514
  constructor(config) {
1687
1515
  this.config = config;
1688
- this.workDir = config.workDir || tempPaths.vmWorkDir(config.vmId);
1689
- this.socketPath = path4.join(this.workDir, "firecracker.sock");
1690
- this.vsockPath = path4.join(this.workDir, "vsock.sock");
1516
+ this.workDir = config.workDir;
1517
+ this.vsockPath = vmPaths.vsock(this.workDir);
1518
+ this.configPath = vmPaths.config(this.workDir);
1691
1519
  }
1692
1520
  /**
1693
1521
  * Get current VM state
@@ -1707,12 +1535,6 @@ var FirecrackerVM = class {
1707
1535
  getNetworkConfig() {
1708
1536
  return this.networkConfig;
1709
1537
  }
1710
- /**
1711
- * Get the socket path for Firecracker API
1712
- */
1713
- getSocketPath() {
1714
- return this.socketPath;
1715
- }
1716
1538
  /**
1717
1539
  * Get the vsock UDS path for host-guest communication
1718
1540
  */
@@ -1721,7 +1543,8 @@ var FirecrackerVM = class {
1721
1543
  }
1722
1544
  /**
1723
1545
  * Start the VM
1724
- * This spawns Firecracker, configures it via API, and boots the VM
1546
+ * Uses Firecracker's static configuration mode (--config-file --no-api)
1547
+ * for faster startup by eliminating API polling and HTTP request overhead.
1725
1548
  */
1726
1549
  async start() {
1727
1550
  if (this.state !== "created") {
@@ -1729,19 +1552,29 @@ var FirecrackerVM = class {
1729
1552
  }
1730
1553
  try {
1731
1554
  fs4.mkdirSync(this.workDir, { recursive: true });
1732
- if (fs4.existsSync(this.socketPath)) {
1733
- fs4.unlinkSync(this.socketPath);
1734
- }
1735
1555
  logger5.log(`[VM ${this.config.vmId}] Acquiring overlay...`);
1736
1556
  this.vmOverlayPath = await acquireOverlay();
1737
1557
  logger5.log(`[VM ${this.config.vmId}] Overlay acquired`);
1738
1558
  logger5.log(`[VM ${this.config.vmId}] Acquiring TAP+IP...`);
1739
1559
  this.networkConfig = await acquireTap(this.config.vmId);
1740
1560
  logger5.log(`[VM ${this.config.vmId}] TAP+IP acquired`);
1561
+ const config = this.buildConfig();
1562
+ fs4.writeFileSync(this.configPath, JSON.stringify(config, null, 2));
1563
+ logger5.log(
1564
+ `[VM ${this.config.vmId}] Configuring: ${this.config.vcpus} vCPUs, ${this.config.memoryMb}MB RAM`
1565
+ );
1566
+ logger5.log(
1567
+ `[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
1568
+ );
1569
+ logger5.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
1570
+ logger5.log(
1571
+ `[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
1572
+ );
1573
+ logger5.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
1741
1574
  logger5.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
1742
1575
  this.process = spawn(
1743
1576
  this.config.firecrackerBinary,
1744
- ["--api-sock", this.socketPath],
1577
+ ["--config-file", this.configPath, "--no-api"],
1745
1578
  {
1746
1579
  cwd: this.workDir,
1747
1580
  stdio: ["ignore", "pipe", "pipe"],
@@ -1780,13 +1613,6 @@ var FirecrackerVM = class {
1780
1613
  }
1781
1614
  });
1782
1615
  }
1783
- this.client = new FirecrackerClient(this.socketPath);
1784
- logger5.log(`[VM ${this.config.vmId}] Waiting for API...`);
1785
- await this.client.waitUntilReady(1e4, 100);
1786
- this.state = "configuring";
1787
- await this.configure();
1788
- logger5.log(`[VM ${this.config.vmId}] Booting...`);
1789
- await this.client.start();
1790
1616
  this.state = "running";
1791
1617
  logger5.log(
1792
1618
  `[VM ${this.config.vmId}] Running at ${this.networkConfig.guestIp}`
@@ -1798,59 +1624,83 @@ var FirecrackerVM = class {
1798
1624
  }
1799
1625
  }
1800
1626
  /**
1801
- * Configure the VM via Firecracker API
1802
- */
1803
- async configure() {
1804
- if (!this.client || !this.networkConfig || !this.vmOverlayPath) {
1627
+ * Build Firecracker configuration object
1628
+ *
1629
+ * Creates the JSON configuration for Firecracker's --config-file option.
1630
+ * Boot args:
1631
+ * - console=ttyS0: serial console output
1632
+ * - reboot=k: use keyboard controller for reboot
1633
+ * - panic=1: reboot after 1 second on kernel panic
1634
+ * - pci=off: disable PCI bus (not needed in microVM)
1635
+ * - nomodules: skip module loading (not needed in microVM)
1636
+ * - random.trust_cpu=on: trust CPU RNG, skip entropy wait
1637
+ * - quiet loglevel=0: minimize kernel log output
1638
+ * - nokaslr: disable kernel address space randomization
1639
+ * - audit=0: disable kernel auditing
1640
+ * - numa=off: disable NUMA (single node)
1641
+ * - mitigations=off: disable CPU vulnerability mitigations
1642
+ * - noresume: skip hibernation resume check
1643
+ * - init=/sbin/vm-init: use vm-init (Rust binary) for filesystem setup and vsock-agent
1644
+ * - ip=...: network configuration (guest IP, gateway, netmask)
1645
+ */
1646
+ buildConfig() {
1647
+ if (!this.networkConfig || !this.vmOverlayPath) {
1805
1648
  throw new Error("VM not properly initialized");
1806
1649
  }
1807
- logger5.log(
1808
- `[VM ${this.config.vmId}] Configuring: ${this.config.vcpus} vCPUs, ${this.config.memoryMb}MB RAM`
1809
- );
1810
- await this.client.setMachineConfig({
1811
- vcpu_count: this.config.vcpus,
1812
- mem_size_mib: this.config.memoryMb
1813
- });
1814
1650
  const networkBootArgs = generateNetworkBootArgs(this.networkConfig);
1815
1651
  const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off nomodules random.trust_cpu=on quiet loglevel=0 nokaslr audit=0 numa=off mitigations=off noresume init=/sbin/vm-init ${networkBootArgs}`;
1816
1652
  logger5.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
1817
- await this.client.setBootSource({
1818
- kernel_image_path: this.config.kernelPath,
1819
- boot_args: bootArgs
1820
- });
1821
- logger5.log(
1822
- `[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
1823
- );
1824
- await this.client.setDrive({
1825
- drive_id: "rootfs",
1826
- path_on_host: this.config.rootfsPath,
1827
- is_root_device: true,
1828
- is_read_only: true
1829
- });
1830
- logger5.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
1831
- await this.client.setDrive({
1832
- drive_id: "overlay",
1833
- path_on_host: this.vmOverlayPath,
1834
- is_root_device: false,
1835
- is_read_only: false
1836
- });
1837
- logger5.log(
1838
- `[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
1839
- );
1840
- await this.client.setNetworkInterface({
1841
- iface_id: "eth0",
1842
- guest_mac: this.networkConfig.guestMac,
1843
- host_dev_name: this.networkConfig.tapDevice
1844
- });
1845
- logger5.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
1846
- await this.client.setVsock({
1847
- vsock_id: "vsock0",
1848
- guest_cid: 3,
1849
- uds_path: this.vsockPath
1850
- });
1653
+ return {
1654
+ "boot-source": {
1655
+ kernel_image_path: this.config.kernelPath,
1656
+ boot_args: bootArgs
1657
+ },
1658
+ drives: [
1659
+ // Base drive (squashfs, read-only, shared across VMs)
1660
+ // Mounted as /dev/vda inside the VM
1661
+ {
1662
+ drive_id: "rootfs",
1663
+ path_on_host: this.config.rootfsPath,
1664
+ is_root_device: true,
1665
+ is_read_only: true
1666
+ },
1667
+ // Overlay drive (ext4, read-write, per-VM)
1668
+ // Mounted as /dev/vdb inside the VM
1669
+ // The vm-init script combines these using overlayfs
1670
+ {
1671
+ drive_id: "overlay",
1672
+ path_on_host: this.vmOverlayPath,
1673
+ is_root_device: false,
1674
+ is_read_only: false
1675
+ }
1676
+ ],
1677
+ "machine-config": {
1678
+ vcpu_count: this.config.vcpus,
1679
+ mem_size_mib: this.config.memoryMb
1680
+ },
1681
+ "network-interfaces": [
1682
+ {
1683
+ iface_id: "eth0",
1684
+ guest_mac: this.networkConfig.guestMac,
1685
+ host_dev_name: this.networkConfig.tapDevice
1686
+ }
1687
+ ],
1688
+ // Guest CID 3 is the standard guest identifier (CID 0=hypervisor, 1=local, 2=host)
1689
+ vsock: {
1690
+ guest_cid: 3,
1691
+ uds_path: this.vsockPath
1692
+ }
1693
+ };
1851
1694
  }
1852
1695
  /**
1853
- * Stop the VM gracefully
1696
+ * Stop the VM
1697
+ *
1698
+ * Note: With --no-api mode, we can only force kill the process.
1699
+ * The VM doesn't have an API endpoint for graceful shutdown.
1700
+ *
1701
+ * TODO(#2118): Implement graceful shutdown via vsock command to guest agent.
1702
+ * This would allow the guest to clean up before termination without
1703
+ * adding the startup latency of API mode.
1854
1704
  */
1855
1705
  async stop() {
1856
1706
  if (this.state !== "running") {
@@ -1859,17 +1709,7 @@ var FirecrackerVM = class {
1859
1709
  }
1860
1710
  this.state = "stopping";
1861
1711
  logger5.log(`[VM ${this.config.vmId}] Stopping...`);
1862
- try {
1863
- if (this.client) {
1864
- await this.client.sendCtrlAltDel().catch((error) => {
1865
- logger5.log(
1866
- `[VM ${this.config.vmId}] Graceful shutdown signal failed (VM may already be stopping): ${error instanceof Error ? error.message : error}`
1867
- );
1868
- });
1869
- }
1870
- } finally {
1871
- await this.cleanup();
1872
- }
1712
+ await this.cleanup();
1873
1713
  }
1874
1714
  /**
1875
1715
  * Force kill the VM
@@ -1909,7 +1749,6 @@ var FirecrackerVM = class {
1909
1749
  if (fs4.existsSync(this.workDir)) {
1910
1750
  fs4.rmSync(this.workDir, { recursive: true, force: true });
1911
1751
  }
1912
- this.client = null;
1913
1752
  this.state = "stopped";
1914
1753
  logger5.log(`[VM ${this.config.vmId}] Stopped`);
1915
1754
  }
@@ -1974,8 +1813,8 @@ function encodeExecPayload(command, timeoutMs) {
1974
1813
  cmdBuf.copy(payload, 8);
1975
1814
  return payload;
1976
1815
  }
1977
- function encodeWriteFilePayload(path9, content, sudo) {
1978
- const pathBuf = Buffer.from(path9, "utf-8");
1816
+ function encodeWriteFilePayload(path7, content, sudo) {
1817
+ const pathBuf = Buffer.from(path7, "utf-8");
1979
1818
  if (pathBuf.length > 65535) {
1980
1819
  throw new Error(`Path too long: ${pathBuf.length} bytes (max 65535)`);
1981
1820
  }
@@ -2832,8 +2671,8 @@ function getErrorMap() {
2832
2671
  return overrideErrorMap;
2833
2672
  }
2834
2673
  var makeIssue = (params) => {
2835
- const { data, path: path9, errorMaps, issueData } = params;
2836
- const fullPath = [...path9, ...issueData.path || []];
2674
+ const { data, path: path7, errorMaps, issueData } = params;
2675
+ const fullPath = [...path7, ...issueData.path || []];
2837
2676
  const fullIssue = {
2838
2677
  ...issueData,
2839
2678
  path: fullPath
@@ -2932,11 +2771,11 @@ var errorUtil;
2932
2771
  errorUtil2.toString = (message) => typeof message === "string" ? message : message === null || message === void 0 ? void 0 : message.message;
2933
2772
  })(errorUtil || (errorUtil = {}));
2934
2773
  var ParseInputLazyPath = class {
2935
- constructor(parent, value, path9, key) {
2774
+ constructor(parent, value, path7, key) {
2936
2775
  this._cachedPath = [];
2937
2776
  this.parent = parent;
2938
2777
  this.data = value;
2939
- this._path = path9;
2778
+ this._path = path7;
2940
2779
  this._key = key;
2941
2780
  }
2942
2781
  get path() {
@@ -8175,6 +8014,8 @@ var scheduleResponseSchema = z19.object({
8175
8014
  volumeVersions: z19.record(z19.string(), z19.string()).nullable(),
8176
8015
  enabled: z19.boolean(),
8177
8016
  nextRunAt: z19.string().nullable(),
8017
+ lastRunAt: z19.string().nullable(),
8018
+ retryStartedAt: z19.string().nullable(),
8178
8019
  createdAt: z19.string(),
8179
8020
  updatedAt: z19.string()
8180
8021
  });
@@ -9216,7 +9057,7 @@ function initVMRegistry(registryPath) {
9216
9057
  // src/lib/proxy/proxy-manager.ts
9217
9058
  import { spawn as spawn2 } from "child_process";
9218
9059
  import fs7 from "fs";
9219
- import path5 from "path";
9060
+ import path4 from "path";
9220
9061
 
9221
9062
  // src/lib/proxy/mitm-addon-script.ts
9222
9063
  var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
@@ -9712,7 +9553,7 @@ var ProxyManager = class {
9712
9553
  process = null;
9713
9554
  isRunning = false;
9714
9555
  constructor(config) {
9715
- const addonPath = path5.join(config.caDir, "mitm_addon.py");
9556
+ const addonPath = path4.join(config.caDir, "mitm_addon.py");
9716
9557
  this.config = {
9717
9558
  ...DEFAULT_PROXY_OPTIONS,
9718
9559
  ...config,
@@ -9739,7 +9580,7 @@ var ProxyManager = class {
9739
9580
  * Ensure the addon script exists at the configured path
9740
9581
  */
9741
9582
  ensureAddonScript() {
9742
- const addonDir = path5.dirname(this.config.addonPath);
9583
+ const addonDir = path4.dirname(this.config.addonPath);
9743
9584
  if (!fs7.existsSync(addonDir)) {
9744
9585
  fs7.mkdirSync(addonDir, { recursive: true });
9745
9586
  }
@@ -9755,7 +9596,7 @@ var ProxyManager = class {
9755
9596
  if (!fs7.existsSync(this.config.caDir)) {
9756
9597
  throw new Error(`Proxy CA directory not found: ${this.config.caDir}`);
9757
9598
  }
9758
- const caCertPath = path5.join(this.config.caDir, "mitmproxy-ca.pem");
9599
+ const caCertPath = path4.join(this.config.caDir, "mitmproxy-ca.pem");
9759
9600
  if (!fs7.existsSync(caCertPath)) {
9760
9601
  throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
9761
9602
  }
@@ -10191,7 +10032,6 @@ async function executeJob(context, config, options = {}) {
10191
10032
  let guestIp = null;
10192
10033
  logger10.log(`Starting job ${context.runId} in VM ${vmId}`);
10193
10034
  try {
10194
- const workspacesDir = path6.join(process.cwd(), "workspaces");
10195
10035
  const vmConfig = {
10196
10036
  vmId,
10197
10037
  vcpus: config.sandbox.vcpu,
@@ -10199,7 +10039,7 @@ async function executeJob(context, config, options = {}) {
10199
10039
  kernelPath: config.firecracker.kernel,
10200
10040
  rootfsPath: config.firecracker.rootfs,
10201
10041
  firecrackerBinary: config.firecracker.binary,
10202
- workDir: path6.join(workspacesDir, `vm0-${vmId}`)
10042
+ workDir: runnerPaths.vmWorkDir(config.base_dir, vmId)
10203
10043
  };
10204
10044
  logger10.log(`Creating VM ${vmId}...`);
10205
10045
  vm = new FirecrackerVM(vmConfig);
@@ -10393,7 +10233,7 @@ function createStatusUpdater(statusFilePath, state) {
10393
10233
  // src/lib/runner/runner-lock.ts
10394
10234
  import { exec as exec5 } from "child_process";
10395
10235
  import fs9 from "fs";
10396
- import path7 from "path";
10236
+ import path5 from "path";
10397
10237
  import { promisify as promisify5 } from "util";
10398
10238
  var execAsync5 = promisify5(exec5);
10399
10239
  var logger12 = createLogger("RunnerLock");
@@ -10423,7 +10263,7 @@ function isProcessRunning2(pid) {
10423
10263
  async function acquireRunnerLock(options = {}) {
10424
10264
  const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
10425
10265
  const skipSudo = options.skipSudo ?? false;
10426
- const runDir = path7.dirname(pidFile);
10266
+ const runDir = path5.dirname(pidFile);
10427
10267
  await ensureRunDir(runDir, skipSudo);
10428
10268
  if (fs9.existsSync(pidFile)) {
10429
10269
  const pidStr = fs9.readFileSync(pidFile, "utf-8").trim();
@@ -10476,7 +10316,7 @@ async function setupEnvironment(options) {
10476
10316
  await initOverlayPool({
10477
10317
  size: config.sandbox.max_concurrent + 2,
10478
10318
  replenishThreshold: config.sandbox.max_concurrent,
10479
- poolDir: dataPaths.overlayPool(config.data_dir)
10319
+ poolDir: runnerPaths.overlayPool(config.base_dir)
10480
10320
  });
10481
10321
  logger13.log("Initializing TAP pool...");
10482
10322
  await initTapPool({
@@ -10796,7 +10636,7 @@ var startCommand = new Command("start").description("Start the runner").option("
10796
10636
  const config = loadConfig(options.config);
10797
10637
  validateFirecrackerPaths(config.firecracker);
10798
10638
  console.log("Config valid");
10799
- const statusFilePath = join(dirname(options.config), "status.json");
10639
+ const statusFilePath = runnerPaths.statusFile(config.base_dir);
10800
10640
  const runner = new Runner(config, statusFilePath);
10801
10641
  await runner.start();
10802
10642
  } catch (error) {
@@ -10812,11 +10652,10 @@ var startCommand = new Command("start").description("Start the runner").option("
10812
10652
  // src/commands/doctor.ts
10813
10653
  import { Command as Command2 } from "commander";
10814
10654
  import { existsSync as existsSync4, readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
10815
- import { dirname as dirname2, join as join2 } from "path";
10816
10655
 
10817
10656
  // src/lib/firecracker/process.ts
10818
10657
  import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3 } from "fs";
10819
- import path8 from "path";
10658
+ import path6 from "path";
10820
10659
  function parseFirecrackerCmdline(cmdline) {
10821
10660
  const args = cmdline.split("\0");
10822
10661
  if (!args[0]?.includes("firecracker")) return null;
@@ -10849,7 +10688,7 @@ function findFirecrackerProcesses() {
10849
10688
  for (const entry of entries) {
10850
10689
  if (!/^\d+$/.test(entry)) continue;
10851
10690
  const pid = parseInt(entry, 10);
10852
- const cmdlinePath = path8.join(procDir, entry, "cmdline");
10691
+ const cmdlinePath = path6.join(procDir, entry, "cmdline");
10853
10692
  if (!existsSync3(cmdlinePath)) continue;
10854
10693
  try {
10855
10694
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
@@ -10906,7 +10745,7 @@ function findMitmproxyProcess() {
10906
10745
  for (const entry of entries) {
10907
10746
  if (!/^\d+$/.test(entry)) continue;
10908
10747
  const pid = parseInt(entry, 10);
10909
- const cmdlinePath = path8.join(procDir, entry, "cmdline");
10748
+ const cmdlinePath = path6.join(procDir, entry, "cmdline");
10910
10749
  if (!existsSync3(cmdlinePath)) continue;
10911
10750
  try {
10912
10751
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
@@ -10927,9 +10766,8 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10927
10766
  async (options) => {
10928
10767
  try {
10929
10768
  const config = loadConfig(options.config);
10930
- const configDir = dirname2(options.config);
10931
- const statusFilePath = join2(configDir, "status.json");
10932
- const workspacesDir = join2(configDir, "workspaces");
10769
+ const statusFilePath = runnerPaths.statusFile(config.base_dir);
10770
+ const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
10933
10771
  console.log(`Runner: ${config.name}`);
10934
10772
  let status = null;
10935
10773
  if (existsSync4(statusFilePath)) {
@@ -10995,7 +10833,7 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10995
10833
  }
10996
10834
  console.log("");
10997
10835
  const processes = findFirecrackerProcesses();
10998
- const workspaces = existsSync4(workspacesDir) ? readdirSync2(workspacesDir).filter((d) => d.startsWith("vm0-")) : [];
10836
+ const workspaces = existsSync4(workspacesDir) ? readdirSync2(workspacesDir).filter(runnerPaths.isVmWorkspace) : [];
10999
10837
  const jobs = [];
11000
10838
  const statusVmIds = /* @__PURE__ */ new Set();
11001
10839
  const allocations = getAllocations();
@@ -11070,7 +10908,7 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
11070
10908
  }
11071
10909
  }
11072
10910
  for (const ws of workspaces) {
11073
- const vmId = ws.replace("vm0-", "");
10911
+ const vmId = runnerPaths.extractVmId(ws);
11074
10912
  if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
11075
10913
  warnings.push({
11076
10914
  message: `Orphan workspace: ${ws} (no matching job or process)`
@@ -11120,21 +10958,18 @@ function formatUptime(ms) {
11120
10958
  // src/commands/kill.ts
11121
10959
  import { Command as Command3 } from "commander";
11122
10960
  import { existsSync as existsSync5, readFileSync as readFileSync4, writeFileSync as writeFileSync3, rmSync } from "fs";
11123
- import { dirname as dirname3, join as join3 } from "path";
11124
10961
  import * as readline2 from "readline";
11125
10962
  var killCommand = new Command3("kill").description("Force terminate a run and clean up all resources").argument("<run-id>", "Run ID (full UUID or short 8-char vmId)").option("--config <path>", "Config file path", "./runner.yaml").option("--force", "Skip confirmation prompt").action(
11126
10963
  // eslint-disable-next-line complexity -- TODO: refactor complex function
11127
10964
  async (runIdArg, options) => {
11128
10965
  try {
11129
- loadConfig(options.config);
11130
- const configDir = dirname3(options.config);
11131
- const statusFilePath = join3(configDir, "status.json");
11132
- const workspacesDir = join3(configDir, "workspaces");
10966
+ const config = loadConfig(options.config);
10967
+ const statusFilePath = runnerPaths.statusFile(config.base_dir);
11133
10968
  const { vmId, runId } = resolveRunId(runIdArg, statusFilePath);
11134
10969
  console.log(`Killing run ${vmId}...`);
11135
10970
  const proc = findProcessByVmId(vmId);
11136
10971
  const guestIp = getIPForVm(vmId);
11137
- const workspaceDir = join3(workspacesDir, `vm0-${vmId}`);
10972
+ const workspaceDir = runnerPaths.vmWorkDir(config.base_dir, vmId);
11138
10973
  console.log("");
11139
10974
  console.log("Resources to clean up:");
11140
10975
  if (proc) {
@@ -11365,7 +11200,7 @@ var benchmarkCommand = new Command4("benchmark").description(
11365
11200
  await initOverlayPool({
11366
11201
  size: 2,
11367
11202
  replenishThreshold: 1,
11368
- poolDir: dataPaths.overlayPool(config.data_dir)
11203
+ poolDir: runnerPaths.overlayPool(config.base_dir)
11369
11204
  });
11370
11205
  await initTapPool({ name: config.name, size: 2, replenishThreshold: 1 });
11371
11206
  poolsInitialized = true;
@@ -11394,7 +11229,7 @@ var benchmarkCommand = new Command4("benchmark").description(
11394
11229
  });
11395
11230
 
11396
11231
  // src/index.ts
11397
- var version = true ? "3.9.1" : "0.1.0";
11232
+ var version = true ? "3.9.3" : "0.1.0";
11398
11233
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11399
11234
  program.addCommand(startCommand);
11400
11235
  program.addCommand(doctorCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "3.9.1",
3
+ "version": "3.9.3",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",
@@ -17,6 +17,7 @@
17
17
  "dependencies": {
18
18
  "ably": "^2.17.0",
19
19
  "commander": "^14.0.0",
20
+ "proper-lockfile": "^4.1.2",
20
21
  "yaml": "^2.3.4",
21
22
  "zod": "^4.1.12"
22
23
  }