@vm0/runner 3.11.3 → 3.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +640 -247
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -44,7 +44,7 @@ var runnerPaths = {
44
44
  /** Runner status file */
45
45
  statusFile: (baseDir) => path.join(baseDir, "status.json"),
46
46
  /** Snapshot generation work directory */
47
- snapshotWorkDir: (baseDir) => path.join(baseDir, "workspaces", ".snapshot-work"),
47
+ snapshotWorkDir: (baseDir) => path.join(baseDir, "workspaces", "snapshot"),
48
48
  /** Check if a directory name is a VM workspace */
49
49
  isVmWorkspace: (dirname) => dirname.startsWith(VM_WORKSPACE_PREFIX),
50
50
  /** Extract vmId from workspace directory name */
@@ -62,6 +62,14 @@ var vmPaths = {
62
62
  /** Overlay filesystem for VM writes */
63
63
  overlay: (workDir) => path.join(workDir, "overlay.ext4")
64
64
  };
65
+ var snapshotOutputPaths = {
66
+ /** VM state snapshot */
67
+ snapshot: (outputDir) => path.join(outputDir, "snapshot.bin"),
68
+ /** VM memory snapshot */
69
+ memory: (outputDir) => path.join(outputDir, "memory.bin"),
70
+ /** Golden overlay with guest state */
71
+ overlay: (outputDir) => path.join(outputDir, "overlay.ext4")
72
+ };
65
73
  var tempPaths = {
66
74
  /** Default proxy CA directory */
67
75
  proxyDir: `${VM0_TMP_PREFIX}-proxy`,
@@ -102,7 +110,12 @@ var runnerConfigSchema = z.object({
102
110
  firecracker: z.object({
103
111
  binary: z.string().min(1, "Firecracker binary path is required"),
104
112
  kernel: z.string().min(1, "Kernel path is required"),
105
- rootfs: z.string().min(1, "Rootfs path is required")
113
+ rootfs: z.string().min(1, "Rootfs path is required"),
114
+ snapshot: z.object({
115
+ snapshot: z.string().min(1, "Snapshot state file path is required"),
116
+ memory: z.string().min(1, "Snapshot memory file path is required"),
117
+ overlay: z.string().min(1, "Snapshot overlay file path is required")
118
+ }).optional()
106
119
  }),
107
120
  proxy: z.object({
108
121
  // TODO: Allow 0 to auto-find available port
@@ -131,7 +144,12 @@ var debugConfigSchema = z.object({
131
144
  firecracker: z.object({
132
145
  binary: z.string().min(1, "Firecracker binary path is required"),
133
146
  kernel: z.string().min(1, "Kernel path is required"),
134
- rootfs: z.string().min(1, "Rootfs path is required")
147
+ rootfs: z.string().min(1, "Rootfs path is required"),
148
+ snapshot: z.object({
149
+ snapshot: z.string().min(1, "Snapshot state file path is required"),
150
+ memory: z.string().min(1, "Snapshot memory file path is required"),
151
+ overlay: z.string().min(1, "Snapshot overlay file path is required")
152
+ }).optional()
135
153
  }),
136
154
  proxy: z.object({
137
155
  port: z.number().int().min(1024).max(65535).default(PROXY_DEFAULTS.port),
@@ -172,6 +190,13 @@ function validateFirecrackerPaths(config) {
172
190
  { path: config.kernel, name: "Kernel" },
173
191
  { path: config.rootfs, name: "Rootfs" }
174
192
  ];
193
+ if (config.snapshot) {
194
+ checks.push(
195
+ { path: config.snapshot.snapshot, name: "Snapshot state file" },
196
+ { path: config.snapshot.memory, name: "Snapshot memory file" },
197
+ { path: config.snapshot.overlay, name: "Snapshot overlay file" }
198
+ );
199
+ }
175
200
  for (const check of checks) {
176
201
  if (!fs.existsSync(check.path)) {
177
202
  throw new Error(`${check.name} not found: ${check.path}`);
@@ -337,10 +362,13 @@ async function subscribeToJobs(server, group, onJob, onConnectionChange) {
337
362
 
338
363
  // src/lib/executor.ts
339
364
  import fs9 from "fs";
365
+ import path6 from "path";
340
366
 
341
367
  // src/lib/firecracker/vm.ts
342
368
  import { spawn } from "child_process";
343
369
  import fs4 from "fs";
370
+ import os from "os";
371
+ import path4 from "path";
344
372
  import readline from "readline";
345
373
 
346
374
  // src/lib/firecracker/netns-pool.ts
@@ -379,8 +407,8 @@ var DEFAULT_OPTIONS = {
379
407
  maxTimeout: 1e3
380
408
  }
381
409
  };
382
- async function withFileLock(path7, fn, options) {
383
- const release = await lockfile.lock(path7, { ...DEFAULT_OPTIONS, ...options });
410
+ async function withFileLock(path9, fn, options) {
411
+ const release = await lockfile.lock(path9, { ...DEFAULT_OPTIONS, ...options });
384
412
  try {
385
413
  return await fn();
386
414
  } finally {
@@ -388,6 +416,37 @@ async function withFileLock(path7, fn, options) {
388
416
  }
389
417
  }
390
418
 
419
+ // src/lib/utils/process.ts
420
+ import { execSync } from "child_process";
421
/**
 * Check whether a process with the given PID currently exists.
 *
 * Probes the process with signal 0 via `process.kill`, which performs the
 * existence/permission check without actually delivering a signal.
 *
 * @param {number} pid - Process ID to probe. Must be a positive integer;
 *   any other value is reported as "not running".
 * @returns {boolean} `true` if the probe succeeds or fails with EPERM
 *   (the process exists but we may not signal it), `false` otherwise.
 */
function isProcessRunning(pid) {
  // kill(2) treats pid 0 and negative pids as process-group targets, so a
  // zeroed or corrupt PID record would otherwise be reported as "alive".
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but belongs to someone we cannot signal.
    return err instanceof Error && "code" in err && err.code === "EPERM";
  }
}
432
/**
 * Forcefully terminate a process together with all of its descendants.
 *
 * Direct children are discovered with `pgrep -P <pid>` and killed
 * recursively (depth-first) before the process itself receives SIGKILL.
 * The whole operation is best-effort: failures to list children or to
 * signal a process are ignored so cleanup paths never throw.
 *
 * @param {number} pid - PID of the root process to kill. Values that are
 *   not positive integers are ignored.
 * @returns {void}
 */
function killProcessTree(pid) {
  // pid is interpolated into a shell command and handed to kill(2), where
  // 0 / negative values address whole process groups (0 being our own).
  // Refuse anything that is not a plain positive integer.
  if (!Number.isInteger(pid) || pid <= 0) {
    return;
  }

  let childPids = [];
  try {
    // `|| true` keeps the shell exit status at 0 when pgrep finds no children.
    childPids = execSync(`pgrep -P ${pid} 2>/dev/null || true`, {
      encoding: "utf-8"
    })
      .split("\n")
      .map((line) => Number.parseInt(line, 10))
      .filter((childPid) => Number.isInteger(childPid) && childPid > 0);
  } catch {
    // Best-effort: if children cannot be listed, still kill the target below.
  }

  for (const childPid of childPids) {
    killProcessTree(childPid);
  }

  try {
    process.kill(pid, "SIGKILL");
  } catch {
    // Best-effort: the process already exited or we may not signal it.
  }
}
449
+
391
450
  // src/lib/utils/exec.ts
392
451
  import { exec } from "child_process";
393
452
  import { promisify } from "util";
@@ -436,6 +495,10 @@ async function createNetnsWithTap(nsName, tap) {
436
495
  await execCommand(`ip netns exec ${nsName} ip link set ${tap.tapName} up`);
437
496
  await execCommand(`ip netns exec ${nsName} ip link set lo up`);
438
497
  }
498
+ async function deleteNetns(nsName) {
499
+ await execCommand(`ip netns del ${nsName}`).catch(() => {
500
+ });
501
+ }
439
502
 
440
503
  // src/lib/firecracker/netns-pool.ts
441
504
  var logger = createLogger("NetnsPool");
@@ -517,14 +580,6 @@ function makeNsName(runnerIdx, nsIdx) {
517
580
  function makeVethName(runnerIdx, nsIdx) {
518
581
  return `${VETH_PREFIX}${runnerIdx}-${nsIdx}`;
519
582
  }
520
- function isPidAlive(pid) {
521
- try {
522
- process.kill(pid, 0);
523
- return true;
524
- } catch {
525
- return false;
526
- }
527
- }
528
583
  async function deleteIptablesRulesByComment(comment) {
529
584
  const deleteFromTable = async (table) => {
530
585
  try {
@@ -593,7 +648,7 @@ var NetnsPool = class _NetnsPool {
593
648
  const data = read();
594
649
  const orphaned = [];
595
650
  for (const [runnerIdx, runner] of Object.entries(data.runners)) {
596
- if (!isPidAlive(runner.pid)) {
651
+ if (!isProcessRunning(runner.pid)) {
597
652
  orphaned.push({
598
653
  runnerIdx,
599
654
  namespaces: Object.entries(runner.namespaces).map(
@@ -630,7 +685,7 @@ var NetnsPool = class _NetnsPool {
630
685
  const data = read();
631
686
  for (const { runnerIdx } of orphanedData) {
632
687
  const runner = data.runners[runnerIdx];
633
- if (runner && !isPidAlive(runner.pid)) {
688
+ if (runner && !isProcessRunning(runner.pid)) {
634
689
  delete data.runners[runnerIdx];
635
690
  }
636
691
  }
@@ -1077,10 +1132,10 @@ import * as http from "http";
1077
1132
  import * as fs3 from "fs";
1078
1133
  var logger3 = createLogger("FirecrackerClient");
1079
1134
  var FirecrackerApiError = class extends Error {
1080
- constructor(statusCode, path7, faultMessage) {
1081
- super(`Firecracker API error ${statusCode} on ${path7}: ${faultMessage}`);
1135
+ constructor(statusCode, path9, faultMessage) {
1136
+ super(`Firecracker API error ${statusCode} on ${path9}: ${faultMessage}`);
1082
1137
  this.statusCode = statusCode;
1083
- this.path = path7;
1138
+ this.path = path9;
1084
1139
  this.faultMessage = faultMessage;
1085
1140
  this.name = "FirecrackerApiError";
1086
1141
  }
@@ -1193,27 +1248,27 @@ var FirecrackerClient = class {
1193
1248
  /**
1194
1249
  * GET request
1195
1250
  */
1196
- async get(path7) {
1197
- return this.request("GET", path7);
1251
+ async get(path9) {
1252
+ return this.request("GET", path9);
1198
1253
  }
1199
1254
  /**
1200
1255
  * PATCH request
1201
1256
  */
1202
- async patch(path7, body) {
1203
- return this.request("PATCH", path7, body);
1257
+ async patch(path9, body) {
1258
+ return this.request("PATCH", path9, body);
1204
1259
  }
1205
1260
  /**
1206
1261
  * PUT request
1207
1262
  */
1208
- async put(path7, body) {
1209
- return this.request("PUT", path7, body);
1263
+ async put(path9, body) {
1264
+ return this.request("PUT", path9, body);
1210
1265
  }
1211
1266
  /**
1212
1267
  * Make an HTTP request to Firecracker API
1213
1268
  *
1214
1269
  * @param timeoutMs Request timeout in milliseconds (default: 30000ms)
1215
1270
  */
1216
- request(method, path7, body, timeoutMs = 3e4) {
1271
+ request(method, path9, body, timeoutMs = 3e4) {
1217
1272
  return new Promise((resolve, reject) => {
1218
1273
  const bodyStr = body !== void 0 ? JSON.stringify(body) : void 0;
1219
1274
  const headers = {
@@ -1227,7 +1282,7 @@ var FirecrackerClient = class {
1227
1282
  }
1228
1283
  const options = {
1229
1284
  socketPath: this.socketPath,
1230
- path: path7,
1285
+ path: path9,
1231
1286
  method,
1232
1287
  headers,
1233
1288
  timeout: timeoutMs,
@@ -1235,7 +1290,7 @@ var FirecrackerClient = class {
1235
1290
  // Firecracker's single-threaded API can have issues with pipelined requests
1236
1291
  agent: false
1237
1292
  };
1238
- logger3.log(`${method} ${path7}${bodyStr ? " " + bodyStr : ""}`);
1293
+ logger3.log(`${method} ${path9}${bodyStr ? " " + bodyStr : ""}`);
1239
1294
  const req = http.request(options, (res) => {
1240
1295
  let data = "";
1241
1296
  res.on("data", (chunk) => {
@@ -1252,14 +1307,14 @@ var FirecrackerClient = class {
1252
1307
  faultMessage = errorBody.fault_message || data;
1253
1308
  } catch {
1254
1309
  }
1255
- reject(new FirecrackerApiError(statusCode, path7, faultMessage));
1310
+ reject(new FirecrackerApiError(statusCode, path9, faultMessage));
1256
1311
  }
1257
1312
  });
1258
1313
  });
1259
1314
  req.on("timeout", () => {
1260
1315
  req.destroy();
1261
1316
  reject(
1262
- new Error(`Request timeout after ${timeoutMs}ms: ${method} ${path7}`)
1317
+ new Error(`Request timeout after ${timeoutMs}ms: ${method} ${path9}`)
1263
1318
  );
1264
1319
  });
1265
1320
  req.on("error", (err) => {
@@ -1352,7 +1407,7 @@ var FirecrackerVM = class {
1352
1407
  this.workDir = config.workDir;
1353
1408
  this.vsockPath = vmPaths.vsock(this.workDir);
1354
1409
  this.configPath = vmPaths.config(this.workDir);
1355
- this.apiSocketPath = `${this.workDir}/api.sock`;
1410
+ this.apiSocketPath = vmPaths.apiSock(this.workDir);
1356
1411
  }
1357
1412
  /**
1358
1413
  * Get current VM state
@@ -1451,6 +1506,7 @@ var FirecrackerVM = class {
1451
1506
  const config = this.buildConfig();
1452
1507
  fs4.writeFileSync(this.configPath, JSON.stringify(config, null, 2));
1453
1508
  logger4.log(`[VM ${this.config.vmId}] Starting Firecracker (fresh boot)...`);
1509
+ const currentUser = os.userInfo().username;
1454
1510
  this.process = spawn(
1455
1511
  "sudo",
1456
1512
  [
@@ -1458,6 +1514,9 @@ var FirecrackerVM = class {
1458
1514
  "netns",
1459
1515
  "exec",
1460
1516
  this.netns.name,
1517
+ "sudo",
1518
+ "-u",
1519
+ currentUser,
1461
1520
  this.config.firecrackerBinary,
1462
1521
  "--config-file",
1463
1522
  this.configPath,
@@ -1475,25 +1534,58 @@ var FirecrackerVM = class {
1475
1534
  * Start VM from snapshot
1476
1535
  * Uses --api-sock to load snapshot via API
1477
1536
  *
1478
- * Drive configuration must be done before loading snapshot
1479
- * because our overlay path differs from the snapshot's original path.
1537
+ * Snapshot contains original absolute paths for drives. We use mount namespace
1538
+ * isolation to bind mount our actual overlay file to the path expected by the snapshot.
1539
+ * This allows concurrent VMs to each have their own overlay while restoring from
1540
+ * the same snapshot.
1480
1541
  */
1481
1542
  async startFromSnapshot(snapshot) {
1482
1543
  logger4.log(
1483
1544
  `[VM ${this.config.vmId}] Starting Firecracker (snapshot restore)...`
1484
1545
  );
1485
- logger4.log(`[VM ${this.config.vmId}] Snapshot: ${snapshot.snapshotPath}`);
1486
- logger4.log(`[VM ${this.config.vmId}] Memory: ${snapshot.memoryPath}`);
1546
+ logger4.log(`[VM ${this.config.vmId}] Snapshot: ${snapshot.snapshot}`);
1547
+ logger4.log(`[VM ${this.config.vmId}] Memory: ${snapshot.memory}`);
1548
+ const actualVsockDir = vmPaths.vsockDir(this.workDir);
1549
+ logger4.log(
1550
+ `[VM ${this.config.vmId}] Snapshot vsock: ${snapshot.snapshotVsockDir}`
1551
+ );
1552
+ logger4.log(
1553
+ `[VM ${this.config.vmId}] Snapshot overlay: ${snapshot.snapshotOverlay}`
1554
+ );
1555
+ logger4.log(`[VM ${this.config.vmId}] Actual vsock: ${actualVsockDir}`);
1556
+ logger4.log(
1557
+ `[VM ${this.config.vmId}] Actual overlay: ${this.vmOverlayPath}`
1558
+ );
1559
+ fs4.mkdirSync(snapshot.snapshotVsockDir, { recursive: true });
1560
+ fs4.mkdirSync(path4.dirname(snapshot.snapshotOverlay), {
1561
+ recursive: true
1562
+ });
1563
+ if (!fs4.existsSync(snapshot.snapshotOverlay)) {
1564
+ fs4.writeFileSync(snapshot.snapshotOverlay, "");
1565
+ }
1566
+ const currentUser = os.userInfo().username;
1567
+ const bindMountVsock = `mount --bind "${actualVsockDir}" "${snapshot.snapshotVsockDir}"`;
1568
+ const bindMountOverlay = `mount --bind "${this.vmOverlayPath}" "${snapshot.snapshotOverlay}"`;
1569
+ const firecrackerCmd = [
1570
+ "ip",
1571
+ "netns",
1572
+ "exec",
1573
+ this.netns.name,
1574
+ "sudo",
1575
+ "-u",
1576
+ currentUser,
1577
+ this.config.firecrackerBinary,
1578
+ "--api-sock",
1579
+ this.apiSocketPath
1580
+ ].join(" ");
1487
1581
  this.process = spawn(
1488
1582
  "sudo",
1489
1583
  [
1490
- "ip",
1491
- "netns",
1492
- "exec",
1493
- this.netns.name,
1494
- this.config.firecrackerBinary,
1495
- "--api-sock",
1496
- this.apiSocketPath
1584
+ "unshare",
1585
+ "--mount",
1586
+ "bash",
1587
+ "-c",
1588
+ `${bindMountVsock} && ${bindMountOverlay} && ${firecrackerCmd}`
1497
1589
  ],
1498
1590
  {
1499
1591
  cwd: this.workDir,
@@ -1504,26 +1596,11 @@ var FirecrackerVM = class {
1504
1596
  this.setupProcessHandlers();
1505
1597
  const client = new FirecrackerClient(this.apiSocketPath);
1506
1598
  await this.waitForApiReady(client);
1507
- logger4.log(`[VM ${this.config.vmId}] Configuring drives...`);
1508
- await Promise.all([
1509
- client.configureDrive({
1510
- drive_id: "rootfs",
1511
- path_on_host: this.config.rootfsPath,
1512
- is_root_device: true,
1513
- is_read_only: true
1514
- }),
1515
- client.configureDrive({
1516
- drive_id: "overlay",
1517
- path_on_host: this.vmOverlayPath,
1518
- is_root_device: false,
1519
- is_read_only: false
1520
- })
1521
- ]);
1522
1599
  logger4.log(`[VM ${this.config.vmId}] Loading snapshot...`);
1523
1600
  await client.loadSnapshot({
1524
- snapshot_path: snapshot.snapshotPath,
1601
+ snapshot_path: snapshot.snapshot,
1525
1602
  mem_backend: {
1526
- backend_path: snapshot.memoryPath,
1603
+ backend_path: snapshot.memory,
1527
1604
  backend_type: "File"
1528
1605
  },
1529
1606
  resume_vm: true
@@ -1617,8 +1694,8 @@ var FirecrackerVM = class {
1617
1694
  * since we want to clean up as much as possible even if some parts fail.
1618
1695
  */
1619
1696
  async cleanup() {
1620
- if (this.process && !this.process.killed) {
1621
- this.process.kill("SIGKILL");
1697
+ if (this.process && !this.process.killed && this.process.pid) {
1698
+ killProcessTree(this.process.pid);
1622
1699
  this.process = null;
1623
1700
  }
1624
1701
  if (this.netns) {
@@ -1756,8 +1833,8 @@ function encodeExecPayload(command, timeoutMs) {
1756
1833
  cmdBuf.copy(payload, 8);
1757
1834
  return payload;
1758
1835
  }
1759
- function encodeWriteFilePayload(path7, content, sudo) {
1760
- const pathBuf = Buffer.from(path7, "utf-8");
1836
+ function encodeWriteFilePayload(path9, content, sudo) {
1837
+ const pathBuf = Buffer.from(path9, "utf-8");
1761
1838
  if (pathBuf.length > 65535) {
1762
1839
  throw new Error(`Path too long: ${pathBuf.length} bytes (max 65535)`);
1763
1840
  }
@@ -2652,8 +2729,8 @@ function getErrorMap() {
2652
2729
  return overrideErrorMap;
2653
2730
  }
2654
2731
  var makeIssue = (params) => {
2655
- const { data, path: path7, errorMaps, issueData } = params;
2656
- const fullPath = [...path7, ...issueData.path || []];
2732
+ const { data, path: path9, errorMaps, issueData } = params;
2733
+ const fullPath = [...path9, ...issueData.path || []];
2657
2734
  const fullIssue = {
2658
2735
  ...issueData,
2659
2736
  path: fullPath
@@ -2752,11 +2829,11 @@ var errorUtil;
2752
2829
  errorUtil2.toString = (message) => typeof message === "string" ? message : message === null || message === void 0 ? void 0 : message.message;
2753
2830
  })(errorUtil || (errorUtil = {}));
2754
2831
  var ParseInputLazyPath = class {
2755
- constructor(parent, value, path7, key) {
2832
+ constructor(parent, value, path9, key) {
2756
2833
  this._cachedPath = [];
2757
2834
  this.parent = parent;
2758
2835
  this.data = value;
2759
- this._path = path7;
2836
+ this._path = path9;
2760
2837
  this._key = key;
2761
2838
  }
2762
2839
  get path() {
@@ -7830,6 +7907,7 @@ var modelProviderTypeSchema = z19.enum([
7830
7907
  "minimax-api-key",
7831
7908
  "deepseek-api-key",
7832
7909
  "zai-api-key",
7910
+ "azure-foundry",
7833
7911
  "aws-bedrock"
7834
7912
  ]);
7835
7913
  var modelProviderFrameworkSchema = z19.enum(["claude-code", "codex"]);
@@ -9220,7 +9298,7 @@ function initVMRegistry(registryPath) {
9220
9298
  // src/lib/proxy/proxy-manager.ts
9221
9299
  import { spawn as spawn2 } from "child_process";
9222
9300
  import fs7 from "fs";
9223
- import path4 from "path";
9301
+ import path5 from "path";
9224
9302
 
9225
9303
  // src/lib/proxy/mitm-addon-script.ts
9226
9304
  var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
@@ -9716,7 +9794,7 @@ var ProxyManager = class {
9716
9794
  process = null;
9717
9795
  isRunning = false;
9718
9796
  constructor(config) {
9719
- const addonPath = path4.join(config.caDir, "mitm_addon.py");
9797
+ const addonPath = path5.join(config.caDir, "mitm_addon.py");
9720
9798
  this.config = {
9721
9799
  ...DEFAULT_PROXY_OPTIONS,
9722
9800
  ...config,
@@ -9743,7 +9821,7 @@ var ProxyManager = class {
9743
9821
  * Ensure the addon script exists at the configured path
9744
9822
  */
9745
9823
  ensureAddonScript() {
9746
- const addonDir = path4.dirname(this.config.addonPath);
9824
+ const addonDir = path5.dirname(this.config.addonPath);
9747
9825
  if (!fs7.existsSync(addonDir)) {
9748
9826
  fs7.mkdirSync(addonDir, { recursive: true });
9749
9827
  }
@@ -9759,7 +9837,7 @@ var ProxyManager = class {
9759
9837
  if (!fs7.existsSync(this.config.caDir)) {
9760
9838
  throw new Error(`Proxy CA directory not found: ${this.config.caDir}`);
9761
9839
  }
9762
- const caCertPath = path4.join(this.config.caDir, "mitmproxy-ca.pem");
9840
+ const caCertPath = path5.join(this.config.caDir, "mitmproxy-ca.pem");
9763
9841
  if (!fs7.existsSync(caCertPath)) {
9764
9842
  throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
9765
9843
  }
@@ -10212,7 +10290,21 @@ async function executeJob(context, config, options = {}) {
10212
10290
  const guestConnectionPromise = guest.waitForGuestConnection(3e4);
10213
10291
  logger9.log(`Creating VM ${vmId}...`);
10214
10292
  vm = new FirecrackerVM(vmConfig);
10215
- await withSandboxTiming("vm_create", () => vm.start());
10293
+ const snapshotConfig = config.firecracker.snapshot;
10294
+ let snapshotPaths;
10295
+ if (snapshotConfig) {
10296
+ const snapshotDir = path6.dirname(snapshotConfig.snapshot);
10297
+ const originalBaseDir = path6.dirname(snapshotDir);
10298
+ const snapshotBaseDir = runnerPaths.snapshotBaseDir(originalBaseDir);
10299
+ const snapshotWorkDir = runnerPaths.snapshotWorkDir(snapshotBaseDir);
10300
+ snapshotPaths = {
10301
+ snapshot: snapshotConfig.snapshot,
10302
+ memory: snapshotConfig.memory,
10303
+ snapshotOverlay: vmPaths.overlay(snapshotWorkDir),
10304
+ snapshotVsockDir: vmPaths.vsockDir(snapshotWorkDir)
10305
+ };
10306
+ }
10307
+ await withSandboxTiming("vm_create", () => vm.start(snapshotPaths));
10216
10308
  guestIp = vm.getGuestIp();
10217
10309
  vethNsIp = vm.getNetns()?.vethNsIp ?? null;
10218
10310
  if (!guestIp || !vethNsIp) {
@@ -10240,6 +10332,10 @@ async function executeJob(context, config, options = {}) {
10240
10332
  logger9.log(`Waiting for guest connection...`);
10241
10333
  await withSandboxTiming("guest_wait", () => guestConnectionPromise);
10242
10334
  logger9.log(`Guest client ready`);
10335
+ if (config.firecracker.snapshot) {
10336
+ const timestamp = (Date.now() / 1e3).toFixed(3);
10337
+ await guest.exec(`date -s "@${timestamp}"`);
10338
+ }
10243
10339
  if (context.storageManifest) {
10244
10340
  await withSandboxTiming(
10245
10341
  "storage_download",
@@ -10400,12 +10496,12 @@ function createStatusUpdater(statusFilePath, state) {
10400
10496
  }
10401
10497
 
10402
10498
  // src/lib/firecracker/network.ts
10403
- import { execSync, exec as exec3 } from "child_process";
10499
+ import { execSync as execSync2, exec as exec3 } from "child_process";
10404
10500
  import { promisify as promisify3 } from "util";
10405
10501
  var execAsync3 = promisify3(exec3);
10406
10502
  function commandExists(cmd) {
10407
10503
  try {
10408
- execSync(`which ${cmd}`, { stdio: "ignore" });
10504
+ execSync2(`which ${cmd}`, { stdio: "ignore" });
10409
10505
  return true;
10410
10506
  } catch {
10411
10507
  return false;
@@ -10420,7 +10516,7 @@ function checkNetworkPrerequisites() {
10420
10516
  }
10421
10517
  }
10422
10518
  try {
10423
- execSync("sudo -n true 2>/dev/null", { stdio: "ignore" });
10519
+ execSync2("sudo -n true 2>/dev/null", { stdio: "ignore" });
10424
10520
  } catch {
10425
10521
  errors.push(
10426
10522
  "Root/sudo access required for network configuration. Please run with sudo or configure sudoers."
@@ -10442,24 +10538,13 @@ async function isPortInUse(port) {
10442
10538
 
10443
10539
  // src/lib/runner/runner-lock.ts
10444
10540
  import fs10 from "fs";
10445
- import path5 from "path";
10541
+ import path7 from "path";
10446
10542
  var logger11 = createLogger("RunnerLock");
10447
10543
  var DEFAULT_PID_FILE = runtimePaths.runnerPid;
10448
10544
  var currentPidFile = null;
10449
- function isProcessRunning(pid) {
10450
- try {
10451
- process.kill(pid, 0);
10452
- return true;
10453
- } catch (err) {
10454
- if (err instanceof Error && "code" in err && err.code === "EPERM") {
10455
- return true;
10456
- }
10457
- return false;
10458
- }
10459
- }
10460
10545
  function acquireRunnerLock(options = {}) {
10461
10546
  const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
10462
- const runDir = path5.dirname(pidFile);
10547
+ const runDir = path7.dirname(pidFile);
10463
10548
  fs10.mkdirSync(runDir, { recursive: true });
10464
10549
  if (fs10.existsSync(pidFile)) {
10465
10550
  const pidStr = fs10.readFileSync(pidFile, "utf-8").trim();
@@ -10493,7 +10578,7 @@ function releaseRunnerLock() {
10493
10578
  var logger12 = createLogger("Runner");
10494
10579
  async function setupEnvironment(options) {
10495
10580
  const { config } = options;
10496
- await acquireRunnerLock();
10581
+ acquireRunnerLock();
10497
10582
  const networkCheck = checkNetworkPrerequisites();
10498
10583
  if (!networkCheck.ok) {
10499
10584
  logger12.error("Network prerequisites not met:");
@@ -10523,10 +10608,16 @@ async function setupEnvironment(options) {
10523
10608
  );
10524
10609
  }
10525
10610
  logger12.log("Initializing overlay pool...");
10611
+ const snapshotConfig = config.firecracker.snapshot;
10526
10612
  await initOverlayPool({
10527
10613
  size: config.sandbox.max_concurrent + 2,
10528
10614
  replenishThreshold: config.sandbox.max_concurrent,
10529
- poolDir: runnerPaths.overlayPool(config.base_dir)
10615
+ poolDir: runnerPaths.overlayPool(config.base_dir),
10616
+ createFile: snapshotConfig ? (filePath) => execCommand(
10617
+ `cp --sparse=always "${snapshotConfig.overlay}" "${filePath}"`,
10618
+ false
10619
+ ).then(() => {
10620
+ }) : void 0
10530
10621
  });
10531
10622
  logger12.log("Initializing namespace pool...");
10532
10623
  await initNetnsPool({
@@ -10831,19 +10922,29 @@ var startCommand = new Command("start").description("Start the runner").option("
10831
10922
  // src/commands/doctor.ts
10832
10923
  import { Command as Command2 } from "commander";
10833
10924
  import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
10925
+ import { execSync as execSync3 } from "child_process";
10834
10926
 
10835
10927
  // src/lib/firecracker/process.ts
10836
10928
  import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync4 } from "fs";
10837
- import path6 from "path";
10929
+ import path8 from "path";
10838
10930
  function parseFirecrackerCmdline(cmdline) {
10839
10931
  const args = cmdline.split("\0");
10840
10932
  if (!args[0]?.includes("firecracker")) return null;
10933
+ let filePath;
10841
10934
  const sockIdx = args.indexOf("--api-sock");
10842
- const socketPath = args[sockIdx + 1];
10843
- if (sockIdx === -1 || !socketPath) return null;
10844
- const match = socketPath.match(/vm0-([a-f0-9]+)\/firecracker\.sock$/);
10935
+ if (sockIdx !== -1) {
10936
+ filePath = args[sockIdx + 1];
10937
+ }
10938
+ if (!filePath) {
10939
+ const configIdx = args.indexOf("--config-file");
10940
+ if (configIdx !== -1) {
10941
+ filePath = args[configIdx + 1];
10942
+ }
10943
+ }
10944
+ if (!filePath) return null;
10945
+ const match = filePath.match(/vm0-([a-f0-9]+)\//);
10845
10946
  if (!match?.[1]) return null;
10846
- return { vmId: createVmId(match[1]), socketPath };
10947
+ return createVmId(match[1]);
10847
10948
  }
10848
10949
  function parseMitmproxyCmdline(cmdline) {
10849
10950
  if (!cmdline.includes("mitmproxy") && !cmdline.includes("mitmdump")) {
@@ -10867,13 +10968,13 @@ function findFirecrackerProcesses() {
10867
10968
  for (const entry of entries) {
10868
10969
  if (!/^\d+$/.test(entry)) continue;
10869
10970
  const pid = parseInt(entry, 10);
10870
- const cmdlinePath = path6.join(procDir, entry, "cmdline");
10971
+ const cmdlinePath = path8.join(procDir, entry, "cmdline");
10871
10972
  if (!existsSync4(cmdlinePath)) continue;
10872
10973
  try {
10873
10974
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
10874
- const parsed = parseFirecrackerCmdline(cmdline);
10875
- if (parsed) {
10876
- processes.push({ pid, ...parsed });
10975
+ const vmId = parseFirecrackerCmdline(cmdline);
10976
+ if (vmId) {
10977
+ processes.push({ pid, vmId });
10877
10978
  }
10878
10979
  } catch {
10879
10980
  continue;
@@ -10886,33 +10987,25 @@ function findProcessByVmId(vmId) {
10886
10987
  const vmIdStr = vmIdValue(vmId);
10887
10988
  return processes.find((p) => vmIdValue(p.vmId) === vmIdStr) || null;
10888
10989
  }
10889
- function isProcessRunning2(pid) {
10890
- try {
10891
- process.kill(pid, 0);
10892
- return true;
10893
- } catch {
10894
- return false;
10895
- }
10896
- }
10897
10990
  async function killProcess(pid, timeoutMs = 5e3) {
10898
- if (!isProcessRunning2(pid)) return true;
10991
+ if (!isProcessRunning(pid)) return true;
10899
10992
  try {
10900
10993
  process.kill(pid, "SIGTERM");
10901
10994
  } catch {
10902
- return !isProcessRunning2(pid);
10995
+ return !isProcessRunning(pid);
10903
10996
  }
10904
10997
  const startTime = Date.now();
10905
10998
  while (Date.now() - startTime < timeoutMs) {
10906
- if (!isProcessRunning2(pid)) return true;
10999
+ if (!isProcessRunning(pid)) return true;
10907
11000
  await new Promise((resolve) => setTimeout(resolve, 100));
10908
11001
  }
10909
- if (isProcessRunning2(pid)) {
11002
+ if (isProcessRunning(pid)) {
10910
11003
  try {
10911
11004
  process.kill(pid, "SIGKILL");
10912
11005
  } catch {
10913
11006
  }
10914
11007
  }
10915
- return !isProcessRunning2(pid);
11008
+ return !isProcessRunning(pid);
10916
11009
  }
10917
11010
  function findMitmproxyProcess() {
10918
11011
  const procDir = "/proc";
@@ -10925,7 +11018,7 @@ function findMitmproxyProcess() {
10925
11018
  for (const entry of entries) {
10926
11019
  if (!/^\d+$/.test(entry)) continue;
10927
11020
  const pid = parseInt(entry, 10);
10928
- const cmdlinePath = path6.join(procDir, entry, "cmdline");
11021
+ const cmdlinePath = path8.join(procDir, entry, "cmdline");
10929
11022
  if (!existsSync4(cmdlinePath)) continue;
10930
11023
  try {
10931
11024
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
@@ -10940,143 +11033,196 @@ function findMitmproxyProcess() {
10940
11033
  return null;
10941
11034
  }
10942
11035
 
11036
+ // src/lib/runner/types.ts
11037
+ import { z as z30 } from "zod";
11038
+ var RunnerModeSchema = z30.enum(["running", "draining", "stopping", "stopped"]);
11039
+ var RunnerStatusSchema = z30.object({
11040
+ mode: RunnerModeSchema,
11041
+ active_runs: z30.number(),
11042
+ active_run_ids: z30.array(z30.string()),
11043
+ started_at: z30.string(),
11044
+ updated_at: z30.string()
11045
+ });
11046
+
10943
11047
  // src/commands/doctor.ts
10944
- var doctorCommand = new Command2("doctor").description("Diagnose runner health, check network, and detect issues").option("--config <path>", "Config file path", "./runner.yaml").action(
10945
- // eslint-disable-next-line complexity -- TODO: refactor complex function
10946
- async (options) => {
10947
- try {
10948
- const config = loadConfig(options.config);
10949
- const statusFilePath = runnerPaths.statusFile(config.base_dir);
10950
- const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
10951
- console.log(`Runner: ${config.name}`);
10952
- let status = null;
10953
- if (existsSync5(statusFilePath)) {
10954
- try {
10955
- status = JSON.parse(
10956
- readFileSync3(statusFilePath, "utf-8")
10957
- );
10958
- console.log(`Mode: ${status.mode}`);
10959
- if (status.started_at) {
10960
- const started = new Date(status.started_at);
10961
- const uptime = formatUptime(Date.now() - started.getTime());
10962
- console.log(
10963
- `Started: ${started.toLocaleString()} (uptime: ${uptime})`
10964
- );
10965
- }
10966
- } catch {
10967
- console.log("Mode: unknown (status.json unreadable)");
10968
- }
10969
- } else {
10970
- console.log("Mode: unknown (no status.json)");
10971
- }
10972
- console.log("");
10973
- console.log("API Connectivity:");
10974
- try {
10975
- await pollForJob(config.server, config.group);
10976
- console.log(` \u2713 Connected to ${config.server.url}`);
10977
- console.log(" \u2713 Authentication: OK");
10978
- } catch (error) {
10979
- console.log(` \u2717 Cannot connect to ${config.server.url}`);
10980
- console.log(
10981
- ` Error: ${error instanceof Error ? error.message : "Unknown error"}`
10982
- );
10983
- }
10984
- console.log("");
10985
- console.log("Network:");
10986
- const warnings = [];
10987
- const proxyPort = config.proxy.port;
10988
- const mitmProc = findMitmproxyProcess();
10989
- const portInUse = await isPortInUse(proxyPort);
10990
- if (mitmProc) {
10991
- console.log(
10992
- ` \u2713 Proxy mitmproxy (PID ${mitmProc.pid}) on :${proxyPort}`
10993
- );
10994
- } else if (portInUse) {
10995
- console.log(
10996
- ` \u26A0\uFE0F Proxy port :${proxyPort} in use but mitmproxy process not found`
10997
- );
10998
- warnings.push({
10999
- message: `Port ${proxyPort} is in use but mitmproxy process not detected`
11000
- });
11001
- } else {
11002
- console.log(` \u2717 Proxy mitmproxy not running`);
11003
- warnings.push({ message: "Proxy mitmproxy is not running" });
11004
- }
11005
- console.log(
11006
- ` \u2139 Namespaces: each VM runs in isolated namespace with IP ${SNAPSHOT_NETWORK.guestIp}`
11048
+ function displayRunnerStatus(statusFilePath, warnings) {
11049
+ if (!existsSync5(statusFilePath)) {
11050
+ console.log("Mode: unknown (no status.json)");
11051
+ return null;
11052
+ }
11053
+ try {
11054
+ const status = RunnerStatusSchema.parse(
11055
+ JSON.parse(readFileSync3(statusFilePath, "utf-8"))
11056
+ );
11057
+ console.log(`Mode: ${status.mode}`);
11058
+ if (status.started_at) {
11059
+ const started = new Date(status.started_at);
11060
+ const uptime = formatUptime(Date.now() - started.getTime());
11061
+ console.log(`Started: ${started.toLocaleString()} (uptime: ${uptime})`);
11062
+ }
11063
+ return status;
11064
+ } catch {
11065
+ console.log("Mode: unknown (status.json unreadable)");
11066
+ warnings.push({ message: "status.json exists but cannot be parsed" });
11067
+ return null;
11068
+ }
11069
+ }
11070
+ async function checkApiConnectivity(config, warnings) {
11071
+ console.log("API Connectivity:");
11072
+ try {
11073
+ await pollForJob(config.server, config.group);
11074
+ console.log(` \u2713 Connected to ${config.server.url}`);
11075
+ console.log(" \u2713 Authentication: OK");
11076
+ } catch (error) {
11077
+ console.log(` \u2717 Cannot connect to ${config.server.url}`);
11078
+ console.log(
11079
+ ` Error: ${error instanceof Error ? error.message : "Unknown error"}`
11080
+ );
11081
+ warnings.push({
11082
+ message: `Cannot connect to API: ${error instanceof Error ? error.message : "Unknown error"}`
11083
+ });
11084
+ }
11085
+ }
11086
+ async function checkNetwork(config, warnings) {
11087
+ console.log("Network:");
11088
+ const proxyPort = config.proxy.port;
11089
+ const mitmProc = findMitmproxyProcess();
11090
+ const portInUse = await isPortInUse(proxyPort);
11091
+ if (mitmProc) {
11092
+ console.log(` \u2713 Proxy mitmproxy (PID ${mitmProc.pid}) on :${proxyPort}`);
11093
+ } else if (portInUse) {
11094
+ console.log(
11095
+ ` \u26A0\uFE0F Proxy port :${proxyPort} in use but mitmproxy process not found`
11096
+ );
11097
+ warnings.push({
11098
+ message: `Port ${proxyPort} is in use but mitmproxy process not detected`
11099
+ });
11100
+ } else {
11101
+ console.log(` \u2717 Proxy mitmproxy not running`);
11102
+ warnings.push({ message: "Proxy mitmproxy is not running" });
11103
+ }
11104
+ console.log(
11105
+ ` \u2139 Namespaces: each VM runs in isolated namespace with IP ${SNAPSHOT_NETWORK.guestIp}`
11106
+ );
11107
+ }
11108
+ function buildJobInfo(status, processes) {
11109
+ const jobs = [];
11110
+ const statusVmIds = /* @__PURE__ */ new Set();
11111
+ if (status?.active_run_ids) {
11112
+ for (const runId of status.active_run_ids) {
11113
+ const vmId = createVmId(runId);
11114
+ statusVmIds.add(vmId);
11115
+ const proc = processes.find((p) => p.vmId === vmId);
11116
+ jobs.push({
11117
+ runId,
11118
+ vmId,
11119
+ firecrackerPid: proc?.pid
11120
+ });
11121
+ }
11122
+ }
11123
+ return { jobs, statusVmIds };
11124
+ }
11125
+ function displayRuns(jobs, maxConcurrent) {
11126
+ console.log(`Runs (${jobs.length} active, max ${maxConcurrent}):`);
11127
+ if (jobs.length === 0) {
11128
+ console.log(" No active runs");
11129
+ return;
11130
+ }
11131
+ console.log(" Run ID VM ID Status");
11132
+ for (const job of jobs) {
11133
+ const statusText = job.firecrackerPid ? `\u2713 Running (PID ${job.firecrackerPid})` : "\u26A0\uFE0F No process";
11134
+ console.log(` ${job.runId} ${job.vmId} ${statusText}`);
11135
+ }
11136
+ }
11137
+ async function findOrphanNetworkNamespaces(warnings) {
11138
+ let allNamespaces = [];
11139
+ try {
11140
+ const output = execSync3("ip netns list 2>/dev/null || true", {
11141
+ encoding: "utf-8"
11142
+ });
11143
+ allNamespaces = output.split("\n").map((line) => line.split(" ")[0] ?? "").filter((ns) => ns.startsWith(NS_PREFIX));
11144
+ } catch (err) {
11145
+ warnings.push({
11146
+ message: `Failed to list network namespaces: ${err instanceof Error ? err.message : "Unknown error"}`
11147
+ });
11148
+ return [];
11149
+ }
11150
+ if (allNamespaces.length === 0) {
11151
+ return [];
11152
+ }
11153
+ const registryPath = runtimePaths.netnsRegistry;
11154
+ if (!existsSync5(registryPath)) {
11155
+ return allNamespaces;
11156
+ }
11157
+ try {
11158
+ return await withFileLock(registryPath, async () => {
11159
+ const registry = RegistrySchema.parse(
11160
+ JSON.parse(readFileSync3(registryPath, "utf-8"))
11007
11161
  );
11008
- console.log("");
11009
- const processes = findFirecrackerProcesses();
11010
- const workspaces = existsSync5(workspacesDir) ? readdirSync2(workspacesDir).filter(runnerPaths.isVmWorkspace) : [];
11011
- const jobs = [];
11012
- const statusVmIds = /* @__PURE__ */ new Set();
11013
- if (status?.active_run_ids) {
11014
- for (const runId of status.active_run_ids) {
11015
- const vmId = createVmId(runId);
11016
- statusVmIds.add(vmId);
11017
- const proc = processes.find((p) => p.vmId === vmId);
11018
- jobs.push({
11019
- runId,
11020
- vmId,
11021
- hasProcess: !!proc,
11022
- pid: proc?.pid
11023
- });
11024
- }
11025
- }
11026
- const maxConcurrent = config.sandbox.max_concurrent;
11027
- console.log(`Runs (${jobs.length} active, max ${maxConcurrent}):`);
11028
- if (jobs.length === 0) {
11029
- console.log(" No active runs");
11030
- } else {
11031
- console.log(
11032
- " Run ID VM ID Status"
11033
- );
11034
- for (const job of jobs) {
11035
- const statusText = job.hasProcess ? `\u2713 Running (PID ${job.pid})` : "\u26A0\uFE0F No process";
11036
- console.log(` ${job.runId} ${job.vmId} ${statusText}`);
11037
- }
11038
- }
11039
- console.log("");
11040
- for (const job of jobs) {
11041
- if (!job.hasProcess) {
11042
- warnings.push({
11043
- message: `Run ${job.vmId} in status.json but no Firecracker process running`
11044
- });
11045
- }
11046
- }
11047
- const processVmIds = new Set(processes.map((p) => p.vmId));
11048
- for (const proc of processes) {
11049
- if (!statusVmIds.has(proc.vmId)) {
11050
- warnings.push({
11051
- message: `Orphan process: PID ${proc.pid} (vmId ${proc.vmId}) not in status.json`
11052
- });
11053
- }
11054
- }
11055
- for (const ws of workspaces) {
11056
- const vmId = runnerPaths.extractVmId(ws);
11057
- if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
11058
- warnings.push({
11059
- message: `Orphan workspace: ${ws} (no matching job or process)`
11060
- });
11162
+ const aliveNamespaces = /* @__PURE__ */ new Set();
11163
+ for (const [runnerIdx, runner] of Object.entries(registry.runners)) {
11164
+ if (isProcessRunning(runner.pid)) {
11165
+ for (const nsIdx of Object.keys(runner.namespaces)) {
11166
+ aliveNamespaces.add(`${NS_PREFIX}${runnerIdx}-${nsIdx}`);
11167
+ }
11061
11168
  }
11062
11169
  }
11063
- console.log("Warnings:");
11064
- if (warnings.length === 0) {
11065
- console.log(" None");
11066
- } else {
11067
- for (const w of warnings) {
11068
- console.log(` - ${w.message}`);
11170
+ const orphans = [];
11171
+ for (const ns of allNamespaces) {
11172
+ if (!aliveNamespaces.has(ns)) {
11173
+ orphans.push(ns);
11069
11174
  }
11070
11175
  }
11071
- process.exit(warnings.length > 0 ? 1 : 0);
11072
- } catch (error) {
11073
- console.error(
11074
- `Error: ${error instanceof Error ? error.message : "Unknown error"}`
11075
- );
11076
- process.exit(1);
11176
+ return orphans;
11177
+ });
11178
+ } catch (err) {
11179
+ warnings.push({
11180
+ message: `Failed to read netns registry: ${err instanceof Error ? err.message : "Unknown error"}`
11181
+ });
11182
+ return [];
11183
+ }
11184
+ }
11185
+ async function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings) {
11186
+ for (const job of jobs) {
11187
+ if (!job.firecrackerPid) {
11188
+ warnings.push({
11189
+ message: `Run ${job.vmId} in status.json but no Firecracker process running`
11190
+ });
11077
11191
  }
11078
11192
  }
11079
- );
11193
+ const processVmIds = new Set(processes.map((p) => p.vmId));
11194
+ for (const proc of processes) {
11195
+ if (!statusVmIds.has(proc.vmId)) {
11196
+ warnings.push({
11197
+ message: `Orphan process: PID ${proc.pid} (vmId ${proc.vmId}) not in status.json`
11198
+ });
11199
+ }
11200
+ }
11201
+ const orphanNetns = await findOrphanNetworkNamespaces(warnings);
11202
+ for (const ns of orphanNetns) {
11203
+ warnings.push({
11204
+ message: `Orphan network namespace: ${ns} (runner process not running)`
11205
+ });
11206
+ }
11207
+ for (const ws of workspaces) {
11208
+ const vmId = runnerPaths.extractVmId(ws);
11209
+ if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
11210
+ warnings.push({
11211
+ message: `Orphan workspace: ${ws} (no matching job or process)`
11212
+ });
11213
+ }
11214
+ }
11215
+ }
11216
+ function displayWarnings(warnings) {
11217
+ console.log("Warnings:");
11218
+ if (warnings.length === 0) {
11219
+ console.log(" None");
11220
+ } else {
11221
+ for (const w of warnings) {
11222
+ console.log(` - ${w.message}`);
11223
+ }
11224
+ }
11225
+ }
11080
11226
  function formatUptime(ms) {
11081
11227
  const seconds = Math.floor(ms / 1e3);
11082
11228
  const minutes = Math.floor(seconds / 60);
@@ -11087,6 +11233,40 @@ function formatUptime(ms) {
11087
11233
  if (minutes > 0) return `${minutes}m`;
11088
11234
  return `${seconds}s`;
11089
11235
  }
11236
+ var doctorCommand = new Command2("doctor").description("Diagnose runner health, check network, and detect issues").option("--config <path>", "Config file path", "./runner.yaml").action(async (options) => {
11237
+ try {
11238
+ const config = loadConfig(options.config);
11239
+ const statusFilePath = runnerPaths.statusFile(config.base_dir);
11240
+ const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
11241
+ const warnings = [];
11242
+ console.log(`Runner: ${config.name}`);
11243
+ const status = displayRunnerStatus(statusFilePath, warnings);
11244
+ console.log("");
11245
+ await checkApiConnectivity(config, warnings);
11246
+ console.log("");
11247
+ await checkNetwork(config, warnings);
11248
+ console.log("");
11249
+ const processes = findFirecrackerProcesses();
11250
+ const workspaces = existsSync5(workspacesDir) ? readdirSync2(workspacesDir).filter(runnerPaths.isVmWorkspace) : [];
11251
+ const { jobs, statusVmIds } = buildJobInfo(status, processes);
11252
+ displayRuns(jobs, config.sandbox.max_concurrent);
11253
+ console.log("");
11254
+ await detectOrphanResources(
11255
+ jobs,
11256
+ processes,
11257
+ workspaces,
11258
+ statusVmIds,
11259
+ warnings
11260
+ );
11261
+ displayWarnings(warnings);
11262
+ process.exit(warnings.length > 0 ? 1 : 0);
11263
+ } catch (error) {
11264
+ console.error(
11265
+ `Error: ${error instanceof Error ? error.message : "Unknown error"}`
11266
+ );
11267
+ process.exit(1);
11268
+ }
11269
+ });
11090
11270
 
11091
11271
  // src/commands/kill.ts
11092
11272
  import { Command as Command3 } from "commander";
@@ -11160,8 +11340,8 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
11160
11340
  }
11161
11341
  if (runId && existsSync6(statusFilePath)) {
11162
11342
  try {
11163
- const status = JSON.parse(
11164
- readFileSync4(statusFilePath, "utf-8")
11343
+ const status = RunnerStatusSchema.parse(
11344
+ JSON.parse(readFileSync4(statusFilePath, "utf-8"))
11165
11345
  );
11166
11346
  const oldCount = status.active_runs;
11167
11347
  status.active_run_ids = status.active_run_ids.filter(
@@ -11218,8 +11398,8 @@ function resolveRunId(input, statusFilePath) {
11218
11398
  }
11219
11399
  if (existsSync6(statusFilePath)) {
11220
11400
  try {
11221
- const status = JSON.parse(
11222
- readFileSync4(statusFilePath, "utf-8")
11401
+ const status = RunnerStatusSchema.parse(
11402
+ JSON.parse(readFileSync4(statusFilePath, "utf-8"))
11223
11403
  );
11224
11404
  const match = status.active_run_ids.find(
11225
11405
  (id) => id.startsWith(input)
@@ -11323,10 +11503,16 @@ var benchmarkCommand = new Command4("benchmark").description(
11323
11503
  process.exit(1);
11324
11504
  }
11325
11505
  timer.log("Initializing pools...");
11506
+ const snapshotConfig = config.firecracker.snapshot;
11326
11507
  await initOverlayPool({
11327
11508
  size: 2,
11328
11509
  replenishThreshold: 1,
11329
- poolDir: runnerPaths.overlayPool(config.base_dir)
11510
+ poolDir: runnerPaths.overlayPool(config.base_dir),
11511
+ createFile: snapshotConfig ? (filePath) => execCommand(
11512
+ `cp --sparse=always "${snapshotConfig.overlay}" "${filePath}"`,
11513
+ false
11514
+ ).then(() => {
11515
+ }) : void 0
11330
11516
  });
11331
11517
  await initNetnsPool({ name: config.name, size: 2 });
11332
11518
  poolsInitialized = true;
@@ -11354,12 +11540,219 @@ var benchmarkCommand = new Command4("benchmark").description(
11354
11540
  process.exit(exitCode);
11355
11541
  });
11356
11542
 
11543
+ // src/commands/snapshot.ts
11544
+ import { Command as Command5 } from "commander";
11545
+ import { spawn as spawn3 } from "child_process";
11546
+ import fs11 from "fs";
11547
+ import os2 from "os";
11548
+ import readline3 from "readline";
11549
+ var logger15 = createLogger("Snapshot");
11550
+ function startFirecracker(nsName, firecrackerBinary, apiSocketPath, workDir) {
11551
+ logger15.log("Starting Firecracker with API socket...");
11552
+ const currentUser = os2.userInfo().username;
11553
+ const fcProcess = spawn3(
11554
+ "sudo",
11555
+ [
11556
+ "ip",
11557
+ "netns",
11558
+ "exec",
11559
+ nsName,
11560
+ "sudo",
11561
+ "-u",
11562
+ currentUser,
11563
+ firecrackerBinary,
11564
+ "--api-sock",
11565
+ apiSocketPath
11566
+ ],
11567
+ {
11568
+ cwd: workDir,
11569
+ stdio: ["ignore", "pipe", "pipe"],
11570
+ detached: false
11571
+ }
11572
+ );
11573
+ if (fcProcess.stdout) {
11574
+ const stdoutRL = readline3.createInterface({ input: fcProcess.stdout });
11575
+ stdoutRL.on("line", (line) => {
11576
+ if (line.trim()) logger15.log(`[FC] ${line}`);
11577
+ });
11578
+ }
11579
+ if (fcProcess.stderr) {
11580
+ const stderrRL = readline3.createInterface({ input: fcProcess.stderr });
11581
+ stderrRL.on("line", (line) => {
11582
+ if (line.trim()) logger15.log(`[FC stderr] ${line}`);
11583
+ });
11584
+ }
11585
+ fcProcess.on("error", (err) => logger15.log(`Firecracker error: ${err}`));
11586
+ fcProcess.on(
11587
+ "exit",
11588
+ (code, signal) => logger15.log(`Firecracker exited: code=${code}, signal=${signal}`)
11589
+ );
11590
+ return fcProcess;
11591
+ }
11592
+ var snapshotCommand = new Command5("snapshot").description("Generate a Firecracker snapshot for fast VM startup").argument("<output-dir>", "Output directory for snapshot files").option("--config <path>", "Config file path", "./runner.yaml").action(
11593
+ async (outputDir, opts) => {
11594
+ const options = {
11595
+ config: opts.config ?? "./runner.yaml",
11596
+ output: outputDir
11597
+ };
11598
+ const timer = new Timer();
11599
+ setGlobalLogger(timer.log.bind(timer));
11600
+ logger15.log("Loading configuration...");
11601
+ const config = loadDebugConfig(options.config);
11602
+ validateFirecrackerPaths(config.firecracker);
11603
+ const nsName = "vm0-snapshot";
11604
+ const workDir = runnerPaths.snapshotWorkDir(config.base_dir);
11605
+ const overlayPath = vmPaths.overlay(workDir);
11606
+ const vsockPath = vmPaths.vsock(workDir);
11607
+ const apiSocketPath = vmPaths.apiSock(workDir);
11608
+ const outputSnapshot = snapshotOutputPaths.snapshot(options.output);
11609
+ const outputMemory = snapshotOutputPaths.memory(options.output);
11610
+ const outputOverlay = snapshotOutputPaths.overlay(options.output);
11611
+ let fcProcess = null;
11612
+ let vsockClient = null;
11613
+ let exitCode = 0;
11614
+ try {
11615
+ if (fs11.existsSync(workDir)) {
11616
+ logger15.log("Cleaning up stale work directory...");
11617
+ fs11.rmSync(workDir, { recursive: true, force: true });
11618
+ }
11619
+ logger15.log(`Creating directories...`);
11620
+ fs11.mkdirSync(options.output, { recursive: true });
11621
+ fs11.mkdirSync(workDir, { recursive: true });
11622
+ fs11.mkdirSync(vmPaths.vsockDir(workDir), { recursive: true });
11623
+ logger15.log("Creating overlay filesystem...");
11624
+ await createOverlayFile(overlayPath);
11625
+ logger15.log(`Overlay created: ${overlayPath}`);
11626
+ logger15.log(`Creating network namespace: ${nsName}`);
11627
+ await deleteNetns(nsName);
11628
+ await createNetnsWithTap(nsName, {
11629
+ tapName: SNAPSHOT_NETWORK.tapName,
11630
+ gatewayIpWithPrefix: `${SNAPSHOT_NETWORK.gatewayIp}/${SNAPSHOT_NETWORK.prefixLen}`
11631
+ });
11632
+ logger15.log("Network namespace created");
11633
+ fcProcess = startFirecracker(
11634
+ nsName,
11635
+ config.firecracker.binary,
11636
+ apiSocketPath,
11637
+ workDir
11638
+ );
11639
+ const apiClient = new FirecrackerClient(apiSocketPath);
11640
+ logger15.log("Waiting for API to be ready...");
11641
+ await apiClient.waitForReady();
11642
+ logger15.log("API ready");
11643
+ logger15.log("Configuring VM via API...");
11644
+ await Promise.all([
11645
+ apiClient.configureMachine({
11646
+ vcpu_count: config.sandbox.vcpu,
11647
+ mem_size_mib: config.sandbox.memory_mb
11648
+ }),
11649
+ apiClient.configureBootSource({
11650
+ kernel_image_path: config.firecracker.kernel,
11651
+ boot_args: buildBootArgs()
11652
+ }),
11653
+ apiClient.configureDrive({
11654
+ drive_id: "rootfs",
11655
+ path_on_host: config.firecracker.rootfs,
11656
+ is_root_device: true,
11657
+ is_read_only: true
11658
+ }),
11659
+ apiClient.configureDrive({
11660
+ drive_id: "overlay",
11661
+ path_on_host: overlayPath,
11662
+ is_root_device: false,
11663
+ is_read_only: false
11664
+ }),
11665
+ apiClient.configureNetworkInterface({
11666
+ iface_id: "eth0",
11667
+ guest_mac: SNAPSHOT_NETWORK.guestMac,
11668
+ host_dev_name: SNAPSHOT_NETWORK.tapName
11669
+ }),
11670
+ apiClient.configureVsock({
11671
+ guest_cid: 3,
11672
+ uds_path: vsockPath
11673
+ })
11674
+ ]);
11675
+ logger15.log("VM configured");
11676
+ logger15.log("Starting vsock listener...");
11677
+ vsockClient = new VsockClient(vsockPath);
11678
+ const guestConnectionPromise = vsockClient.waitForGuestConnection(6e4);
11679
+ logger15.log("Starting VM...");
11680
+ await apiClient.startInstance();
11681
+ logger15.log("VM started");
11682
+ logger15.log("Waiting for guest connection...");
11683
+ await guestConnectionPromise;
11684
+ logger15.log("Guest connected");
11685
+ logger15.log("Verifying guest is responsive...");
11686
+ const reachable = await vsockClient.isReachable();
11687
+ if (!reachable) {
11688
+ throw new Error("Guest is not responsive");
11689
+ }
11690
+ logger15.log("Guest is responsive");
11691
+ logger15.log("Pausing VM...");
11692
+ await apiClient.pause();
11693
+ logger15.log("VM paused");
11694
+ logger15.log("Creating snapshot...");
11695
+ await apiClient.createSnapshot({
11696
+ snapshot_type: "Full",
11697
+ snapshot_path: outputSnapshot,
11698
+ mem_file_path: outputMemory
11699
+ });
11700
+ logger15.log("Snapshot created");
11701
+ logger15.log("Copying overlay as golden overlay...");
11702
+ await execCommand(
11703
+ `cp --sparse=always "${overlayPath}" "${outputOverlay}"`,
11704
+ false
11705
+ );
11706
+ logger15.log("Golden overlay created");
11707
+ logger15.log("=".repeat(40));
11708
+ logger15.log("Snapshot generation complete!");
11709
+ logger15.log("Files (logical size):");
11710
+ const lsOutput = await execCommand(`ls -lh "${options.output}"`, false);
11711
+ logger15.log(lsOutput);
11712
+ logger15.log("Actual disk usage:");
11713
+ const duOutput = await execCommand(
11714
+ `du -h "${options.output}"/*`,
11715
+ false
11716
+ );
11717
+ logger15.log(duOutput);
11718
+ logger15.log("=".repeat(40));
11719
+ } catch (error) {
11720
+ logger15.error(
11721
+ `Error: ${error instanceof Error ? error.message : "Unknown error"}`
11722
+ );
11723
+ exitCode = 1;
11724
+ } finally {
11725
+ logger15.log("Cleaning up...");
11726
+ if (vsockClient) {
11727
+ vsockClient.close();
11728
+ }
11729
+ if (fcProcess && !fcProcess.killed) {
11730
+ fcProcess.kill("SIGKILL");
11731
+ }
11732
+ await execCommand(
11733
+ `pkill -9 -f "firecracker.*${apiSocketPath}"`,
11734
+ true
11735
+ ).catch(() => {
11736
+ });
11737
+ await deleteNetns(nsName);
11738
+ if (fs11.existsSync(workDir)) {
11739
+ fs11.rmSync(workDir, { recursive: true, force: true });
11740
+ }
11741
+ logger15.log("Cleanup complete");
11742
+ }
11743
+ if (exitCode !== 0) {
11744
+ process.exit(exitCode);
11745
+ }
11746
+ }
11747
+ );
11748
+
11357
11749
  // src/index.ts
11358
- var version = true ? "3.11.3" : "0.1.0";
11750
+ var version = true ? "3.12.1" : "0.1.0";
11359
11751
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11360
11752
  program.addCommand(startCommand);
11361
11753
  program.addCommand(doctorCommand);
11362
11754
  program.addCommand(killCommand);
11363
11755
  program.addCommand(benchmarkCommand);
11756
+ program.addCommand(snapshotCommand);
11364
11757
  program.parse();
11365
11758
  //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "3.11.3",
3
+ "version": "3.12.1",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",