@vm0/runner 3.7.3 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +1063 -684
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -16,7 +16,7 @@ import yaml from "yaml";
16
16
  import path from "path";
17
17
  var VM0_RUN_DIR = "/var/run/vm0";
18
18
  var VM0_TMP_PREFIX = "/tmp/vm0";
19
- var paths = {
19
+ var runtimePaths = {
20
20
  /** Runner PID file for single-instance lock */
21
21
  runnerPid: path.join(VM0_RUN_DIR, "runner.pid"),
22
22
  /** IP pool lock file */
@@ -303,12 +303,12 @@ async function subscribeToJobs(server, group, onJob, onConnectionChange) {
303
303
  }
304
304
 
305
305
  // src/lib/executor.ts
306
- import path5 from "path";
306
+ import path6 from "path";
307
307
 
308
308
  // src/lib/firecracker/vm.ts
309
309
  import { spawn } from "child_process";
310
310
  import fs4 from "fs";
311
- import path3 from "path";
311
+ import path4 from "path";
312
312
  import readline from "readline";
313
313
 
314
314
  // src/lib/firecracker/client.ts
@@ -321,7 +321,7 @@ var FirecrackerClient = class {
321
321
  /**
322
322
  * Make HTTP request to Firecracker API
323
323
  */
324
- async request(method, path8, body) {
324
+ async request(method, path9, body) {
325
325
  return new Promise((resolve, reject) => {
326
326
  const bodyStr = body !== void 0 ? JSON.stringify(body) : void 0;
327
327
  const headers = {
@@ -334,11 +334,11 @@ var FirecrackerClient = class {
334
334
  headers["Content-Length"] = Buffer.byteLength(bodyStr);
335
335
  }
336
336
  console.log(
337
- `[FC API] ${method} ${path8}${bodyStr ? ` (${Buffer.byteLength(bodyStr)} bytes)` : ""}`
337
+ `[FC API] ${method} ${path9}${bodyStr ? ` (${Buffer.byteLength(bodyStr)} bytes)` : ""}`
338
338
  );
339
339
  const options = {
340
340
  socketPath: this.socketPath,
341
- path: path8,
341
+ path: path9,
342
342
  method,
343
343
  headers,
344
344
  // Disable agent to ensure fresh connection for each request
@@ -472,13 +472,8 @@ var FirecrackerClient = class {
472
472
  };
473
473
 
474
474
  // src/lib/firecracker/network.ts
475
- import { execSync, exec as exec2 } from "child_process";
476
- import { promisify as promisify2 } from "util";
477
-
478
- // src/lib/firecracker/ip-pool.ts
479
- import { exec } from "child_process";
475
+ import { execSync, exec } from "child_process";
480
476
  import { promisify } from "util";
481
- import * as fs2 from "fs";
482
477
 
483
478
  // src/lib/logger.ts
484
479
  var _log = null;
@@ -500,204 +495,10 @@ function createLogger(prefix) {
500
495
  };
501
496
  }
502
497
 
503
- // src/lib/firecracker/ip-pool.ts
498
+ // src/lib/firecracker/network.ts
504
499
  var execAsync = promisify(exec);
505
- var logger = createLogger("IP Pool");
506
- var REGISTRY_FILE_PATH = paths.ipRegistry;
500
+ var logger = createLogger("Network");
507
501
  var BRIDGE_NAME = "vm0br0";
508
- var IP_PREFIX = "172.16.0.";
509
- var IP_START = 2;
510
- var IP_END = 254;
511
- var LOCK_TIMEOUT_MS = 1e4;
512
- var LOCK_RETRY_INTERVAL_MS = 100;
513
- var ALLOCATION_GRACE_PERIOD_MS = 3e4;
514
- async function ensureRunDir() {
515
- if (!fs2.existsSync(VM0_RUN_DIR)) {
516
- await execAsync(`sudo mkdir -p ${VM0_RUN_DIR}`);
517
- await execAsync(`sudo chmod 777 ${VM0_RUN_DIR}`);
518
- }
519
- }
520
- async function withLock(fn) {
521
- await ensureRunDir();
522
- const lockMarker = paths.ipPoolLock;
523
- const startTime = Date.now();
524
- let lockAcquired = false;
525
- while (Date.now() - startTime < LOCK_TIMEOUT_MS) {
526
- try {
527
- fs2.writeFileSync(lockMarker, process.pid.toString(), { flag: "wx" });
528
- lockAcquired = true;
529
- break;
530
- } catch {
531
- try {
532
- const pidStr = fs2.readFileSync(lockMarker, "utf-8");
533
- const pid = parseInt(pidStr, 10);
534
- try {
535
- process.kill(pid, 0);
536
- } catch {
537
- fs2.unlinkSync(lockMarker);
538
- continue;
539
- }
540
- } catch {
541
- }
542
- await new Promise(
543
- (resolve) => setTimeout(resolve, LOCK_RETRY_INTERVAL_MS)
544
- );
545
- }
546
- }
547
- if (!lockAcquired) {
548
- throw new Error(
549
- `Failed to acquire IP pool lock after ${LOCK_TIMEOUT_MS}ms`
550
- );
551
- }
552
- try {
553
- return await fn();
554
- } finally {
555
- try {
556
- fs2.unlinkSync(lockMarker);
557
- } catch {
558
- }
559
- }
560
- }
561
- function readRegistry() {
562
- try {
563
- if (fs2.existsSync(REGISTRY_FILE_PATH)) {
564
- const content = fs2.readFileSync(REGISTRY_FILE_PATH, "utf-8");
565
- return JSON.parse(content);
566
- }
567
- } catch {
568
- }
569
- return { allocations: {} };
570
- }
571
- function writeRegistry(registry) {
572
- fs2.writeFileSync(REGISTRY_FILE_PATH, JSON.stringify(registry, null, 2));
573
- }
574
- function getAllocations() {
575
- const registry = readRegistry();
576
- return new Map(Object.entries(registry.allocations));
577
- }
578
- function getIPForVm(vmId) {
579
- const registry = readRegistry();
580
- for (const [ip, allocation] of Object.entries(registry.allocations)) {
581
- if (allocation.vmId === vmId) {
582
- return ip;
583
- }
584
- }
585
- return void 0;
586
- }
587
- async function scanTapDevices() {
588
- const tapDevices = /* @__PURE__ */ new Map();
589
- try {
590
- const { stdout } = await execAsync(
591
- `ip link show master ${BRIDGE_NAME} 2>/dev/null || true`
592
- );
593
- const lines = stdout.split("\n");
594
- for (const line of lines) {
595
- const match = line.match(/^\d+:\s+(tap[a-f0-9]+):/);
596
- if (match && match[1]) {
597
- const tapName = match[1];
598
- const vmIdPrefix = tapName.substring(3);
599
- tapDevices.set(tapName, vmIdPrefix);
600
- }
601
- }
602
- } catch {
603
- }
604
- return tapDevices;
605
- }
606
- function reconcileRegistry(registry, activeTaps) {
607
- const reconciled = { allocations: {} };
608
- const activeTapNames = new Set(activeTaps.keys());
609
- const now = Date.now();
610
- for (const [ip, allocation] of Object.entries(registry.allocations)) {
611
- const allocatedTime = new Date(allocation.allocatedAt).getTime();
612
- const isWithinGracePeriod = now - allocatedTime < ALLOCATION_GRACE_PERIOD_MS;
613
- if (activeTapNames.has(allocation.tapDevice)) {
614
- reconciled.allocations[ip] = allocation;
615
- } else if (isWithinGracePeriod) {
616
- reconciled.allocations[ip] = allocation;
617
- } else {
618
- logger.log(
619
- `Removing stale allocation for ${ip} (TAP ${allocation.tapDevice} no longer exists)`
620
- );
621
- }
622
- }
623
- return reconciled;
624
- }
625
- function findFreeIP(registry) {
626
- const allocatedIPs = new Set(Object.keys(registry.allocations));
627
- for (let octet = IP_START; octet <= IP_END; octet++) {
628
- const ip = `${IP_PREFIX}${octet}`;
629
- if (!allocatedIPs.has(ip)) {
630
- return ip;
631
- }
632
- }
633
- return null;
634
- }
635
- async function allocateIP(vmId) {
636
- const tapDevice = `tap${vmId.substring(0, 8)}`;
637
- return withLock(async () => {
638
- const registry = readRegistry();
639
- const ip = findFreeIP(registry);
640
- if (!ip) {
641
- throw new Error(
642
- "No free IP addresses available in pool (172.16.0.2-254)"
643
- );
644
- }
645
- const allocatedCount = Object.keys(registry.allocations).length;
646
- const allocatedIPs = Object.keys(registry.allocations).sort();
647
- logger.log(
648
- `Current state: ${allocatedCount} IPs allocated [${allocatedIPs.join(", ")}], assigning ${ip}`
649
- );
650
- registry.allocations[ip] = {
651
- vmId,
652
- tapDevice,
653
- allocatedAt: (/* @__PURE__ */ new Date()).toISOString()
654
- };
655
- writeRegistry(registry);
656
- logger.log(`Allocated ${ip} for VM ${vmId} (TAP ${tapDevice})`);
657
- return ip;
658
- });
659
- }
660
- async function releaseIP(ip) {
661
- return withLock(async () => {
662
- const registry = readRegistry();
663
- if (registry.allocations[ip]) {
664
- const allocation = registry.allocations[ip];
665
- delete registry.allocations[ip];
666
- writeRegistry(registry);
667
- logger.log(`Released ${ip} (was allocated to VM ${allocation.vmId})`);
668
- } else {
669
- logger.log(`IP ${ip} was not in registry, nothing to release`);
670
- }
671
- });
672
- }
673
- async function cleanupOrphanedAllocations() {
674
- return withLock(async () => {
675
- logger.log("Cleaning up orphaned allocations...");
676
- const registry = readRegistry();
677
- const beforeCount = Object.keys(registry.allocations).length;
678
- if (beforeCount === 0) {
679
- logger.log("No allocations in registry, nothing to clean up");
680
- return;
681
- }
682
- const activeTaps = await scanTapDevices();
683
- logger.log(`Found ${activeTaps.size} active TAP device(s) on bridge`);
684
- const reconciled = reconcileRegistry(registry, activeTaps);
685
- const afterCount = Object.keys(reconciled.allocations).length;
686
- if (afterCount !== beforeCount) {
687
- writeRegistry(reconciled);
688
- logger.log(
689
- `Cleaned up ${beforeCount - afterCount} orphaned allocation(s)`
690
- );
691
- } else {
692
- logger.log("No orphaned allocations found");
693
- }
694
- });
695
- }
696
-
697
- // src/lib/firecracker/network.ts
698
- var execAsync2 = promisify2(exec2);
699
- var logger2 = createLogger("Network");
700
- var BRIDGE_NAME2 = "vm0br0";
701
502
  var BRIDGE_IP = "172.16.0.1";
702
503
  var BRIDGE_NETMASK = "255.255.255.0";
703
504
  var BRIDGE_CIDR = "172.16.0.0/24";
@@ -728,7 +529,7 @@ function commandExists(cmd) {
728
529
  async function execCommand(cmd, sudo = true) {
729
530
  const fullCmd = sudo ? `sudo ${cmd}` : cmd;
730
531
  try {
731
- const { stdout } = await execAsync2(fullCmd);
532
+ const { stdout } = await execAsync(fullCmd);
732
533
  return stdout.trim();
733
534
  } catch (error) {
734
535
  const execError = error;
@@ -748,33 +549,33 @@ async function getDefaultInterface() {
748
549
  }
749
550
  async function setupForwardRules() {
750
551
  const extIface = await getDefaultInterface();
751
- logger2.log(`Setting up FORWARD rules for ${BRIDGE_NAME2} <-> ${extIface}`);
552
+ logger.log(`Setting up FORWARD rules for ${BRIDGE_NAME} <-> ${extIface}`);
752
553
  try {
753
554
  await execCommand(
754
- `iptables -C FORWARD -i ${BRIDGE_NAME2} -o ${extIface} -j ACCEPT`
555
+ `iptables -C FORWARD -i ${BRIDGE_NAME} -o ${extIface} -j ACCEPT`
755
556
  );
756
- logger2.log("FORWARD outbound rule already exists");
557
+ logger.log("FORWARD outbound rule already exists");
757
558
  } catch {
758
559
  await execCommand(
759
- `iptables -I FORWARD -i ${BRIDGE_NAME2} -o ${extIface} -j ACCEPT`
560
+ `iptables -I FORWARD -i ${BRIDGE_NAME} -o ${extIface} -j ACCEPT`
760
561
  );
761
- logger2.log("FORWARD outbound rule added");
562
+ logger.log("FORWARD outbound rule added");
762
563
  }
763
564
  try {
764
565
  await execCommand(
765
- `iptables -C FORWARD -i ${extIface} -o ${BRIDGE_NAME2} -m state --state RELATED,ESTABLISHED -j ACCEPT`
566
+ `iptables -C FORWARD -i ${extIface} -o ${BRIDGE_NAME} -m state --state RELATED,ESTABLISHED -j ACCEPT`
766
567
  );
767
- logger2.log("FORWARD inbound rule already exists");
568
+ logger.log("FORWARD inbound rule already exists");
768
569
  } catch {
769
570
  await execCommand(
770
- `iptables -I FORWARD -i ${extIface} -o ${BRIDGE_NAME2} -m state --state RELATED,ESTABLISHED -j ACCEPT`
571
+ `iptables -I FORWARD -i ${extIface} -o ${BRIDGE_NAME} -m state --state RELATED,ESTABLISHED -j ACCEPT`
771
572
  );
772
- logger2.log("FORWARD inbound rule added");
573
+ logger.log("FORWARD inbound rule added");
773
574
  }
774
575
  }
775
576
  async function bridgeExists() {
776
577
  try {
777
- await execCommand(`ip link show ${BRIDGE_NAME2}`, true);
578
+ await execCommand(`ip link show ${BRIDGE_NAME}`, true);
778
579
  return true;
779
580
  } catch {
780
581
  return false;
@@ -782,108 +583,30 @@ async function bridgeExists() {
782
583
  }
783
584
  async function setupBridge() {
784
585
  if (await bridgeExists()) {
785
- logger2.log(`Bridge ${BRIDGE_NAME2} already exists`);
586
+ logger.log(`Bridge ${BRIDGE_NAME} already exists`);
786
587
  await setupForwardRules();
787
588
  return;
788
589
  }
789
- logger2.log(`Creating bridge ${BRIDGE_NAME2}...`);
790
- await execCommand(`ip link add name ${BRIDGE_NAME2} type bridge`);
590
+ logger.log(`Creating bridge ${BRIDGE_NAME}...`);
591
+ await execCommand(`ip link add name ${BRIDGE_NAME} type bridge`);
791
592
  await execCommand(
792
- `ip addr add ${BRIDGE_IP}/${BRIDGE_NETMASK} dev ${BRIDGE_NAME2}`
593
+ `ip addr add ${BRIDGE_IP}/${BRIDGE_NETMASK} dev ${BRIDGE_NAME}`
793
594
  );
794
- await execCommand(`ip link set ${BRIDGE_NAME2} up`);
595
+ await execCommand(`ip link set ${BRIDGE_NAME} up`);
795
596
  await execCommand(`sysctl -w net.ipv4.ip_forward=1`);
796
597
  try {
797
598
  await execCommand(
798
599
  `iptables -t nat -C POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
799
600
  );
800
- logger2.log("NAT rule already exists");
601
+ logger.log("NAT rule already exists");
801
602
  } catch {
802
603
  await execCommand(
803
604
  `iptables -t nat -A POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
804
605
  );
805
- logger2.log("NAT rule added");
606
+ logger.log("NAT rule added");
806
607
  }
807
608
  await setupForwardRules();
808
- logger2.log(`Bridge ${BRIDGE_NAME2} configured with IP ${BRIDGE_IP}`);
809
- }
810
- async function tapDeviceExists(tapDevice) {
811
- try {
812
- await execCommand(`ip link show ${tapDevice}`, true);
813
- return true;
814
- } catch {
815
- return false;
816
- }
817
- }
818
- async function clearStaleIptablesRulesForIP(ip) {
819
- try {
820
- const { stdout } = await execAsync2(
821
- "sudo iptables -t nat -S PREROUTING 2>/dev/null || true"
822
- );
823
- const lines = stdout.split("\n");
824
- const rulesForIP = lines.filter((line) => line.includes(`-s ${ip}`));
825
- if (rulesForIP.length === 0) {
826
- return;
827
- }
828
- logger2.log(
829
- `Clearing ${rulesForIP.length} stale iptables rule(s) for IP ${ip}`
830
- );
831
- for (const rule of rulesForIP) {
832
- const deleteRule = rule.replace("-A ", "-D ");
833
- try {
834
- await execCommand(`iptables -t nat ${deleteRule}`);
835
- } catch {
836
- }
837
- }
838
- } catch {
839
- }
840
- }
841
- async function createTapDevice(vmId) {
842
- const tapDevice = `tap${vmId.substring(0, 8)}`;
843
- const guestMac = generateMacAddress(vmId);
844
- const guestIp = await allocateIP(vmId);
845
- logger2.log(`[VM ${vmId}] IP allocated: ${guestIp}`);
846
- await clearStaleIptablesRulesForIP(guestIp);
847
- logger2.log(`[VM ${vmId}] Stale iptables cleared`);
848
- if (await tapDeviceExists(tapDevice)) {
849
- logger2.log(
850
- `[VM ${vmId}] TAP device ${tapDevice} already exists, deleting...`
851
- );
852
- await deleteTapDevice(tapDevice);
853
- }
854
- await execCommand(`ip tuntap add ${tapDevice} mode tap`);
855
- logger2.log(`[VM ${vmId}] TAP device created`);
856
- await execCommand(`ip link set ${tapDevice} master ${BRIDGE_NAME2}`);
857
- logger2.log(`[VM ${vmId}] TAP added to bridge`);
858
- await execCommand(`ip link set ${tapDevice} up`);
859
- logger2.log(
860
- `[VM ${vmId}] TAP created: ${tapDevice}, MAC ${guestMac}, IP ${guestIp}`
861
- );
862
- return {
863
- tapDevice,
864
- guestMac,
865
- guestIp,
866
- gatewayIp: BRIDGE_IP,
867
- netmask: BRIDGE_NETMASK
868
- };
869
- }
870
- async function deleteTapDevice(tapDevice, guestIp) {
871
- if (!await tapDeviceExists(tapDevice)) {
872
- logger2.log(`TAP device ${tapDevice} does not exist, skipping delete`);
873
- } else {
874
- await execCommand(`ip link delete ${tapDevice}`);
875
- logger2.log(`TAP device ${tapDevice} deleted`);
876
- }
877
- if (guestIp) {
878
- try {
879
- await execCommand(`ip neigh del ${guestIp} dev ${BRIDGE_NAME2}`, true);
880
- logger2.log(`ARP entry cleared for ${guestIp}`);
881
- } catch {
882
- }
883
- }
884
- if (guestIp) {
885
- await releaseIP(guestIp);
886
- }
609
+ logger.log(`Bridge ${BRIDGE_NAME} configured with IP ${BRIDGE_IP}`);
887
610
  }
888
611
  function generateNetworkBootArgs(config) {
889
612
  return `ip=${config.guestIp}::${config.gatewayIp}:${config.netmask}:vm0-guest:eth0:off`;
@@ -910,69 +633,53 @@ function checkNetworkPrerequisites() {
910
633
  }
911
634
  async function setupCIDRProxyRules(proxyPort) {
912
635
  const comment = "vm0:cidr-proxy";
913
- logger2.log(
636
+ logger.log(
914
637
  `Setting up CIDR proxy rules for ${BRIDGE_CIDR} -> port ${proxyPort}`
915
638
  );
916
639
  try {
917
640
  await execCommand(
918
641
  `iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
919
642
  );
920
- logger2.log("CIDR proxy rule for port 80 already exists");
643
+ logger.log("CIDR proxy rule for port 80 already exists");
921
644
  } catch {
922
645
  await execCommand(
923
646
  `iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
924
647
  );
925
- logger2.log("CIDR proxy rule for port 80 added");
648
+ logger.log("CIDR proxy rule for port 80 added");
926
649
  }
927
650
  try {
928
651
  await execCommand(
929
652
  `iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
930
653
  );
931
- logger2.log("CIDR proxy rule for port 443 already exists");
654
+ logger.log("CIDR proxy rule for port 443 already exists");
932
655
  } catch {
933
656
  await execCommand(
934
657
  `iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
935
658
  );
936
- logger2.log("CIDR proxy rule for port 443 added");
659
+ logger.log("CIDR proxy rule for port 443 added");
937
660
  }
938
661
  }
939
662
  async function cleanupCIDRProxyRules(proxyPort) {
940
663
  const comment = "vm0:cidr-proxy";
941
- logger2.log("Cleaning up CIDR proxy rules...");
664
+ logger.log("Cleaning up CIDR proxy rules...");
942
665
  try {
943
666
  await execCommand(
944
667
  `iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
945
668
  );
946
- logger2.log("CIDR proxy rule for port 80 removed");
669
+ logger.log("CIDR proxy rule for port 80 removed");
947
670
  } catch {
948
671
  }
949
672
  try {
950
673
  await execCommand(
951
674
  `iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
952
675
  );
953
- logger2.log("CIDR proxy rule for port 443 removed");
676
+ logger.log("CIDR proxy rule for port 443 removed");
954
677
  } catch {
955
678
  }
956
679
  }
957
- async function listTapDevices() {
958
- try {
959
- const result = await execCommand("ip -o link show type tuntap", false);
960
- const devices = [];
961
- const lines = result.split("\n");
962
- for (const line of lines) {
963
- const match = line.match(/^\d+:\s+(tap[a-f0-9]{8}):/);
964
- if (match && match[1]) {
965
- devices.push(match[1]);
966
- }
967
- }
968
- return devices;
969
- } catch {
970
- return [];
971
- }
972
- }
973
680
  async function checkBridgeStatus() {
974
681
  try {
975
- const result = await execCommand(`ip -o addr show ${BRIDGE_NAME2}`, false);
682
+ const result = await execCommand(`ip -o addr show ${BRIDGE_NAME}`, false);
976
683
  const ipMatch = result.match(/inet\s+(\d+\.\d+\.\d+\.\d+)/);
977
684
  const upMatch = result.includes("UP") || result.includes("state UP");
978
685
  return {
@@ -1029,17 +736,17 @@ async function findOrphanedIptablesRules(rules, activeVmIps, expectedProxyPort)
1029
736
  return orphaned;
1030
737
  }
1031
738
  async function flushBridgeArpCache() {
1032
- logger2.log(`Flushing ARP cache on bridge ${BRIDGE_NAME2}...`);
739
+ logger.log(`Flushing ARP cache on bridge ${BRIDGE_NAME}...`);
1033
740
  try {
1034
741
  if (!await bridgeExists()) {
1035
- logger2.log("Bridge does not exist, skipping ARP flush");
742
+ logger.log("Bridge does not exist, skipping ARP flush");
1036
743
  return;
1037
744
  }
1038
- const { stdout } = await execAsync2(
1039
- `ip neigh show dev ${BRIDGE_NAME2} 2>/dev/null || true`
745
+ const { stdout } = await execAsync(
746
+ `ip neigh show dev ${BRIDGE_NAME} 2>/dev/null || true`
1040
747
  );
1041
748
  if (!stdout.trim()) {
1042
- logger2.log("No ARP entries on bridge");
749
+ logger.log("No ARP entries on bridge");
1043
750
  return;
1044
751
  }
1045
752
  const lines = stdout.split("\n").filter((line) => line.trim());
@@ -1049,104 +756,642 @@ async function flushBridgeArpCache() {
1049
756
  if (match && match[1]) {
1050
757
  const ip = match[1];
1051
758
  try {
1052
- await execCommand(`ip neigh del ${ip} dev ${BRIDGE_NAME2}`, true);
759
+ await execCommand(`ip neigh del ${ip} dev ${BRIDGE_NAME}`, true);
1053
760
  cleared++;
1054
761
  } catch {
1055
762
  }
1056
763
  }
1057
764
  }
1058
- logger2.log(`Cleared ${cleared} ARP entries from bridge`);
765
+ logger.log(`Cleared ${cleared} ARP entries from bridge`);
1059
766
  } catch (error) {
1060
- logger2.log(
767
+ logger.log(
1061
768
  `Warning: Could not flush ARP cache: ${error instanceof Error ? error.message : "Unknown error"}`
1062
769
  );
1063
770
  }
1064
771
  }
1065
772
  async function cleanupOrphanedProxyRules(runnerName) {
1066
773
  const comment = `vm0:runner:${runnerName}`;
1067
- logger2.log(`Cleaning up orphaned proxy rules for runner '${runnerName}'...`);
774
+ logger.log(`Cleaning up orphaned proxy rules for runner '${runnerName}'...`);
1068
775
  try {
1069
776
  const rules = await execCommand("iptables -t nat -S PREROUTING", false);
1070
777
  const ourRules = rules.split("\n").filter((rule) => rule.includes(comment));
1071
778
  if (ourRules.length === 0) {
1072
- logger2.log("No orphaned proxy rules found");
779
+ logger.log("No orphaned proxy rules found");
1073
780
  return;
1074
781
  }
1075
- logger2.log(`Found ${ourRules.length} orphaned rule(s) to clean up`);
782
+ logger.log(`Found ${ourRules.length} orphaned rule(s) to clean up`);
1076
783
  for (const rule of ourRules) {
1077
784
  const deleteRule = rule.replace("-A ", "-D ");
1078
785
  try {
1079
786
  await execCommand(`iptables -t nat ${deleteRule}`);
1080
- logger2.log(`Deleted orphaned rule: ${rule.substring(0, 80)}...`);
787
+ logger.log(`Deleted orphaned rule: ${rule.substring(0, 80)}...`);
1081
788
  } catch {
1082
- logger2.log(
789
+ logger.log(
1083
790
  `Failed to delete rule (may already be gone): ${rule.substring(0, 80)}...`
1084
791
  );
1085
792
  }
1086
793
  }
1087
- logger2.log("Orphaned proxy rules cleanup complete");
794
+ logger.log("Orphaned proxy rules cleanup complete");
1088
795
  } catch (error) {
1089
- logger2.log(
796
+ logger.log(
1090
797
  `Warning: Could not clean up orphaned rules: ${error instanceof Error ? error.message : "Unknown error"}`
1091
798
  );
1092
799
  }
1093
800
  }
1094
801
 
1095
- // src/lib/firecracker/overlay-pool.ts
1096
- import { exec as exec3 } from "child_process";
1097
- import { randomUUID } from "crypto";
1098
- import fs3 from "fs";
1099
- import path2 from "path";
1100
- import { promisify as promisify3 } from "util";
1101
- var execAsync3 = promisify3(exec3);
1102
- var logger3 = createLogger("OverlayPool");
1103
- var OVERLAY_SIZE = 2 * 1024 * 1024 * 1024;
1104
- async function defaultCreateFile(filePath) {
1105
- const fd = fs3.openSync(filePath, "w");
1106
- fs3.ftruncateSync(fd, OVERLAY_SIZE);
1107
- fs3.closeSync(fd);
1108
- await execAsync3(`mkfs.ext4 -F -q "${filePath}"`);
802
+ // src/lib/firecracker/overlay-pool.ts
803
+ import { exec as exec2 } from "child_process";
804
+ import { randomUUID } from "crypto";
805
+ import fs2 from "fs";
806
+ import path2 from "path";
807
+ import { promisify as promisify2 } from "util";
808
+ var execAsync2 = promisify2(exec2);
809
+ var logger2 = createLogger("OverlayPool");
810
+ var OVERLAY_SIZE = 2 * 1024 * 1024 * 1024;
811
+ async function defaultCreateFile(filePath) {
812
+ const fd = fs2.openSync(filePath, "w");
813
+ fs2.ftruncateSync(fd, OVERLAY_SIZE);
814
+ fs2.closeSync(fd);
815
+ await execAsync2(`mkfs.ext4 -F -q "${filePath}"`);
816
+ }
817
+ var OverlayPool = class {
818
+ initialized = false;
819
+ queue = [];
820
+ replenishing = false;
821
+ config;
822
+ constructor(config) {
823
+ this.config = {
824
+ size: config.size,
825
+ replenishThreshold: config.replenishThreshold,
826
+ poolDir: config.poolDir,
827
+ createFile: config.createFile ?? defaultCreateFile
828
+ };
829
+ }
830
+ /**
831
+ * Generate unique file name using UUID
832
+ */
833
+ generateFileName() {
834
+ return `overlay-${randomUUID()}.ext4`;
835
+ }
836
+ /**
837
+ * Ensure the pool directory exists
838
+ */
839
+ async ensurePoolDir() {
840
+ const parentDir = path2.dirname(this.config.poolDir);
841
+ if (!fs2.existsSync(parentDir)) {
842
+ await execAsync2(`sudo mkdir -p ${parentDir}`);
843
+ await execAsync2(`sudo chmod 777 ${parentDir}`);
844
+ }
845
+ if (!fs2.existsSync(this.config.poolDir)) {
846
+ fs2.mkdirSync(this.config.poolDir, { recursive: true });
847
+ }
848
+ }
849
+ /**
850
+ * Scan pool directory for overlay files
851
+ */
852
+ scanPoolDir() {
853
+ if (!fs2.existsSync(this.config.poolDir)) {
854
+ return [];
855
+ }
856
+ return fs2.readdirSync(this.config.poolDir).filter((f) => f.startsWith("overlay-") && f.endsWith(".ext4")).map((f) => path2.join(this.config.poolDir, f));
857
+ }
858
+ /**
859
+ * Replenish the pool in background
860
+ */
861
+ async replenish() {
862
+ if (this.replenishing || !this.initialized) {
863
+ return;
864
+ }
865
+ const needed = this.config.size - this.queue.length;
866
+ if (needed <= 0) {
867
+ return;
868
+ }
869
+ this.replenishing = true;
870
+ logger2.log(`Replenishing pool: creating ${needed} overlay(s)...`);
871
+ try {
872
+ const promises = [];
873
+ for (let i = 0; i < needed; i++) {
874
+ const filePath = path2.join(
875
+ this.config.poolDir,
876
+ this.generateFileName()
877
+ );
878
+ promises.push(
879
+ this.config.createFile(filePath).then(() => {
880
+ this.queue.push(filePath);
881
+ })
882
+ );
883
+ }
884
+ await Promise.all(promises);
885
+ logger2.log(`Pool replenished: ${this.queue.length} available`);
886
+ } catch (err) {
887
+ logger2.error(
888
+ `Replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
889
+ );
890
+ } finally {
891
+ this.replenishing = false;
892
+ }
893
+ }
894
+ /**
895
+ * Initialize the overlay pool
896
+ */
897
+ async init() {
898
+ this.queue = [];
899
+ logger2.log(
900
+ `Initializing overlay pool (size=${this.config.size}, threshold=${this.config.replenishThreshold})...`
901
+ );
902
+ await this.ensurePoolDir();
903
+ const existing = this.scanPoolDir();
904
+ if (existing.length > 0) {
905
+ logger2.log(`Cleaning up ${existing.length} stale overlay(s)`);
906
+ for (const file of existing) {
907
+ fs2.unlinkSync(file);
908
+ }
909
+ }
910
+ this.initialized = true;
911
+ await this.replenish();
912
+ logger2.log("Overlay pool initialized");
913
+ }
914
+ /**
915
+ * Acquire an overlay file from the pool
916
+ *
917
+ * Returns the file path. Caller owns the file and must delete it when done.
918
+ * Falls back to on-demand creation if pool is exhausted.
919
+ */
920
+ async acquire() {
921
+ if (!this.initialized) {
922
+ throw new Error("Overlay pool not initialized");
923
+ }
924
+ const filePath = this.queue.shift();
925
+ if (filePath) {
926
+ logger2.log(`Acquired overlay from pool (${this.queue.length} remaining)`);
927
+ if (this.queue.length < this.config.replenishThreshold) {
928
+ this.replenish().catch((err) => {
929
+ logger2.error(
930
+ `Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
931
+ );
932
+ });
933
+ }
934
+ return filePath;
935
+ }
936
+ logger2.log("Pool exhausted, creating overlay on-demand");
937
+ const newPath = path2.join(this.config.poolDir, this.generateFileName());
938
+ await this.config.createFile(newPath);
939
+ return newPath;
940
+ }
941
+ /**
942
+ * Clean up the overlay pool
943
+ */
944
+ cleanup() {
945
+ if (!this.initialized) {
946
+ return;
947
+ }
948
+ logger2.log("Cleaning up overlay pool...");
949
+ for (const file of this.queue) {
950
+ try {
951
+ fs2.unlinkSync(file);
952
+ } catch (err) {
953
+ logger2.log(
954
+ `Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
955
+ );
956
+ }
957
+ }
958
+ this.queue = [];
959
+ for (const file of this.scanPoolDir()) {
960
+ try {
961
+ fs2.unlinkSync(file);
962
+ } catch (err) {
963
+ logger2.log(
964
+ `Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
965
+ );
966
+ }
967
+ }
968
+ this.initialized = false;
969
+ this.replenishing = false;
970
+ logger2.log("Overlay pool cleaned up");
971
+ }
972
+ };
973
+ var overlayPool = null;
974
+ async function initOverlayPool(config) {
975
+ if (overlayPool) {
976
+ overlayPool.cleanup();
977
+ }
978
+ overlayPool = new OverlayPool(config);
979
+ await overlayPool.init();
980
+ return overlayPool;
981
+ }
982
+ function acquireOverlay() {
983
+ if (!overlayPool) {
984
+ throw new Error(
985
+ "Overlay pool not initialized. Call initOverlayPool() first."
986
+ );
987
+ }
988
+ return overlayPool.acquire();
989
+ }
990
+ function cleanupOverlayPool() {
991
+ if (overlayPool) {
992
+ overlayPool.cleanup();
993
+ overlayPool = null;
994
+ }
995
+ }
996
+
997
+ // src/lib/firecracker/tap-pool.ts
998
+ import { createHash } from "crypto";
999
+ import { exec as exec4 } from "child_process";
1000
+ import { promisify as promisify4 } from "util";
1001
+
1002
+ // src/lib/firecracker/ip-registry.ts
1003
+ import { exec as exec3 } from "child_process";
1004
+ import { promisify as promisify3 } from "util";
1005
+ import * as fs3 from "fs";
1006
+ import path3 from "path";
1007
+ var execAsync3 = promisify3(exec3);
1008
+ var logger3 = createLogger("IPRegistry");
1009
+ var IP_PREFIX = "172.16.0.";
1010
+ var IP_START = 2;
1011
+ var IP_END = 254;
1012
+ var LOCK_TIMEOUT_MS = 1e4;
1013
+ var LOCK_RETRY_INTERVAL_MS = 100;
1014
+ async function defaultEnsureRunDir(runDir) {
1015
+ if (!fs3.existsSync(runDir)) {
1016
+ await execAsync3(`sudo mkdir -p ${runDir}`);
1017
+ await execAsync3(`sudo chmod 777 ${runDir}`);
1018
+ }
1019
+ }
1020
+ async function defaultScanTapDevices() {
1021
+ const tapDevices = /* @__PURE__ */ new Set();
1022
+ try {
1023
+ const { stdout } = await execAsync3(
1024
+ `ip -o link show type tuntap 2>/dev/null || true`
1025
+ );
1026
+ const lines = stdout.split("\n");
1027
+ for (const line of lines) {
1028
+ const match = line.match(/^\d+:\s+([a-z0-9]+):/);
1029
+ if (match && match[1]) {
1030
+ tapDevices.add(match[1]);
1031
+ }
1032
+ }
1033
+ } catch {
1034
+ }
1035
+ return tapDevices;
1036
+ }
1037
+ async function defaultCheckTapExists(tapDevice) {
1038
+ try {
1039
+ await execAsync3(`ip link show ${tapDevice} 2>/dev/null`);
1040
+ return true;
1041
+ } catch {
1042
+ return false;
1043
+ }
1044
+ }
1045
+ function isProcessRunning(pid) {
1046
+ if (!Number.isInteger(pid) || pid <= 0) {
1047
+ return false;
1048
+ }
1049
+ try {
1050
+ process.kill(pid, 0);
1051
+ return true;
1052
+ } catch (err) {
1053
+ return err.code === "EPERM";
1054
+ }
1055
+ }
1056
/**
 * File-backed registry of guest IP allocations.
 *
 * Allocations are stored as JSON in `registryPath`, keyed by IP address, each
 * entry holding `{ runnerPid, tapDevice, vmId }`. Mutating operations run
 * under an exclusive lock file (`lockPath`) that contains the holder's PID.
 * Every filesystem location and system probe can be overridden through the
 * constructor config.
 */
var IPRegistry = class {
  config;
  /**
   * @param {object} [config] - Optional overrides: `runDir`, `lockPath`,
   *   `registryPath`, `ensureRunDir`, `scanTapDevices`, `checkTapExists`.
   *   Anything omitted falls back to the module defaults.
   */
  constructor(config = {}) {
    const runDir = config.runDir ?? VM0_RUN_DIR;
    this.config = {
      runDir,
      lockPath: config.lockPath ?? path3.join(runDir, "ip-pool.lock.active"),
      registryPath: config.registryPath ?? path3.join(runDir, "ip-registry.json"),
      ensureRunDir: config.ensureRunDir ?? (() => defaultEnsureRunDir(runDir)),
      scanTapDevices: config.scanTapDevices ?? defaultScanTapDevices,
      checkTapExists: config.checkTapExists ?? defaultCheckTapExists
    };
  }
  // ============ File Lock ============
  /**
   * Remove the lock file when its owner is provably gone.
   *
   * A lock holding a valid PID is stale when that process no longer runs.
   * A lock without a valid PID is either being written right now (the file is
   * created before its content is written) or was left behind by a crash, so
   * it is only treated as stale once it is older than a short grace period.
   *
   * NOTE(review): two waiters can still both judge the same lock stale and one
   * may then unlink the other's fresh lock; closing that window needs an
   * atomic takeover scheme and is out of scope here.
   *
   * @returns {boolean} `true` when a stale lock was removed and acquisition
   *   should be retried immediately, `false` otherwise (including when the
   *   lock file could not be inspected).
   */
  removeStaleLock() {
    const unreadableGraceMs = LOCK_RETRY_INTERVAL_MS * 10;
    try {
      const pidStr = fs3.readFileSync(this.config.lockPath, "utf-8");
      const pid = Number.parseInt(pidStr, 10);
      let stale;
      if (Number.isInteger(pid) && pid > 0) {
        stale = !isProcessRunning(pid);
      } else {
        const ageMs = Date.now() - fs3.statSync(this.config.lockPath).mtimeMs;
        stale = ageMs > unreadableGraceMs;
      }
      if (stale) {
        fs3.unlinkSync(this.config.lockPath);
        return true;
      }
    } catch {
      // Lock vanished or is unreadable: fall through and let the caller wait.
    }
    return false;
  }
  /**
   * Execute a function while holding an exclusive lock on the IP pool.
   *
   * @param {() => Promise<any>} fn - Work to perform under the lock.
   * @returns {Promise<any>} Whatever `fn` resolves to.
   * @throws {Error} When the lock cannot be acquired within LOCK_TIMEOUT_MS;
   *   errors thrown by `fn` propagate after the lock is released.
   */
  async withIPLock(fn) {
    await this.config.ensureRunDir();
    const startTime = Date.now();
    let lockAcquired = false;
    while (Date.now() - startTime < LOCK_TIMEOUT_MS) {
      try {
        // "wx" fails when the file already exists, which makes creation the
        // atomic acquire step.
        fs3.writeFileSync(this.config.lockPath, process.pid.toString(), {
          flag: "wx"
        });
        lockAcquired = true;
        break;
      } catch {
        if (this.removeStaleLock()) {
          continue;
        }
        await new Promise(
          (resolve) => setTimeout(resolve, LOCK_RETRY_INTERVAL_MS)
        );
      }
    }
    if (!lockAcquired) {
      throw new Error(
        `Failed to acquire IP pool lock after ${LOCK_TIMEOUT_MS}ms`
      );
    }
    try {
      return await fn();
    } finally {
      try {
        fs3.unlinkSync(this.config.lockPath);
      } catch {
        // Best effort: the lock may already have been removed.
      }
    }
  }
  // ============ Registry CRUD ============
  /**
   * Read the IP registry from file.
   *
   * A missing, unreadable, unparsable or wrongly shaped file yields an empty
   * registry, so callers can always rely on `allocations` being a plain
   * object.
   *
   * @returns {{allocations: Object}} The parsed registry.
   */
  readRegistry() {
    try {
      if (fs3.existsSync(this.config.registryPath)) {
        const content = fs3.readFileSync(this.config.registryPath, "utf-8");
        const parsed = JSON.parse(content);
        const allocations = parsed?.allocations;
        // Valid JSON is not enough: `null`, arrays or objects without an
        // `allocations` map would crash every caller that indexes into it.
        if (allocations !== null && typeof allocations === "object" && !Array.isArray(allocations)) {
          return parsed;
        }
      }
    } catch {
      // Same fallback as for a missing file.
    }
    return { allocations: {} };
  }
  /**
   * Write the IP registry to file.
   *
   * The content goes to a temporary sibling file that is then renamed over
   * the registry, so readers (including the lock-free diagnostic getters)
   * never observe a partially written file.
   *
   * @param {{allocations: Object}} registry - Registry to persist.
   * @throws Propagates filesystem errors after removing the temporary file.
   */
  writeRegistry(registry) {
    const tmpPath = `${this.config.registryPath}.${process.pid}.tmp`;
    try {
      fs3.writeFileSync(tmpPath, JSON.stringify(registry, null, 2));
      fs3.renameSync(tmpPath, this.config.registryPath);
    } catch (err) {
      try {
        fs3.unlinkSync(tmpPath);
      } catch {
        // Nothing to clean up.
      }
      throw err;
    }
  }
  /**
   * Find the first available IP in the range.
   *
   * @param {{allocations: Object}} registry - Current registry content.
   * @returns {string|null} Lowest unallocated address, or `null` when the
   *   pool is exhausted.
   */
  findFreeIP(registry) {
    const allocatedIPs = new Set(Object.keys(registry.allocations));
    for (let octet = IP_START; octet <= IP_END; octet++) {
      const ip = `${IP_PREFIX}${octet}`;
      if (!allocatedIPs.has(ip)) {
        return ip;
      }
    }
    return null;
  }
  // ============ IP Allocation ============
  /**
   * Allocate an IP address for a TAP device.
   *
   * @param {string} tapDevice - TAP device the address is reserved for.
   * @returns {Promise<string>} The allocated address.
   * @throws {Error} When the pool is exhausted or the lock cannot be taken.
   */
  async allocateIP(tapDevice) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      const ip = this.findFreeIP(registry);
      if (!ip) {
        throw new Error(
          `No free IP addresses available in pool (${IP_PREFIX}${IP_START}-${IP_END})`
        );
      }
      registry.allocations[ip] = {
        runnerPid: process.pid,
        tapDevice,
        vmId: null
      };
      this.writeRegistry(registry);
      logger3.log(`Allocated IP ${ip} for TAP ${tapDevice}`);
      return ip;
    });
  }
  /**
   * Release an IP address back to the pool. Unknown addresses are ignored.
   *
   * @param {string} ip - Address to release.
   * @returns {Promise<void>}
   */
  async releaseIP(ip) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      if (registry.allocations[ip]) {
        const allocation = registry.allocations[ip];
        delete registry.allocations[ip];
        this.writeRegistry(registry);
        logger3.log(
          `Released IP ${ip} (was allocated to TAP ${allocation.tapDevice})`
        );
      }
    });
  }
  // ============ Cleanup ============
  /**
   * Clean up orphaned IP allocations
   *
   * An allocation is orphaned if:
   * 1. TAP device no longer exists on the system, OR
   * 2. Runner process that created it is no longer running
   *
   * @returns List of orphaned TAP devices that should be deleted by caller
   */
  async cleanupOrphanedIPs() {
    const activeTaps = await this.config.scanTapDevices();
    logger3.log(`Found ${activeTaps.size} TAP device(s) on system`);
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      const beforeCount = Object.keys(registry.allocations).length;
      if (beforeCount === 0) {
        return [];
      }
      const cleanedRegistry = { allocations: {} };
      const orphanedTaps = [];
      for (const [ip, allocation] of Object.entries(registry.allocations)) {
        const runnerAlive = isProcessRunning(allocation.runnerPid);
        // The bulk scan is advisory (it can come back empty when the command
        // fails), so a device missing from it is confirmed with a direct
        // probe — for dead-runner entries too, otherwise their still-present
        // TAP devices would never be reported for deletion.
        const tapExists = activeTaps.has(allocation.tapDevice) || await this.config.checkTapExists(allocation.tapDevice);
        if (!runnerAlive) {
          logger3.log(
            `Removing orphaned IP ${ip} (runner PID ${allocation.runnerPid} not running)`
          );
          if (tapExists) {
            orphanedTaps.push(allocation.tapDevice);
          }
          continue;
        }
        if (tapExists) {
          cleanedRegistry.allocations[ip] = allocation;
        } else {
          logger3.log(
            `Removing orphaned IP ${ip} (TAP ${allocation.tapDevice} not found)`
          );
        }
      }
      const afterCount = Object.keys(cleanedRegistry.allocations).length;
      if (afterCount !== beforeCount) {
        this.writeRegistry(cleanedRegistry);
        logger3.log(`Cleaned up ${beforeCount - afterCount} orphaned IP(s)`);
      }
      return orphanedTaps;
    });
  }
  // ============ VM ID Tracking ============
  /**
   * Assign a vmId to an IP allocation (called when VM acquires the pair).
   * Does nothing when the address is not allocated.
   */
  async assignVmIdToIP(ip, vmId) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      if (registry.allocations[ip]) {
        registry.allocations[ip].vmId = vmId;
        this.writeRegistry(registry);
      }
    });
  }
  /**
   * Clear vmId from an IP allocation (called when pair is returned to pool)
   * Only clears if the current vmId matches expectedVmId to prevent race conditions
   * where a new VM's vmId could be cleared by the previous VM's release.
   */
  async clearVmIdFromIP(ip, expectedVmId) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      if (registry.allocations[ip] && registry.allocations[ip].vmId === expectedVmId) {
        registry.allocations[ip].vmId = null;
        this.writeRegistry(registry);
      }
    });
  }
  // ============ Diagnostic Functions ============
  /**
   * Get all current IP allocations (for diagnostic purposes)
   * Used by the doctor command to display allocated IPs.
   * Reads the registry without taking the lock.
   *
   * @returns {Map<string, object>} Allocation entries keyed by IP address.
   */
  getAllocations() {
    const registry = this.readRegistry();
    return new Map(Object.entries(registry.allocations));
  }
  /**
   * Get IP allocation for a specific VM ID (for diagnostic purposes).
   * Reads the registry without taking the lock.
   *
   * @returns {string|undefined} The address assigned to `vmId`, if any.
   */
  getIPForVm(vmId) {
    const registry = this.readRegistry();
    for (const [ip, allocation] of Object.entries(registry.allocations)) {
      if (allocation.vmId === vmId) {
        return ip;
      }
    }
    return void 0;
  }
};
1291
// Lazily created registry instance shared by the module-level helpers below.
var globalRegistry = null;
/**
 * Return the shared IPRegistry, constructing it with default settings on
 * first use.
 */
function getRegistry() {
  globalRegistry = globalRegistry || new IPRegistry();
  return globalRegistry;
}
1298
/** Allocate an IP for `tapDevice` through the shared registry. */
async function allocateIP(tapDevice) {
  const registry = getRegistry();
  return registry.allocateIP(tapDevice);
}
1301
/** Release `ip` through the shared registry. */
async function releaseIP(ip) {
  const registry = getRegistry();
  return registry.releaseIP(ip);
}
1304
/** Run orphan cleanup on the shared registry and return its result. */
async function cleanupOrphanedIPs() {
  const registry = getRegistry();
  return registry.cleanupOrphanedIPs();
}
1307
/** Record `vmId` against `ip` in the shared registry. */
async function assignVmIdToIP(ip, vmId) {
  const registry = getRegistry();
  return registry.assignVmIdToIP(ip, vmId);
}
1310
/** Clear the vmId stored for `ip` in the shared registry if it equals `expectedVmId`. */
async function clearVmIdFromIP(ip, expectedVmId) {
  const registry = getRegistry();
  return registry.clearVmIdFromIP(ip, expectedVmId);
}
1313
/** Return the shared registry's current allocations. */
function getAllocations() {
  const registry = getRegistry();
  return registry.getAllocations();
}
1316
/** Look up the IP recorded for `vmId` in the shared registry. */
function getIPForVm(vmId) {
  const registry = getRegistry();
  return registry.getIPForVm(vmId);
}
1319
+
1320
+ // src/lib/firecracker/tap-pool.ts
1321
// Promisified form of `exec4`, used for the shell commands issued by the TAP pool.
var execAsync4 = promisify4(exec4);
// Logger for this module's messages.
var logger4 = createLogger("TapPool");
1323
/**
 * Derive a TAP name prefix from a pool name.
 *
 * @param {string} name - Pool name to hash.
 * @returns {string} "vm0" followed by the first 8 hex characters of the MD5
 *   digest of `name`.
 */
function generateTapPrefix(name) {
  const digest = createHash("md5").update(name).digest("hex");
  const shortDigest = digest.slice(0, 8);
  return `vm0${shortDigest}`;
}
1110
- var OverlayPool = class {
1327
/**
 * Run a shell command prefixed with `sudo`.
 *
 * @param {string} cmd - Command line to run (without the `sudo` prefix).
 * @returns {Promise<string>} The command's stdout with surrounding whitespace
 *   trimmed.
 */
async function execCommand2(cmd) {
  const result = await execAsync4(`sudo ${cmd}`);
  return result.stdout.trim();
}
1332
/**
 * Default `createTap` implementation: add the TAP device, attach it to
 * BRIDGE_NAME and bring it up. The three commands run strictly in order and
 * the first failure aborts the rest.
 *
 * @param {string} name - TAP device name.
 */
async function defaultCreateTap(name) {
  const steps = [
    `ip tuntap add ${name} mode tap`,
    `ip link set ${name} master ${BRIDGE_NAME}`,
    `ip link set ${name} up`
  ];
  for (const step of steps) {
    await execCommand2(step);
  }
}
1337
/**
 * Default `deleteTap` implementation: remove the interface with
 * `ip link delete`.
 *
 * @param {string} name - TAP device name.
 */
async function defaultDeleteTap(name) {
  const command = `ip link delete ${name}`;
  await execCommand2(command);
}
1340
/**
 * Default `setMac` implementation: set the link-layer address of a TAP device.
 *
 * @param {string} tap - TAP device name.
 * @param {string} mac - MAC address to assign.
 */
async function defaultSetMac(tap, mac) {
  const command = `ip link set dev ${tap} address ${mac}`;
  await execCommand2(command);
}
1343
/**
 * Delete the neighbour (ARP) entry for `ip` on BRIDGE_NAME.
 * Best effort: failures are ignored.
 *
 * @param {string} ip - Address whose neighbour entry should be removed.
 */
async function clearArpEntry(ip) {
  try {
    const command = `ip neigh del ${ip} dev ${BRIDGE_NAME}`;
    await execCommand2(command);
  } catch {
    // Deliberately ignored.
  }
}
1349
+ var TapPool = class {
1111
1350
  initialized = false;
1112
1351
  queue = [];
1113
1352
  replenishing = false;
1353
+ nextIndex = 0;
1354
+ prefix;
1114
1355
  config;
1115
1356
  constructor(config) {
1357
+ this.prefix = generateTapPrefix(config.name);
1116
1358
  this.config = {
1359
+ name: config.name,
1117
1360
  size: config.size,
1118
1361
  replenishThreshold: config.replenishThreshold,
1119
- poolDir: config.poolDir,
1120
- createFile: config.createFile ?? defaultCreateFile
1362
+ createTap: config.createTap ?? defaultCreateTap,
1363
+ deleteTap: config.deleteTap ?? defaultDeleteTap,
1364
+ setMac: config.setMac ?? defaultSetMac
1121
1365
  };
1122
1366
  }
1123
1367
  /**
1124
- * Generate unique file name using UUID
1368
+ * Generate TAP device name
1369
+ * Format: {prefix}{index} (e.g., vm01a2b3c4d000)
1125
1370
  */
1126
- generateFileName() {
1127
- return `overlay-${randomUUID()}.ext4`;
1371
+ generateTapName(index) {
1372
+ return `${this.prefix}${index.toString().padStart(3, "0")}`;
1128
1373
  }
1129
1374
  /**
1130
- * Ensure the pool directory exists
1375
+ * Check if a TAP name belongs to this pool instance
1131
1376
  */
1132
- async ensurePoolDir() {
1133
- const parentDir = path2.dirname(this.config.poolDir);
1134
- if (!fs3.existsSync(parentDir)) {
1135
- await execAsync3(`sudo mkdir -p ${parentDir}`);
1136
- await execAsync3(`sudo chmod 777 ${parentDir}`);
1137
- }
1138
- if (!fs3.existsSync(this.config.poolDir)) {
1139
- fs3.mkdirSync(this.config.poolDir, { recursive: true });
1140
- }
1377
+ isOwnTap(name) {
1378
+ return name.startsWith(this.prefix);
1141
1379
  }
1142
1380
  /**
1143
- * Scan pool directory for overlay files
1381
+ * Create a {TAP, IP} pair
1144
1382
  */
1145
- scanPoolDir() {
1146
- if (!fs3.existsSync(this.config.poolDir)) {
1147
- return [];
1383
+ async createPair() {
1384
+ const tapDevice = this.generateTapName(this.nextIndex++);
1385
+ await this.config.createTap(tapDevice);
1386
+ let guestIp;
1387
+ try {
1388
+ guestIp = await allocateIP(tapDevice);
1389
+ } catch (err) {
1390
+ await this.config.deleteTap(tapDevice).catch(() => {
1391
+ });
1392
+ throw err;
1148
1393
  }
1149
- return fs3.readdirSync(this.config.poolDir).filter((f) => f.startsWith("overlay-") && f.endsWith(".ext4")).map((f) => path2.join(this.config.poolDir, f));
1394
+ return { tapDevice, guestIp };
1150
1395
  }
1151
1396
  /**
1152
1397
  * Replenish the pool in background
@@ -1160,135 +1405,267 @@ var OverlayPool = class {
1160
1405
  return;
1161
1406
  }
1162
1407
  this.replenishing = true;
1163
- logger3.log(`Replenishing pool: creating ${needed} overlay(s)...`);
1408
+ logger4.log(`Replenishing pool: creating up to ${needed} pair(s)...`);
1164
1409
  try {
1165
- const promises = [];
1166
1410
  for (let i = 0; i < needed; i++) {
1167
- const filePath = path2.join(
1168
- this.config.poolDir,
1169
- this.generateFileName()
1170
- );
1171
- promises.push(
1172
- this.config.createFile(filePath).then(() => {
1173
- this.queue.push(filePath);
1174
- })
1175
- );
1411
+ if (!this.initialized) {
1412
+ logger4.log("Pool shutdown detected, stopping replenish");
1413
+ break;
1414
+ }
1415
+ if (this.queue.length >= this.config.size) {
1416
+ break;
1417
+ }
1418
+ try {
1419
+ const pair = await this.createPair();
1420
+ if (!this.initialized) {
1421
+ await releaseIP(pair.guestIp).catch(() => {
1422
+ });
1423
+ await this.config.deleteTap(pair.tapDevice).catch(() => {
1424
+ });
1425
+ logger4.log("Pool shutdown detected, cleaned up in-flight pair");
1426
+ break;
1427
+ }
1428
+ this.queue.push(pair);
1429
+ } catch (err) {
1430
+ logger4.error(
1431
+ `Failed to create pair: ${err instanceof Error ? err.message : "Unknown"}`
1432
+ );
1433
+ }
1176
1434
  }
1177
- await Promise.all(promises);
1178
- logger3.log(`Pool replenished: ${this.queue.length} available`);
1179
- } catch (err) {
1180
- logger3.error(
1181
- `Replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
1182
- );
1435
+ logger4.log(`Pool replenished: ${this.queue.length} available`);
1183
1436
  } finally {
1184
1437
  this.replenishing = false;
1185
1438
  }
1186
1439
  }
1187
1440
  /**
1188
- * Initialize the overlay pool
1441
+ * Scan for orphaned TAP devices from previous runs (matching this pool's prefix)
1442
+ */
1443
+ async scanOrphanedTaps() {
1444
+ try {
1445
+ const { stdout } = await execAsync4(
1446
+ `ip -o link show type tuntap 2>/dev/null || true`
1447
+ );
1448
+ const orphaned = [];
1449
+ const lines = stdout.split("\n");
1450
+ for (const line of lines) {
1451
+ const match = line.match(/^\d+:\s+([a-z0-9]+):/);
1452
+ if (match && match[1] && this.isOwnTap(match[1])) {
1453
+ orphaned.push(match[1]);
1454
+ }
1455
+ }
1456
+ return orphaned;
1457
+ } catch {
1458
+ return [];
1459
+ }
1460
+ }
1461
+ /**
1462
+ * Initialize the TAP pool
1189
1463
  */
1190
1464
  async init() {
1191
1465
  this.queue = [];
1192
- logger3.log(
1193
- `Initializing overlay pool (size=${this.config.size}, threshold=${this.config.replenishThreshold})...`
1466
+ this.nextIndex = 0;
1467
+ logger4.log(
1468
+ `Initializing TAP pool (size=${this.config.size}, threshold=${this.config.replenishThreshold})...`
1194
1469
  );
1195
- await this.ensurePoolDir();
1196
- const existing = this.scanPoolDir();
1197
- if (existing.length > 0) {
1198
- logger3.log(`Cleaning up ${existing.length} stale overlay(s)`);
1199
- for (const file of existing) {
1200
- fs3.unlinkSync(file);
1470
+ const orphanedTaps = await this.scanOrphanedTaps();
1471
+ if (orphanedTaps.length > 0) {
1472
+ logger4.log(`Cleaning up ${orphanedTaps.length} orphaned TAP(s)`);
1473
+ for (const tap of orphanedTaps) {
1474
+ try {
1475
+ await execCommand2(`ip link delete ${tap}`);
1476
+ } catch {
1477
+ }
1478
+ }
1479
+ }
1480
+ const orphanedTapsFromRegistry = await cleanupOrphanedIPs();
1481
+ for (const tap of orphanedTapsFromRegistry) {
1482
+ try {
1483
+ await execCommand2(`ip link delete ${tap}`);
1484
+ logger4.log(`Deleted orphaned TAP ${tap} (runner dead)`);
1485
+ } catch {
1201
1486
  }
1202
1487
  }
1203
1488
  this.initialized = true;
1204
1489
  await this.replenish();
1205
- logger3.log("Overlay pool initialized");
1490
+ logger4.log("TAP pool initialized");
1206
1491
  }
1207
1492
  /**
1208
- * Acquire an overlay file from the pool
1493
+ * Acquire a {TAP, IP} pair from the pool
1209
1494
  *
1210
- * Returns the file path. Caller owns the file and must delete it when done.
1495
+ * Returns VMNetworkConfig with TAP device, IP, and MAC.
1211
1496
  * Falls back to on-demand creation if pool is exhausted.
1212
1497
  */
1213
- async acquire() {
1214
- if (!this.initialized) {
1215
- throw new Error("Overlay pool not initialized");
1216
- }
1217
- const filePath = this.queue.shift();
1218
- if (filePath) {
1219
- logger3.log(`Acquired overlay from pool (${this.queue.length} remaining)`);
1498
+ async acquire(vmId) {
1499
+ let resource;
1500
+ let fromPool;
1501
+ const pooled = this.queue.shift();
1502
+ if (pooled) {
1503
+ resource = pooled;
1504
+ fromPool = true;
1505
+ logger4.log(`Acquired pair from pool (${this.queue.length} remaining)`);
1220
1506
  if (this.queue.length < this.config.replenishThreshold) {
1221
1507
  this.replenish().catch((err) => {
1222
- logger3.error(
1508
+ logger4.error(
1509
+ `Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
1510
+ );
1511
+ });
1512
+ }
1513
+ } else {
1514
+ logger4.log("Pool exhausted, creating pair on-demand");
1515
+ resource = await this.createPair();
1516
+ fromPool = false;
1517
+ if (this.config.replenishThreshold > 0) {
1518
+ this.replenish().catch((err) => {
1519
+ logger4.error(
1223
1520
  `Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
1224
1521
  );
1225
1522
  });
1226
1523
  }
1227
- return filePath;
1228
1524
  }
1229
- logger3.log("Pool exhausted, creating overlay on-demand");
1230
- const newPath = path2.join(this.config.poolDir, this.generateFileName());
1231
- await this.config.createFile(newPath);
1232
- return newPath;
1525
+ const guestMac = generateMacAddress(vmId);
1526
+ try {
1527
+ await this.config.setMac(resource.tapDevice, guestMac);
1528
+ } catch (err) {
1529
+ if (fromPool) {
1530
+ this.queue.push(resource);
1531
+ logger4.log(
1532
+ `Returned pair to pool after MAC set failure: ${resource.tapDevice}`
1533
+ );
1534
+ } else {
1535
+ await releaseIP(resource.guestIp).catch(() => {
1536
+ });
1537
+ await this.config.deleteTap(resource.tapDevice).catch(() => {
1538
+ });
1539
+ }
1540
+ throw err;
1541
+ }
1542
+ await clearArpEntry(resource.guestIp);
1543
+ try {
1544
+ await assignVmIdToIP(resource.guestIp, vmId);
1545
+ } catch (err) {
1546
+ logger4.error(
1547
+ `Failed to assign vmId to IP registry: ${err instanceof Error ? err.message : "Unknown"}`
1548
+ );
1549
+ }
1550
+ logger4.log(
1551
+ `Acquired: TAP ${resource.tapDevice}, MAC ${guestMac}, IP ${resource.guestIp}`
1552
+ );
1553
+ return {
1554
+ tapDevice: resource.tapDevice,
1555
+ guestMac,
1556
+ guestIp: resource.guestIp,
1557
+ gatewayIp: BRIDGE_IP,
1558
+ netmask: BRIDGE_NETMASK
1559
+ };
1233
1560
  }
1234
1561
  /**
1235
- * Clean up the overlay pool
1562
+ * Release a {TAP, IP} pair back to the pool
1563
+ * @param vmId The VM ID that is releasing this pair (for registry cleanup)
1236
1564
  */
1237
- cleanup() {
1565
+ async release(tapDevice, guestIp, vmId) {
1566
+ await clearArpEntry(guestIp);
1238
1567
  if (!this.initialized) {
1568
+ await releaseIP(guestIp).catch(() => {
1569
+ });
1570
+ try {
1571
+ await this.config.deleteTap(tapDevice);
1572
+ logger4.log(`Pair deleted (pool shutdown): ${tapDevice}, ${guestIp}`);
1573
+ } catch (err) {
1574
+ logger4.log(
1575
+ `Failed to delete TAP ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
1576
+ );
1577
+ }
1239
1578
  return;
1240
1579
  }
1241
- logger3.log("Cleaning up overlay pool...");
1242
- for (const file of this.queue) {
1580
+ if (this.isOwnTap(tapDevice)) {
1581
+ const alreadyInQueue = this.queue.some((r) => r.tapDevice === tapDevice);
1582
+ if (alreadyInQueue) {
1583
+ logger4.log(
1584
+ `Pair ${tapDevice} already in pool, ignoring duplicate release`
1585
+ );
1586
+ return;
1587
+ }
1588
+ this.queue.push({ tapDevice, guestIp });
1589
+ logger4.log(
1590
+ `Pair released: ${tapDevice}, ${guestIp} (${this.queue.length} available)`
1591
+ );
1243
1592
  try {
1244
- fs3.unlinkSync(file);
1593
+ await clearVmIdFromIP(guestIp, vmId);
1245
1594
  } catch (err) {
1246
- logger3.log(
1247
- `Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
1595
+ logger4.error(
1596
+ `Failed to clear vmId from IP registry: ${err instanceof Error ? err.message : "Unknown"}`
1248
1597
  );
1249
1598
  }
1250
- }
1251
- this.queue = [];
1252
- for (const file of this.scanPoolDir()) {
1599
+ } else {
1600
+ await releaseIP(guestIp).catch(() => {
1601
+ });
1253
1602
  try {
1254
- fs3.unlinkSync(file);
1603
+ await this.config.deleteTap(tapDevice);
1604
+ logger4.log(`Non-pooled pair deleted: ${tapDevice}, ${guestIp}`);
1255
1605
  } catch (err) {
1256
- logger3.log(
1257
- `Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
1606
+ logger4.log(
1607
+ `Failed to delete non-pooled TAP ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
1258
1608
  );
1259
1609
  }
1260
1610
  }
1611
+ }
1612
+ /**
1613
+ * Clean up the TAP pool
1614
+ *
1615
+ * Note: This is a sync function for compatibility with process cleanup.
1616
+ * Resources are cleaned up asynchronously (fire-and-forget).
1617
+ * Any remaining resources will be cleaned up by init() on next startup.
1618
+ */
1619
+ cleanup() {
1620
+ if (!this.initialized) {
1621
+ return;
1622
+ }
1623
+ logger4.log(`Cleaning up TAP pool (${this.queue.length} pairs)...`);
1624
+ for (const { tapDevice, guestIp } of this.queue) {
1625
+ releaseIP(guestIp).catch(() => {
1626
+ });
1627
+ this.config.deleteTap(tapDevice).catch((err) => {
1628
+ logger4.log(
1629
+ `Failed to delete ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
1630
+ );
1631
+ });
1632
+ }
1633
+ this.queue = [];
1261
1634
  this.initialized = false;
1262
1635
  this.replenishing = false;
1263
- logger3.log("Overlay pool cleaned up");
1636
+ logger4.log("TAP pool cleanup initiated");
1264
1637
  }
1265
1638
  };
1266
- var overlayPool = null;
1267
- async function initOverlayPool(config) {
1268
- if (overlayPool) {
1269
- overlayPool.cleanup();
1639
// Module-level TAP pool instance; null until initTapPool() has been called.
var tapPool = null;
/**
 * Create and initialize the module-level TAP pool, first calling cleanup() on
 * any pool that already exists.
 *
 * @param {object} config - Passed unchanged to the TapPool constructor.
 * @returns {Promise<object>} The module-level pool after init() has finished.
 */
async function initTapPool(config) {
  tapPool?.cleanup();
  tapPool = new TapPool(config);
  await tapPool.init();
  return tapPool;
}
1648
+ async function acquireTap(vmId) {
1649
+ if (!tapPool) {
1650
+ throw new Error("TAP pool not initialized. Call initTapPool() first.");
1270
1651
  }
1271
- overlayPool = new OverlayPool(config);
1272
- await overlayPool.init();
1273
- return overlayPool;
1652
+ return tapPool.acquire(vmId);
1274
1653
  }
1275
- function acquireOverlay() {
1276
- if (!overlayPool) {
1277
- throw new Error(
1278
- "Overlay pool not initialized. Call initOverlayPool() first."
1279
- );
1654
+ async function releaseTap(tapDevice, guestIp, vmId) {
1655
+ if (!tapPool) {
1656
+ throw new Error("TAP pool not initialized. Call initTapPool() first.");
1280
1657
  }
1281
- return overlayPool.acquire();
1658
+ return tapPool.release(tapDevice, guestIp, vmId);
1282
1659
  }
1283
- function cleanupOverlayPool() {
1284
- if (overlayPool) {
1285
- overlayPool.cleanup();
1286
- overlayPool = null;
1660
+ function cleanupTapPool() {
1661
+ if (tapPool) {
1662
+ tapPool.cleanup();
1663
+ tapPool = null;
1287
1664
  }
1288
1665
  }
1289
1666
 
1290
1667
  // src/lib/firecracker/vm.ts
1291
- var logger4 = createLogger("VM");
1668
+ var logger5 = createLogger("VM");
1292
1669
  var FirecrackerVM = class {
1293
1670
  config;
1294
1671
  process = null;
@@ -1304,8 +1681,8 @@ var FirecrackerVM = class {
1304
1681
  constructor(config) {
1305
1682
  this.config = config;
1306
1683
  this.workDir = config.workDir || tempPaths.vmWorkDir(config.vmId);
1307
- this.socketPath = path3.join(this.workDir, "firecracker.sock");
1308
- this.vsockPath = path3.join(this.workDir, "vsock.sock");
1684
+ this.socketPath = path4.join(this.workDir, "firecracker.sock");
1685
+ this.vsockPath = path4.join(this.workDir, "vsock.sock");
1309
1686
  }
1310
1687
  /**
1311
1688
  * Get current VM state
@@ -1350,19 +1727,13 @@ var FirecrackerVM = class {
1350
1727
  if (fs4.existsSync(this.socketPath)) {
1351
1728
  fs4.unlinkSync(this.socketPath);
1352
1729
  }
1353
- logger4.log(`[VM ${this.config.vmId}] Setting up overlay and network...`);
1354
- const setupOverlay = async () => {
1355
- this.vmOverlayPath = await acquireOverlay();
1356
- logger4.log(
1357
- `[VM ${this.config.vmId}] Overlay acquired: ${this.vmOverlayPath}`
1358
- );
1359
- };
1360
- const [, networkConfig] = await Promise.all([
1361
- setupOverlay(),
1362
- createTapDevice(this.config.vmId)
1363
- ]);
1364
- this.networkConfig = networkConfig;
1365
- logger4.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
1730
+ logger5.log(`[VM ${this.config.vmId}] Acquiring overlay...`);
1731
+ this.vmOverlayPath = await acquireOverlay();
1732
+ logger5.log(`[VM ${this.config.vmId}] Overlay acquired`);
1733
+ logger5.log(`[VM ${this.config.vmId}] Acquiring TAP+IP...`);
1734
+ this.networkConfig = await acquireTap(this.config.vmId);
1735
+ logger5.log(`[VM ${this.config.vmId}] TAP+IP acquired`);
1736
+ logger5.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
1366
1737
  this.process = spawn(
1367
1738
  this.config.firecrackerBinary,
1368
1739
  ["--api-sock", this.socketPath],
@@ -1373,11 +1744,11 @@ var FirecrackerVM = class {
1373
1744
  }
1374
1745
  );
1375
1746
  this.process.on("error", (err) => {
1376
- logger4.log(`[VM ${this.config.vmId}] Firecracker error: ${err}`);
1747
+ logger5.log(`[VM ${this.config.vmId}] Firecracker error: ${err}`);
1377
1748
  this.state = "error";
1378
1749
  });
1379
1750
  this.process.on("exit", (code, signal) => {
1380
- logger4.log(
1751
+ logger5.log(
1381
1752
  `[VM ${this.config.vmId}] Firecracker exited: code=${code}, signal=${signal}`
1382
1753
  );
1383
1754
  if (this.state !== "stopped") {
@@ -1390,7 +1761,7 @@ var FirecrackerVM = class {
1390
1761
  });
1391
1762
  stdoutRL.on("line", (line) => {
1392
1763
  if (line.trim()) {
1393
- logger4.log(`[VM ${this.config.vmId}] ${line}`);
1764
+ logger5.log(`[VM ${this.config.vmId}] ${line}`);
1394
1765
  }
1395
1766
  });
1396
1767
  }
@@ -1400,19 +1771,19 @@ var FirecrackerVM = class {
1400
1771
  });
1401
1772
  stderrRL.on("line", (line) => {
1402
1773
  if (line.trim()) {
1403
- logger4.log(`[VM ${this.config.vmId}] stderr: ${line}`);
1774
+ logger5.log(`[VM ${this.config.vmId}] stderr: ${line}`);
1404
1775
  }
1405
1776
  });
1406
1777
  }
1407
1778
  this.client = new FirecrackerClient(this.socketPath);
1408
- logger4.log(`[VM ${this.config.vmId}] Waiting for API...`);
1779
+ logger5.log(`[VM ${this.config.vmId}] Waiting for API...`);
1409
1780
  await this.client.waitUntilReady(1e4, 100);
1410
1781
  this.state = "configuring";
1411
1782
  await this.configure();
1412
- logger4.log(`[VM ${this.config.vmId}] Booting...`);
1783
+ logger5.log(`[VM ${this.config.vmId}] Booting...`);
1413
1784
  await this.client.start();
1414
1785
  this.state = "running";
1415
- logger4.log(
1786
+ logger5.log(
1416
1787
  `[VM ${this.config.vmId}] Running at ${this.networkConfig.guestIp}`
1417
1788
  );
1418
1789
  } catch (error) {
@@ -1428,7 +1799,7 @@ var FirecrackerVM = class {
1428
1799
  if (!this.client || !this.networkConfig || !this.vmOverlayPath) {
1429
1800
  throw new Error("VM not properly initialized");
1430
1801
  }
1431
- logger4.log(
1802
+ logger5.log(
1432
1803
  `[VM ${this.config.vmId}] Configuring: ${this.config.vcpus} vCPUs, ${this.config.memoryMb}MB RAM`
1433
1804
  );
1434
1805
  await this.client.setMachineConfig({
@@ -1437,12 +1808,12 @@ var FirecrackerVM = class {
1437
1808
  });
1438
1809
  const networkBootArgs = generateNetworkBootArgs(this.networkConfig);
1439
1810
  const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off nomodules random.trust_cpu=on quiet loglevel=0 nokaslr audit=0 numa=off mitigations=off noresume init=/sbin/vm-init ${networkBootArgs}`;
1440
- logger4.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
1811
+ logger5.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
1441
1812
  await this.client.setBootSource({
1442
1813
  kernel_image_path: this.config.kernelPath,
1443
1814
  boot_args: bootArgs
1444
1815
  });
1445
- logger4.log(
1816
+ logger5.log(
1446
1817
  `[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
1447
1818
  );
1448
1819
  await this.client.setDrive({
@@ -1451,14 +1822,14 @@ var FirecrackerVM = class {
1451
1822
  is_root_device: true,
1452
1823
  is_read_only: true
1453
1824
  });
1454
- logger4.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
1825
+ logger5.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
1455
1826
  await this.client.setDrive({
1456
1827
  drive_id: "overlay",
1457
1828
  path_on_host: this.vmOverlayPath,
1458
1829
  is_root_device: false,
1459
1830
  is_read_only: false
1460
1831
  });
1461
- logger4.log(
1832
+ logger5.log(
1462
1833
  `[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
1463
1834
  );
1464
1835
  await this.client.setNetworkInterface({
@@ -1466,7 +1837,7 @@ var FirecrackerVM = class {
1466
1837
  guest_mac: this.networkConfig.guestMac,
1467
1838
  host_dev_name: this.networkConfig.tapDevice
1468
1839
  });
1469
- logger4.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
1840
+ logger5.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
1470
1841
  await this.client.setVsock({
1471
1842
  vsock_id: "vsock0",
1472
1843
  guest_cid: 3,
@@ -1478,15 +1849,15 @@ var FirecrackerVM = class {
1478
1849
  */
1479
1850
  async stop() {
1480
1851
  if (this.state !== "running") {
1481
- logger4.log(`[VM ${this.config.vmId}] Not running, state: ${this.state}`);
1852
+ logger5.log(`[VM ${this.config.vmId}] Not running, state: ${this.state}`);
1482
1853
  return;
1483
1854
  }
1484
1855
  this.state = "stopping";
1485
- logger4.log(`[VM ${this.config.vmId}] Stopping...`);
1856
+ logger5.log(`[VM ${this.config.vmId}] Stopping...`);
1486
1857
  try {
1487
1858
  if (this.client) {
1488
1859
  await this.client.sendCtrlAltDel().catch((error) => {
1489
- logger4.log(
1860
+ logger5.log(
1490
1861
  `[VM ${this.config.vmId}] Graceful shutdown signal failed (VM may already be stopping): ${error instanceof Error ? error.message : error}`
1491
1862
  );
1492
1863
  });
@@ -1499,7 +1870,7 @@ var FirecrackerVM = class {
1499
1870
  * Force kill the VM
1500
1871
  */
1501
1872
  async kill() {
1502
- logger4.log(`[VM ${this.config.vmId}] Force killing...`);
1873
+ logger5.log(`[VM ${this.config.vmId}] Force killing...`);
1503
1874
  await this.cleanup();
1504
1875
  }
1505
1876
  /**
@@ -1513,10 +1884,17 @@ var FirecrackerVM = class {
1513
1884
  this.process = null;
1514
1885
  }
1515
1886
  if (this.networkConfig) {
1516
- await deleteTapDevice(
1517
- this.networkConfig.tapDevice,
1518
- this.networkConfig.guestIp
1519
- );
1887
+ try {
1888
+ await releaseTap(
1889
+ this.networkConfig.tapDevice,
1890
+ this.networkConfig.guestIp,
1891
+ this.config.vmId
1892
+ );
1893
+ } catch (err) {
1894
+ logger5.log(
1895
+ `[VM ${this.config.vmId}] Failed to release TAP: ${err instanceof Error ? err.message : "Unknown"}`
1896
+ );
1897
+ }
1520
1898
  this.networkConfig = null;
1521
1899
  }
1522
1900
  if (this.vmOverlayPath && fs4.existsSync(this.vmOverlayPath)) {
@@ -1528,7 +1906,7 @@ var FirecrackerVM = class {
1528
1906
  }
1529
1907
  this.client = null;
1530
1908
  this.state = "stopped";
1531
- logger4.log(`[VM ${this.config.vmId}] Stopped`);
1909
+ logger5.log(`[VM ${this.config.vmId}] Stopped`);
1532
1910
  }
1533
1911
  /**
1534
1912
  * Wait for the VM process to exit
@@ -1591,8 +1969,8 @@ function encodeExecPayload(command, timeoutMs) {
1591
1969
  cmdBuf.copy(payload, 8);
1592
1970
  return payload;
1593
1971
  }
1594
- function encodeWriteFilePayload(path8, content, sudo) {
1595
- const pathBuf = Buffer.from(path8, "utf-8");
1972
+ function encodeWriteFilePayload(path9, content, sudo) {
1973
+ const pathBuf = Buffer.from(path9, "utf-8");
1596
1974
  if (pathBuf.length > 65535) {
1597
1975
  throw new Error(`Path too long: ${pathBuf.length} bytes (max 65535)`);
1598
1976
  }
@@ -2449,8 +2827,8 @@ function getErrorMap() {
2449
2827
  return overrideErrorMap;
2450
2828
  }
2451
2829
  var makeIssue = (params) => {
2452
- const { data, path: path8, errorMaps, issueData } = params;
2453
- const fullPath = [...path8, ...issueData.path || []];
2830
+ const { data, path: path9, errorMaps, issueData } = params;
2831
+ const fullPath = [...path9, ...issueData.path || []];
2454
2832
  const fullIssue = {
2455
2833
  ...issueData,
2456
2834
  path: fullPath
@@ -2549,11 +2927,11 @@ var errorUtil;
2549
2927
  errorUtil2.toString = (message) => typeof message === "string" ? message : message === null || message === void 0 ? void 0 : message.message;
2550
2928
  })(errorUtil || (errorUtil = {}));
2551
2929
  var ParseInputLazyPath = class {
2552
- constructor(parent, value, path8, key) {
2930
+ constructor(parent, value, path9, key) {
2553
2931
  this._cachedPath = [];
2554
2932
  this.parent = parent;
2555
2933
  this.data = value;
2556
- this._path = path8;
2934
+ this._path = path9;
2557
2935
  this._key = key;
2558
2936
  }
2559
2937
  get path() {
@@ -8729,7 +9107,7 @@ var ENV_LOADER_PATH = "/usr/local/bin/vm0-agent/env-loader.mjs";
8729
9107
 
8730
9108
  // src/lib/proxy/vm-registry.ts
8731
9109
  import fs6 from "fs";
8732
- var logger5 = createLogger("VMRegistry");
9110
+ var logger6 = createLogger("VMRegistry");
8733
9111
  var DEFAULT_REGISTRY_PATH = tempPaths.vmRegistry;
8734
9112
  var VMRegistry = class {
8735
9113
  registryPath;
@@ -8776,7 +9154,7 @@ var VMRegistry = class {
8776
9154
  this.save();
8777
9155
  const firewallInfo = options?.firewallRules ? ` with ${options.firewallRules.length} firewall rules` : "";
8778
9156
  const mitmInfo = options?.mitmEnabled ? ", MITM enabled" : "";
8779
- logger5.log(
9157
+ logger6.log(
8780
9158
  `Registered VM ${vmIp} for run ${runId}${firewallInfo}${mitmInfo}`
8781
9159
  );
8782
9160
  }
@@ -8788,7 +9166,7 @@ var VMRegistry = class {
8788
9166
  const registration = this.data.vms[vmIp];
8789
9167
  delete this.data.vms[vmIp];
8790
9168
  this.save();
8791
- logger5.log(`Unregistered VM ${vmIp} (run ${registration.runId})`);
9169
+ logger6.log(`Unregistered VM ${vmIp} (run ${registration.runId})`);
8792
9170
  }
8793
9171
  }
8794
9172
  /**
@@ -8809,7 +9187,7 @@ var VMRegistry = class {
8809
9187
  clear() {
8810
9188
  this.data.vms = {};
8811
9189
  this.save();
8812
- logger5.log("Cleared all registrations");
9190
+ logger6.log("Cleared all registrations");
8813
9191
  }
8814
9192
  /**
8815
9193
  * Get the path to the registry file
@@ -8818,22 +9196,22 @@ var VMRegistry = class {
8818
9196
  return this.registryPath;
8819
9197
  }
8820
9198
  };
8821
- var globalRegistry = null;
9199
+ var globalRegistry2 = null;
8822
9200
  function getVMRegistry() {
8823
- if (!globalRegistry) {
8824
- globalRegistry = new VMRegistry();
9201
+ if (!globalRegistry2) {
9202
+ globalRegistry2 = new VMRegistry();
8825
9203
  }
8826
- return globalRegistry;
9204
+ return globalRegistry2;
8827
9205
  }
8828
9206
  function initVMRegistry(registryPath) {
8829
- globalRegistry = new VMRegistry(registryPath);
8830
- return globalRegistry;
9207
+ globalRegistry2 = new VMRegistry(registryPath);
9208
+ return globalRegistry2;
8831
9209
  }
8832
9210
 
8833
9211
  // src/lib/proxy/proxy-manager.ts
8834
9212
  import { spawn as spawn2 } from "child_process";
8835
9213
  import fs7 from "fs";
8836
- import path4 from "path";
9214
+ import path5 from "path";
8837
9215
 
8838
9216
  // src/lib/proxy/mitm-addon-script.ts
8839
9217
  var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
@@ -9319,7 +9697,7 @@ addons = [tls_clienthello, request, response]
9319
9697
  `;
9320
9698
 
9321
9699
  // src/lib/proxy/proxy-manager.ts
9322
- var logger6 = createLogger("ProxyManager");
9700
+ var logger7 = createLogger("ProxyManager");
9323
9701
  var DEFAULT_PROXY_OPTIONS = {
9324
9702
  port: 8080,
9325
9703
  registryPath: DEFAULT_REGISTRY_PATH
@@ -9329,7 +9707,7 @@ var ProxyManager = class {
9329
9707
  process = null;
9330
9708
  isRunning = false;
9331
9709
  constructor(config) {
9332
- const addonPath = path4.join(config.caDir, "mitm_addon.py");
9710
+ const addonPath = path5.join(config.caDir, "mitm_addon.py");
9333
9711
  this.config = {
9334
9712
  ...DEFAULT_PROXY_OPTIONS,
9335
9713
  ...config,
@@ -9356,14 +9734,14 @@ var ProxyManager = class {
9356
9734
  * Ensure the addon script exists at the configured path
9357
9735
  */
9358
9736
  ensureAddonScript() {
9359
- const addonDir = path4.dirname(this.config.addonPath);
9737
+ const addonDir = path5.dirname(this.config.addonPath);
9360
9738
  if (!fs7.existsSync(addonDir)) {
9361
9739
  fs7.mkdirSync(addonDir, { recursive: true });
9362
9740
  }
9363
9741
  fs7.writeFileSync(this.config.addonPath, RUNNER_MITM_ADDON_SCRIPT, {
9364
9742
  mode: 493
9365
9743
  });
9366
- logger6.log(`Addon script written to ${this.config.addonPath}`);
9744
+ logger7.log(`Addon script written to ${this.config.addonPath}`);
9367
9745
  }
9368
9746
  /**
9369
9747
  * Validate proxy configuration
@@ -9372,7 +9750,7 @@ var ProxyManager = class {
9372
9750
  if (!fs7.existsSync(this.config.caDir)) {
9373
9751
  throw new Error(`Proxy CA directory not found: ${this.config.caDir}`);
9374
9752
  }
9375
- const caCertPath = path4.join(this.config.caDir, "mitmproxy-ca.pem");
9753
+ const caCertPath = path5.join(this.config.caDir, "mitmproxy-ca.pem");
9376
9754
  if (!fs7.existsSync(caCertPath)) {
9377
9755
  throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
9378
9756
  }
@@ -9383,7 +9761,7 @@ var ProxyManager = class {
9383
9761
  */
9384
9762
  async start() {
9385
9763
  if (this.isRunning) {
9386
- logger6.log("Proxy already running");
9764
+ logger7.log("Proxy already running");
9387
9765
  return;
9388
9766
  }
9389
9767
  const mitmproxyInstalled = await this.checkMitmproxyInstalled();
@@ -9394,11 +9772,11 @@ var ProxyManager = class {
9394
9772
  }
9395
9773
  this.validateConfig();
9396
9774
  getVMRegistry();
9397
- logger6.log("Starting mitmproxy...");
9398
- logger6.log(` Port: ${this.config.port}`);
9399
- logger6.log(` CA Dir: ${this.config.caDir}`);
9400
- logger6.log(` Addon: ${this.config.addonPath}`);
9401
- logger6.log(` Registry: ${this.config.registryPath}`);
9775
+ logger7.log("Starting mitmproxy...");
9776
+ logger7.log(` Port: ${this.config.port}`);
9777
+ logger7.log(` CA Dir: ${this.config.caDir}`);
9778
+ logger7.log(` Addon: ${this.config.addonPath}`);
9779
+ logger7.log(` Registry: ${this.config.registryPath}`);
9402
9780
  const args = [
9403
9781
  "--mode",
9404
9782
  "transparent",
@@ -9428,18 +9806,18 @@ var ProxyManager = class {
9428
9806
  mitmLogger.log(data.toString().trim());
9429
9807
  });
9430
9808
  this.process.on("close", (code) => {
9431
- logger6.log(`mitmproxy exited with code ${code}`);
9809
+ logger7.log(`mitmproxy exited with code ${code}`);
9432
9810
  this.isRunning = false;
9433
9811
  this.process = null;
9434
9812
  });
9435
9813
  this.process.on("error", (err) => {
9436
- logger6.error(`mitmproxy error: ${err.message}`);
9814
+ logger7.error(`mitmproxy error: ${err.message}`);
9437
9815
  this.isRunning = false;
9438
9816
  this.process = null;
9439
9817
  });
9440
9818
  await this.waitForReady();
9441
9819
  this.isRunning = true;
9442
- logger6.log("mitmproxy started successfully");
9820
+ logger7.log("mitmproxy started successfully");
9443
9821
  process.on("exit", () => {
9444
9822
  if (this.process && !this.process.killed) {
9445
9823
  this.process.kill("SIGKILL");
@@ -9470,24 +9848,24 @@ var ProxyManager = class {
9470
9848
  */
9471
9849
  async stop() {
9472
9850
  if (!this.process || !this.isRunning) {
9473
- logger6.log("Proxy not running");
9851
+ logger7.log("Proxy not running");
9474
9852
  return;
9475
9853
  }
9476
- logger6.log("Stopping mitmproxy...");
9854
+ logger7.log("Stopping mitmproxy...");
9477
9855
  return new Promise((resolve) => {
9478
9856
  if (!this.process) {
9479
9857
  resolve();
9480
9858
  return;
9481
9859
  }
9482
9860
  const timeout = setTimeout(() => {
9483
- logger6.log("Force killing mitmproxy...");
9861
+ logger7.log("Force killing mitmproxy...");
9484
9862
  this.process?.kill("SIGKILL");
9485
9863
  }, 5e3);
9486
9864
  this.process.on("close", () => {
9487
9865
  clearTimeout(timeout);
9488
9866
  this.isRunning = false;
9489
9867
  this.process = null;
9490
- logger6.log("mitmproxy stopped");
9868
+ logger7.log("mitmproxy stopped");
9491
9869
  resolve();
9492
9870
  });
9493
9871
  this.process.kill("SIGTERM");
@@ -9632,15 +10010,15 @@ async function withSandboxTiming(actionType, fn) {
9632
10010
  }
9633
10011
 
9634
10012
  // src/lib/vm-setup/vm-setup.ts
9635
- var logger7 = createLogger("VMSetup");
10013
+ var logger8 = createLogger("VMSetup");
9636
10014
  var VM_PROXY_CA_PATH = "/usr/local/share/ca-certificates/vm0-proxy-ca.crt";
9637
10015
  async function downloadStorages(guest, manifest) {
9638
10016
  const totalArchives = manifest.storages.filter((s) => s.archiveUrl).length + (manifest.artifact?.archiveUrl ? 1 : 0);
9639
10017
  if (totalArchives === 0) {
9640
- logger7.log(`No archives to download`);
10018
+ logger8.log(`No archives to download`);
9641
10019
  return;
9642
10020
  }
9643
- logger7.log(`Downloading ${totalArchives} archive(s)...`);
10021
+ logger8.log(`Downloading ${totalArchives} archive(s)...`);
9644
10022
  const manifestJson = JSON.stringify(manifest);
9645
10023
  await guest.writeFile("/tmp/storage-manifest.json", manifestJson);
9646
10024
  const result = await guest.exec(
@@ -9649,23 +10027,23 @@ async function downloadStorages(guest, manifest) {
9649
10027
  if (result.exitCode !== 0) {
9650
10028
  throw new Error(`Storage download failed: ${result.stderr}`);
9651
10029
  }
9652
- logger7.log(`Storage download completed`);
10030
+ logger8.log(`Storage download completed`);
9653
10031
  }
9654
10032
  async function restoreSessionHistory(guest, resumeSession, workingDir, cliAgentType) {
9655
10033
  const { sessionId, sessionHistory } = resumeSession;
9656
10034
  let sessionPath;
9657
10035
  if (cliAgentType === "codex") {
9658
- logger7.log(`Codex resume session will be handled by checkpoint.py`);
10036
+ logger8.log(`Codex resume session will be handled by checkpoint.py`);
9659
10037
  return;
9660
10038
  } else {
9661
10039
  const projectName = workingDir.replace(/^\//, "").replace(/\//g, "-");
9662
10040
  sessionPath = `/home/user/.claude/projects/-${projectName}/${sessionId}.jsonl`;
9663
10041
  }
9664
- logger7.log(`Restoring session history to ${sessionPath}`);
10042
+ logger8.log(`Restoring session history to ${sessionPath}`);
9665
10043
  const dirPath = sessionPath.substring(0, sessionPath.lastIndexOf("/"));
9666
10044
  await guest.execOrThrow(`mkdir -p "${dirPath}"`);
9667
10045
  await guest.writeFile(sessionPath, sessionHistory);
9668
- logger7.log(
10046
+ logger8.log(
9669
10047
  `Session history restored (${sessionHistory.split("\n").length} lines)`
9670
10048
  );
9671
10049
  }
@@ -9719,7 +10097,7 @@ function buildEnvironmentVariables(context, apiUrl) {
9719
10097
 
9720
10098
  // src/lib/network-logs/network-logs.ts
9721
10099
  import fs8 from "fs";
9722
- var logger8 = createLogger("NetworkLogs");
10100
+ var logger9 = createLogger("NetworkLogs");
9723
10101
  function getNetworkLogPath(runId) {
9724
10102
  return tempPaths.networkLog(runId);
9725
10103
  }
@@ -9733,7 +10111,7 @@ function readNetworkLogs(runId) {
9733
10111
  const lines = content.split("\n").filter((line) => line.trim());
9734
10112
  return lines.map((line) => JSON.parse(line));
9735
10113
  } catch (err) {
9736
- logger8.error(
10114
+ logger9.error(
9737
10115
  `Failed to read network logs: ${err instanceof Error ? err.message : "Unknown error"}`
9738
10116
  );
9739
10117
  return [];
@@ -9746,7 +10124,7 @@ function cleanupNetworkLogs(runId) {
9746
10124
  fs8.unlinkSync(logPath);
9747
10125
  }
9748
10126
  } catch (err) {
9749
- logger8.error(
10127
+ logger9.error(
9750
10128
  `Failed to cleanup network logs: ${err instanceof Error ? err.message : "Unknown error"}`
9751
10129
  );
9752
10130
  }
@@ -9754,10 +10132,10 @@ function cleanupNetworkLogs(runId) {
9754
10132
  async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
9755
10133
  const networkLogs = readNetworkLogs(runId);
9756
10134
  if (networkLogs.length === 0) {
9757
- logger8.log(`No network logs to upload for ${runId}`);
10135
+ logger9.log(`No network logs to upload for ${runId}`);
9758
10136
  return;
9759
10137
  }
9760
- logger8.log(
10138
+ logger9.log(
9761
10139
  `Uploading ${networkLogs.length} network log entries for ${runId}`
9762
10140
  );
9763
10141
  const headers = {
@@ -9778,15 +10156,15 @@ async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
9778
10156
  });
9779
10157
  if (!response.ok) {
9780
10158
  const errorText = await response.text();
9781
- logger8.error(`Failed to upload network logs: ${errorText}`);
10159
+ logger9.error(`Failed to upload network logs: ${errorText}`);
9782
10160
  return;
9783
10161
  }
9784
- logger8.log(`Network logs uploaded successfully for ${runId}`);
10162
+ logger9.log(`Network logs uploaded successfully for ${runId}`);
9785
10163
  cleanupNetworkLogs(runId);
9786
10164
  }
9787
10165
 
9788
10166
  // src/lib/executor.ts
9789
- var logger9 = createLogger("Executor");
10167
+ var logger10 = createLogger("Executor");
9790
10168
  function getVmIdFromRunId(runId) {
9791
10169
  return runId.split("-")[0] || runId.substring(0, 8);
9792
10170
  }
@@ -9806,9 +10184,9 @@ async function executeJob(context, config, options = {}) {
9806
10184
  const vmId = getVmIdFromRunId(context.runId);
9807
10185
  let vm = null;
9808
10186
  let guestIp = null;
9809
- logger9.log(`Starting job ${context.runId} in VM ${vmId}`);
10187
+ logger10.log(`Starting job ${context.runId} in VM ${vmId}`);
9810
10188
  try {
9811
- const workspacesDir = path5.join(process.cwd(), "workspaces");
10189
+ const workspacesDir = path6.join(process.cwd(), "workspaces");
9812
10190
  const vmConfig = {
9813
10191
  vmId,
9814
10192
  vcpus: config.sandbox.vcpu,
@@ -9816,30 +10194,30 @@ async function executeJob(context, config, options = {}) {
9816
10194
  kernelPath: config.firecracker.kernel,
9817
10195
  rootfsPath: config.firecracker.rootfs,
9818
10196
  firecrackerBinary: config.firecracker.binary,
9819
- workDir: path5.join(workspacesDir, `vm0-${vmId}`)
10197
+ workDir: path6.join(workspacesDir, `vm0-${vmId}`)
9820
10198
  };
9821
- logger9.log(`Creating VM ${vmId}...`);
10199
+ logger10.log(`Creating VM ${vmId}...`);
9822
10200
  vm = new FirecrackerVM(vmConfig);
9823
10201
  await withSandboxTiming("vm_create", () => vm.start());
9824
10202
  guestIp = vm.getGuestIp();
9825
10203
  if (!guestIp) {
9826
10204
  throw new Error("VM started but no IP address available");
9827
10205
  }
9828
- logger9.log(`VM ${vmId} started, guest IP: ${guestIp}`);
10206
+ logger10.log(`VM ${vmId} started, guest IP: ${guestIp}`);
9829
10207
  const vsockPath = vm.getVsockPath();
9830
10208
  const guest = new VsockClient(vsockPath);
9831
- logger9.log(`Using vsock for guest communication: ${vsockPath}`);
9832
- logger9.log(`Waiting for guest connection...`);
10209
+ logger10.log(`Using vsock for guest communication: ${vsockPath}`);
10210
+ logger10.log(`Waiting for guest connection...`);
9833
10211
  await withSandboxTiming(
9834
10212
  "guest_wait",
9835
10213
  () => guest.waitForGuestConnection(3e4)
9836
10214
  );
9837
- logger9.log(`Guest client ready`);
10215
+ logger10.log(`Guest client ready`);
9838
10216
  const firewallConfig = context.experimentalFirewall;
9839
10217
  if (firewallConfig?.enabled) {
9840
10218
  const mitmEnabled = firewallConfig.experimental_mitm ?? false;
9841
10219
  const sealSecretsEnabled = firewallConfig.experimental_seal_secrets ?? false;
9842
- logger9.log(
10220
+ logger10.log(
9843
10221
  `Setting up network security for VM ${guestIp} (mitm=${mitmEnabled}, sealSecrets=${sealSecretsEnabled})`
9844
10222
  );
9845
10223
  await withSandboxTiming("network_setup", async () => {
@@ -9874,7 +10252,7 @@ async function executeJob(context, config, options = {}) {
9874
10252
  }
9875
10253
  const envVars = buildEnvironmentVariables(context, config.server.url);
9876
10254
  const envJson = JSON.stringify(envVars);
9877
- logger9.log(
10255
+ logger10.log(
9878
10256
  `Writing env JSON (${envJson.length} bytes) to ${ENV_JSON_PATH}`
9879
10257
  );
9880
10258
  await guest.writeFile(ENV_JSON_PATH, envJson);
@@ -9883,14 +10261,14 @@ async function executeJob(context, config, options = {}) {
9883
10261
  const maxWaitMs = 2 * 60 * 60 * 1e3;
9884
10262
  let command;
9885
10263
  if (options.benchmarkMode) {
9886
- logger9.log(`Running command directly (benchmark mode)...`);
10264
+ logger10.log(`Running command directly (benchmark mode)...`);
9887
10265
  command = `${context.prompt} > ${systemLogFile} 2>&1`;
9888
10266
  } else {
9889
- logger9.log(`Running agent via env-loader...`);
10267
+ logger10.log(`Running agent via env-loader...`);
9890
10268
  command = `node ${ENV_LOADER_PATH} > ${systemLogFile} 2>&1`;
9891
10269
  }
9892
10270
  const { pid } = await guest.spawnAndWatch(command, maxWaitMs);
9893
- logger9.log(`Process started with pid=${pid}`);
10271
+ logger10.log(`Process started with pid=${pid}`);
9894
10272
  let exitCode = 1;
9895
10273
  let exitEvent;
9896
10274
  try {
@@ -9899,7 +10277,7 @@ async function executeJob(context, config, options = {}) {
9899
10277
  } catch {
9900
10278
  const durationMs2 = Date.now() - startTime;
9901
10279
  const duration2 = Math.round(durationMs2 / 1e3);
9902
- logger9.log(`Agent timed out after ${duration2}s`);
10280
+ logger10.log(`Agent timed out after ${duration2}s`);
9903
10281
  recordOperation({
9904
10282
  actionType: "agent_execute",
9905
10283
  durationMs: durationMs2,
@@ -9917,7 +10295,7 @@ async function executeJob(context, config, options = {}) {
9917
10295
  `dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
9918
10296
  );
9919
10297
  if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
9920
- logger9.log(`OOM detected: ${dmesgCheck.stdout}`);
10298
+ logger10.log(`OOM detected: ${dmesgCheck.stdout}`);
9921
10299
  recordOperation({
9922
10300
  actionType: "agent_execute",
9923
10301
  durationMs,
@@ -9934,9 +10312,9 @@ async function executeJob(context, config, options = {}) {
9934
10312
  durationMs,
9935
10313
  success: exitCode === 0
9936
10314
  });
9937
- logger9.log(`Agent finished in ${duration}s with exit code ${exitCode}`);
10315
+ logger10.log(`Agent finished in ${duration}s with exit code ${exitCode}`);
9938
10316
  if (exitEvent.stderr) {
9939
- logger9.log(
10317
+ logger10.log(
9940
10318
  `Stderr (${exitEvent.stderr.length} chars): ${exitEvent.stderr.substring(0, 500)}`
9941
10319
  );
9942
10320
  }
@@ -9946,14 +10324,14 @@ async function executeJob(context, config, options = {}) {
9946
10324
  };
9947
10325
  } catch (error) {
9948
10326
  const errorMsg = error instanceof Error ? error.message : "Unknown error";
9949
- logger9.error(`Job ${context.runId} failed: ${errorMsg}`);
10327
+ logger10.error(`Job ${context.runId} failed: ${errorMsg}`);
9950
10328
  return {
9951
10329
  exitCode: 1,
9952
10330
  error: errorMsg
9953
10331
  };
9954
10332
  } finally {
9955
10333
  if (context.experimentalFirewall?.enabled && guestIp) {
9956
- logger9.log(`Cleaning up network security for VM ${guestIp}`);
10334
+ logger10.log(`Cleaning up network security for VM ${guestIp}`);
9957
10335
  getVMRegistry().unregister(guestIp);
9958
10336
  if (!options.benchmarkMode) {
9959
10337
  try {
@@ -9963,14 +10341,14 @@ async function executeJob(context, config, options = {}) {
9963
10341
  context.runId
9964
10342
  );
9965
10343
  } catch (err) {
9966
- logger9.error(
10344
+ logger10.error(
9967
10345
  `Failed to upload network logs: ${err instanceof Error ? err.message : "Unknown error"}`
9968
10346
  );
9969
10347
  }
9970
10348
  }
9971
10349
  }
9972
10350
  if (vm) {
9973
- logger9.log(`Cleaning up VM ${vmId}...`);
10351
+ logger10.log(`Cleaning up VM ${vmId}...`);
9974
10352
  await withSandboxTiming("cleanup", () => vm.kill());
9975
10353
  }
9976
10354
  await clearSandboxContext();
@@ -9979,7 +10357,7 @@ async function executeJob(context, config, options = {}) {
9979
10357
 
9980
10358
  // src/lib/runner/status.ts
9981
10359
  import { writeFileSync as writeFileSync2 } from "fs";
9982
- var logger10 = createLogger("Runner");
10360
+ var logger11 = createLogger("Runner");
9983
10361
  function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
9984
10362
  const status = {
9985
10363
  mode,
@@ -9991,7 +10369,7 @@ function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
9991
10369
  try {
9992
10370
  writeFileSync2(statusFilePath, JSON.stringify(status, null, 2));
9993
10371
  } catch (err) {
9994
- logger10.error(
10372
+ logger11.error(
9995
10373
  `Failed to write status file: ${err instanceof Error ? err.message : "Unknown error"}`
9996
10374
  );
9997
10375
  }
@@ -10008,25 +10386,25 @@ function createStatusUpdater(statusFilePath, state) {
10008
10386
  }
10009
10387
 
10010
10388
  // src/lib/runner/runner-lock.ts
10011
- import { exec as exec4 } from "child_process";
10389
+ import { exec as exec5 } from "child_process";
10012
10390
  import fs9 from "fs";
10013
- import path6 from "path";
10014
- import { promisify as promisify4 } from "util";
10015
- var execAsync4 = promisify4(exec4);
10016
- var logger11 = createLogger("RunnerLock");
10017
- var DEFAULT_PID_FILE = paths.runnerPid;
10391
+ import path7 from "path";
10392
+ import { promisify as promisify5 } from "util";
10393
+ var execAsync5 = promisify5(exec5);
10394
+ var logger12 = createLogger("RunnerLock");
10395
+ var DEFAULT_PID_FILE = runtimePaths.runnerPid;
10018
10396
  var currentPidFile = null;
10019
- async function ensureRunDir2(dirPath, skipSudo) {
10397
+ async function ensureRunDir(dirPath, skipSudo) {
10020
10398
  if (!fs9.existsSync(dirPath)) {
10021
10399
  if (skipSudo) {
10022
10400
  fs9.mkdirSync(dirPath, { recursive: true });
10023
10401
  } else {
10024
- await execAsync4(`sudo mkdir -p ${dirPath}`);
10025
- await execAsync4(`sudo chmod 777 ${dirPath}`);
10402
+ await execAsync5(`sudo mkdir -p ${dirPath}`);
10403
+ await execAsync5(`sudo chmod 777 ${dirPath}`);
10026
10404
  }
10027
10405
  }
10028
10406
  }
10029
- function isProcessRunning(pid) {
10407
+ function isProcessRunning2(pid) {
10030
10408
  try {
10031
10409
  process.kill(pid, 0);
10032
10410
  return true;
@@ -10040,64 +10418,68 @@ function isProcessRunning(pid) {
10040
10418
  async function acquireRunnerLock(options = {}) {
10041
10419
  const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
10042
10420
  const skipSudo = options.skipSudo ?? false;
10043
- const runDir = path6.dirname(pidFile);
10044
- await ensureRunDir2(runDir, skipSudo);
10421
+ const runDir = path7.dirname(pidFile);
10422
+ await ensureRunDir(runDir, skipSudo);
10045
10423
  if (fs9.existsSync(pidFile)) {
10046
10424
  const pidStr = fs9.readFileSync(pidFile, "utf-8").trim();
10047
10425
  const pid = parseInt(pidStr, 10);
10048
- if (!isNaN(pid) && isProcessRunning(pid)) {
10049
- logger11.error(`Error: Another runner is already running (PID ${pid})`);
10050
- logger11.error(`If this is incorrect, remove ${pidFile} and try again.`);
10426
+ if (!isNaN(pid) && isProcessRunning2(pid)) {
10427
+ logger12.error(`Error: Another runner is already running (PID ${pid})`);
10428
+ logger12.error(`If this is incorrect, remove ${pidFile} and try again.`);
10051
10429
  process.exit(1);
10052
10430
  }
10053
10431
  if (isNaN(pid)) {
10054
- logger11.log("Cleaning up invalid PID file");
10432
+ logger12.log("Cleaning up invalid PID file");
10055
10433
  } else {
10056
- logger11.log(`Cleaning up stale PID file (PID ${pid} not running)`);
10434
+ logger12.log(`Cleaning up stale PID file (PID ${pid} not running)`);
10057
10435
  }
10058
10436
  fs9.unlinkSync(pidFile);
10059
10437
  }
10060
10438
  fs9.writeFileSync(pidFile, process.pid.toString());
10061
10439
  currentPidFile = pidFile;
10062
- logger11.log(`Runner lock acquired (PID ${process.pid})`);
10440
+ logger12.log(`Runner lock acquired (PID ${process.pid})`);
10063
10441
  }
10064
10442
  function releaseRunnerLock() {
10065
10443
  const pidFile = currentPidFile ?? DEFAULT_PID_FILE;
10066
10444
  if (fs9.existsSync(pidFile)) {
10067
10445
  fs9.unlinkSync(pidFile);
10068
- logger11.log("Runner lock released");
10446
+ logger12.log("Runner lock released");
10069
10447
  }
10070
10448
  currentPidFile = null;
10071
10449
  }
10072
10450
 
10073
10451
  // src/lib/runner/setup.ts
10074
- var logger12 = createLogger("Runner");
10452
+ var logger13 = createLogger("Runner");
10075
10453
  async function setupEnvironment(options) {
10076
10454
  const { config } = options;
10077
10455
  await acquireRunnerLock();
10078
10456
  const networkCheck = checkNetworkPrerequisites();
10079
10457
  if (!networkCheck.ok) {
10080
- logger12.error("Network prerequisites not met:");
10458
+ logger13.error("Network prerequisites not met:");
10081
10459
  for (const error of networkCheck.errors) {
10082
- logger12.error(` - ${error}`);
10460
+ logger13.error(` - ${error}`);
10083
10461
  }
10084
10462
  process.exit(1);
10085
10463
  }
10086
- logger12.log("Setting up network bridge...");
10464
+ logger13.log("Setting up network bridge...");
10087
10465
  await setupBridge();
10088
- logger12.log("Flushing bridge ARP cache...");
10466
+ logger13.log("Flushing bridge ARP cache...");
10089
10467
  await flushBridgeArpCache();
10090
- logger12.log("Cleaning up orphaned proxy rules...");
10468
+ logger13.log("Cleaning up orphaned proxy rules...");
10091
10469
  await cleanupOrphanedProxyRules(config.name);
10092
- logger12.log("Cleaning up orphaned IP allocations...");
10093
- await cleanupOrphanedAllocations();
10094
- logger12.log("Initializing overlay pool...");
10470
+ logger13.log("Initializing overlay pool...");
10095
10471
  await initOverlayPool({
10096
10472
  size: config.sandbox.max_concurrent + 2,
10097
10473
  replenishThreshold: config.sandbox.max_concurrent,
10098
10474
  poolDir: dataPaths.overlayPool(config.data_dir)
10099
10475
  });
10100
- logger12.log("Initializing network proxy...");
10476
+ logger13.log("Initializing TAP pool...");
10477
+ await initTapPool({
10478
+ name: config.name,
10479
+ size: config.sandbox.max_concurrent + 2,
10480
+ replenishThreshold: config.sandbox.max_concurrent
10481
+ });
10482
+ logger13.log("Initializing network proxy...");
10101
10483
  initVMRegistry();
10102
10484
  const proxyManager = initProxyManager({
10103
10485
  apiUrl: config.server.url,
@@ -10108,14 +10490,14 @@ async function setupEnvironment(options) {
10108
10490
  try {
10109
10491
  await proxyManager.start();
10110
10492
  proxyEnabled = true;
10111
- logger12.log("Network proxy initialized successfully");
10112
- logger12.log("Setting up CIDR proxy rules...");
10493
+ logger13.log("Network proxy initialized successfully");
10494
+ logger13.log("Setting up CIDR proxy rules...");
10113
10495
  await setupCIDRProxyRules(config.proxy.port);
10114
10496
  } catch (err) {
10115
- logger12.log(
10497
+ logger13.log(
10116
10498
  `Network proxy not available: ${err instanceof Error ? err.message : "Unknown error"}`
10117
10499
  );
10118
- logger12.log(
10500
+ logger13.log(
10119
10501
  "Jobs with experimentalFirewall enabled will run without network interception"
10120
10502
  );
10121
10503
  }
@@ -10125,22 +10507,22 @@ async function cleanupEnvironment(resources) {
10125
10507
  const errors = [];
10126
10508
  if (resources.proxyEnabled) {
10127
10509
  try {
10128
- logger12.log("Cleaning up CIDR proxy rules...");
10510
+ logger13.log("Cleaning up CIDR proxy rules...");
10129
10511
  await cleanupCIDRProxyRules(resources.proxyPort);
10130
10512
  } catch (err) {
10131
10513
  const error = err instanceof Error ? err : new Error(String(err));
10132
10514
  errors.push(error);
10133
- logger12.error(`Failed to cleanup CIDR proxy rules: ${error.message}`);
10515
+ logger13.error(`Failed to cleanup CIDR proxy rules: ${error.message}`);
10134
10516
  }
10135
10517
  }
10136
10518
  if (resources.proxyEnabled) {
10137
10519
  try {
10138
- logger12.log("Stopping network proxy...");
10520
+ logger13.log("Stopping network proxy...");
10139
10521
  await getProxyManager().stop();
10140
10522
  } catch (err) {
10141
10523
  const error = err instanceof Error ? err : new Error(String(err));
10142
10524
  errors.push(error);
10143
- logger12.error(`Failed to stop network proxy: ${error.message}`);
10525
+ logger13.error(`Failed to stop network proxy: ${error.message}`);
10144
10526
  }
10145
10527
  }
10146
10528
  try {
@@ -10148,39 +10530,46 @@ async function cleanupEnvironment(resources) {
10148
10530
  } catch (err) {
10149
10531
  const error = err instanceof Error ? err : new Error(String(err));
10150
10532
  errors.push(error);
10151
- logger12.error(`Failed to cleanup overlay pool: ${error.message}`);
10533
+ logger13.error(`Failed to cleanup overlay pool: ${error.message}`);
10534
+ }
10535
+ try {
10536
+ cleanupTapPool();
10537
+ } catch (err) {
10538
+ const error = err instanceof Error ? err : new Error(String(err));
10539
+ errors.push(error);
10540
+ logger13.error(`Failed to cleanup TAP pool: ${error.message}`);
10152
10541
  }
10153
10542
  try {
10154
10543
  releaseRunnerLock();
10155
10544
  } catch (err) {
10156
10545
  const error = err instanceof Error ? err : new Error(String(err));
10157
10546
  errors.push(error);
10158
- logger12.error(`Failed to release runner lock: ${error.message}`);
10547
+ logger13.error(`Failed to release runner lock: ${error.message}`);
10159
10548
  }
10160
10549
  if (errors.length > 0) {
10161
- logger12.error(`Cleanup completed with ${errors.length} error(s)`);
10550
+ logger13.error(`Cleanup completed with ${errors.length} error(s)`);
10162
10551
  }
10163
10552
  }
10164
10553
 
10165
10554
  // src/lib/runner/signals.ts
10166
- var logger13 = createLogger("Runner");
10555
+ var logger14 = createLogger("Runner");
10167
10556
  function setupSignalHandlers(state, handlers) {
10168
10557
  process.on("SIGINT", () => {
10169
- logger13.log("\nShutting down...");
10558
+ logger14.log("\nShutting down...");
10170
10559
  state.mode = "stopping";
10171
10560
  handlers.updateStatus();
10172
10561
  handlers.onShutdown();
10173
10562
  });
10174
10563
  process.on("SIGTERM", () => {
10175
- logger13.log("\nShutting down...");
10564
+ logger14.log("\nShutting down...");
10176
10565
  state.mode = "stopping";
10177
10566
  handlers.updateStatus();
10178
10567
  handlers.onShutdown();
10179
10568
  });
10180
10569
  process.on("SIGUSR1", () => {
10181
10570
  if (state.mode === "running") {
10182
- logger13.log("\n[Maintenance] Entering drain mode...");
10183
- logger13.log(
10571
+ logger14.log("\n[Maintenance] Entering drain mode...");
10572
+ logger14.log(
10184
10573
  `[Maintenance] Active jobs: ${state.activeRuns.size} (will wait for completion)`
10185
10574
  );
10186
10575
  state.mode = "draining";
@@ -10191,7 +10580,7 @@ function setupSignalHandlers(state, handlers) {
10191
10580
  }
10192
10581
 
10193
10582
  // src/lib/runner/runner.ts
10194
- var logger14 = createLogger("Runner");
10583
+ var logger15 = createLogger("Runner");
10195
10584
  var Runner = class _Runner {
10196
10585
  config;
10197
10586
  statusFilePath;
@@ -10230,7 +10619,7 @@ var Runner = class _Runner {
10230
10619
  onDrain: () => {
10231
10620
  this.pendingJobs.length = 0;
10232
10621
  if (this.state.activeRuns.size === 0) {
10233
- logger14.log("[Maintenance] No active jobs, exiting immediately");
10622
+ logger15.log("[Maintenance] No active jobs, exiting immediately");
10234
10623
  this.state.mode = "stopping";
10235
10624
  this.updateStatus();
10236
10625
  this.resolveShutdown?.();
@@ -10238,35 +10627,35 @@ var Runner = class _Runner {
10238
10627
  },
10239
10628
  updateStatus: this.updateStatus
10240
10629
  });
10241
- logger14.log(
10630
+ logger15.log(
10242
10631
  `Starting runner '${this.config.name}' for group '${this.config.group}'...`
10243
10632
  );
10244
- logger14.log(`Max concurrent jobs: ${this.config.sandbox.max_concurrent}`);
10245
- logger14.log(`Status file: ${this.statusFilePath}`);
10246
- logger14.log("Press Ctrl+C to stop");
10247
- logger14.log("");
10633
+ logger15.log(`Max concurrent jobs: ${this.config.sandbox.max_concurrent}`);
10634
+ logger15.log(`Status file: ${this.statusFilePath}`);
10635
+ logger15.log("Press Ctrl+C to stop");
10636
+ logger15.log("");
10248
10637
  this.updateStatus();
10249
- logger14.log("Checking for pending jobs...");
10638
+ logger15.log("Checking for pending jobs...");
10250
10639
  await this.pollFallback();
10251
- logger14.log("Connecting to realtime job notifications...");
10640
+ logger15.log("Connecting to realtime job notifications...");
10252
10641
  this.subscription = await subscribeToJobs(
10253
10642
  this.config.server,
10254
10643
  this.config.group,
10255
10644
  (notification) => {
10256
- logger14.log(`Ably notification: ${notification.runId}`);
10645
+ logger15.log(`Ably notification: ${notification.runId}`);
10257
10646
  this.processJob(notification.runId).catch(console.error);
10258
10647
  },
10259
10648
  (connectionState, reason) => {
10260
- logger14.log(
10649
+ logger15.log(
10261
10650
  `Ably connection: ${connectionState}${reason ? ` (${reason})` : ""}`
10262
10651
  );
10263
10652
  }
10264
10653
  );
10265
- logger14.log("Connected to realtime job notifications");
10654
+ logger15.log("Connected to realtime job notifications");
10266
10655
  this.pollInterval = setInterval(() => {
10267
10656
  this.pollFallback().catch(console.error);
10268
10657
  }, this.config.sandbox.poll_interval_ms);
10269
- logger14.log(
10658
+ logger15.log(
10270
10659
  `Polling fallback enabled (every ${this.config.sandbox.poll_interval_ms / 1e3}s)`
10271
10660
  );
10272
10661
  await shutdownPromise;
@@ -10277,7 +10666,7 @@ var Runner = class _Runner {
10277
10666
  this.subscription.cleanup();
10278
10667
  }
10279
10668
  if (this.state.jobPromises.size > 0) {
10280
- logger14.log(
10669
+ logger15.log(
10281
10670
  `Waiting for ${this.state.jobPromises.size} active job(s) to complete...`
10282
10671
  );
10283
10672
  await Promise.all(this.state.jobPromises);
@@ -10285,7 +10674,7 @@ var Runner = class _Runner {
10285
10674
  await cleanupEnvironment(this.resources);
10286
10675
  this.state.mode = "stopped";
10287
10676
  this.updateStatus();
10288
- logger14.log("Runner stopped");
10677
+ logger15.log("Runner stopped");
10289
10678
  process.exit(0);
10290
10679
  }
10291
10680
  /**
@@ -10302,11 +10691,11 @@ var Runner = class _Runner {
10302
10691
  () => pollForJob(this.config.server, this.config.group)
10303
10692
  );
10304
10693
  if (job) {
10305
- logger14.log(`Poll fallback found job: ${job.runId}`);
10694
+ logger15.log(`Poll fallback found job: ${job.runId}`);
10306
10695
  await this.processJob(job.runId);
10307
10696
  }
10308
10697
  } catch (error) {
10309
- logger14.error(
10698
+ logger15.error(
10310
10699
  `Poll fallback error: ${error instanceof Error ? error.message : "Unknown error"}`
10311
10700
  );
10312
10701
  }
@@ -10316,7 +10705,7 @@ var Runner = class _Runner {
10316
10705
  */
10317
10706
  async processJob(runId) {
10318
10707
  if (this.state.mode !== "running") {
10319
- logger14.log(`Not running (${this.state.mode}), ignoring job ${runId}`);
10708
+ logger15.log(`Not running (${this.state.mode}), ignoring job ${runId}`);
10320
10709
  return;
10321
10710
  }
10322
10711
  if (this.state.activeRuns.has(runId)) {
@@ -10324,10 +10713,10 @@ var Runner = class _Runner {
10324
10713
  }
10325
10714
  if (this.state.activeRuns.size >= this.config.sandbox.max_concurrent) {
10326
10715
  if (!this.pendingJobs.includes(runId) && this.pendingJobs.length < _Runner.MAX_PENDING_QUEUE_SIZE) {
10327
- logger14.log(`At capacity, queueing job ${runId}`);
10716
+ logger15.log(`At capacity, queueing job ${runId}`);
10328
10717
  this.pendingJobs.push(runId);
10329
10718
  } else if (this.pendingJobs.length >= _Runner.MAX_PENDING_QUEUE_SIZE) {
10330
- logger14.log(
10719
+ logger15.log(
10331
10720
  `Pending queue full (${_Runner.MAX_PENDING_QUEUE_SIZE}), dropping job ${runId}`
10332
10721
  );
10333
10722
  }
@@ -10338,11 +10727,11 @@ var Runner = class _Runner {
10338
10727
  "claim",
10339
10728
  () => claimJob(this.config.server, runId)
10340
10729
  );
10341
- logger14.log(`Claimed job: ${context.runId}`);
10730
+ logger15.log(`Claimed job: ${context.runId}`);
10342
10731
  this.state.activeRuns.add(context.runId);
10343
10732
  this.updateStatus();
10344
10733
  const jobPromise = this.executeJob(context).catch((error) => {
10345
- logger14.error(
10734
+ logger15.error(
10346
10735
  `Job ${context.runId} failed: ${error instanceof Error ? error.message : "Unknown error"}`
10347
10736
  );
10348
10737
  }).finally(() => {
@@ -10350,7 +10739,7 @@ var Runner = class _Runner {
10350
10739
  this.state.jobPromises.delete(jobPromise);
10351
10740
  this.updateStatus();
10352
10741
  if (this.state.mode === "draining" && this.state.activeRuns.size === 0) {
10353
- logger14.log("[Maintenance] All jobs completed, exiting");
10742
+ logger15.log("[Maintenance] All jobs completed, exiting");
10354
10743
  this.state.mode = "stopping";
10355
10744
  this.updateStatus();
10356
10745
  this.resolveShutdown?.();
@@ -10365,33 +10754,33 @@ var Runner = class _Runner {
10365
10754
  });
10366
10755
  this.state.jobPromises.add(jobPromise);
10367
10756
  } catch (error) {
10368
- logger14.log(
10757
+ logger15.log(
10369
10758
  `Could not claim job ${runId}: ${error instanceof Error ? error.message : "Unknown error"}`
10370
10759
  );
10371
10760
  }
10372
10761
  }
10373
10762
  async executeJob(context) {
10374
- logger14.log(` Executing job ${context.runId}...`);
10375
- logger14.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
10376
- logger14.log(` Compose version: ${context.agentComposeVersionId}`);
10763
+ logger15.log(` Executing job ${context.runId}...`);
10764
+ logger15.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
10765
+ logger15.log(` Compose version: ${context.agentComposeVersionId}`);
10377
10766
  try {
10378
10767
  const result = await executeJob(context, this.config);
10379
- logger14.log(
10768
+ logger15.log(
10380
10769
  ` Job ${context.runId} execution completed with exit code ${result.exitCode}`
10381
10770
  );
10382
10771
  if (result.exitCode !== 0 && result.error) {
10383
- logger14.error(` Job ${context.runId} failed: ${result.error}`);
10772
+ logger15.error(` Job ${context.runId} failed: ${result.error}`);
10384
10773
  }
10385
10774
  } catch (err) {
10386
10775
  const error = err instanceof Error ? err.message : "Unknown execution error";
10387
- logger14.error(` Job ${context.runId} execution failed: ${error}`);
10776
+ logger15.error(` Job ${context.runId} execution failed: ${error}`);
10388
10777
  const result = await completeJob(
10389
10778
  this.config.server.url,
10390
10779
  context,
10391
10780
  1,
10392
10781
  error
10393
10782
  );
10394
- logger14.log(` Job ${context.runId} reported as ${result.status}`);
10783
+ logger15.log(` Job ${context.runId} reported as ${result.status}`);
10395
10784
  }
10396
10785
  }
10397
10786
  };
@@ -10422,7 +10811,7 @@ import { dirname as dirname2, join as join2 } from "path";
10422
10811
 
10423
10812
  // src/lib/firecracker/process.ts
10424
10813
  import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3 } from "fs";
10425
- import path7 from "path";
10814
+ import path8 from "path";
10426
10815
  function parseFirecrackerCmdline(cmdline) {
10427
10816
  const args = cmdline.split("\0");
10428
10817
  if (!args[0]?.includes("firecracker")) return null;
@@ -10455,7 +10844,7 @@ function findFirecrackerProcesses() {
10455
10844
  for (const entry of entries) {
10456
10845
  if (!/^\d+$/.test(entry)) continue;
10457
10846
  const pid = parseInt(entry, 10);
10458
- const cmdlinePath = path7.join(procDir, entry, "cmdline");
10847
+ const cmdlinePath = path8.join(procDir, entry, "cmdline");
10459
10848
  if (!existsSync3(cmdlinePath)) continue;
10460
10849
  try {
10461
10850
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
@@ -10473,7 +10862,7 @@ function findProcessByVmId(vmId) {
10473
10862
  const processes = findFirecrackerProcesses();
10474
10863
  return processes.find((p) => p.vmId === vmId) || null;
10475
10864
  }
10476
- function isProcessRunning2(pid) {
10865
+ function isProcessRunning3(pid) {
10477
10866
  try {
10478
10867
  process.kill(pid, 0);
10479
10868
  return true;
@@ -10482,24 +10871,24 @@ function isProcessRunning2(pid) {
10482
10871
  }
10483
10872
  }
10484
10873
  async function killProcess(pid, timeoutMs = 5e3) {
10485
- if (!isProcessRunning2(pid)) return true;
10874
+ if (!isProcessRunning3(pid)) return true;
10486
10875
  try {
10487
10876
  process.kill(pid, "SIGTERM");
10488
10877
  } catch {
10489
- return !isProcessRunning2(pid);
10878
+ return !isProcessRunning3(pid);
10490
10879
  }
10491
10880
  const startTime = Date.now();
10492
10881
  while (Date.now() - startTime < timeoutMs) {
10493
- if (!isProcessRunning2(pid)) return true;
10882
+ if (!isProcessRunning3(pid)) return true;
10494
10883
  await new Promise((resolve) => setTimeout(resolve, 100));
10495
10884
  }
10496
- if (isProcessRunning2(pid)) {
10885
+ if (isProcessRunning3(pid)) {
10497
10886
  try {
10498
10887
  process.kill(pid, "SIGKILL");
10499
10888
  } catch {
10500
10889
  }
10501
10890
  }
10502
- return !isProcessRunning2(pid);
10891
+ return !isProcessRunning3(pid);
10503
10892
  }
10504
10893
  function findMitmproxyProcess() {
10505
10894
  const procDir = "/proc";
@@ -10512,7 +10901,7 @@ function findMitmproxyProcess() {
10512
10901
  for (const entry of entries) {
10513
10902
  if (!/^\d+$/.test(entry)) continue;
10514
10903
  const pid = parseInt(entry, 10);
10515
- const cmdlinePath = path7.join(procDir, entry, "cmdline");
10904
+ const cmdlinePath = path8.join(procDir, entry, "cmdline");
10516
10905
  if (!existsSync3(cmdlinePath)) continue;
10517
10906
  try {
10518
10907
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
@@ -10574,11 +10963,11 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10574
10963
  const warnings = [];
10575
10964
  const bridgeStatus = await checkBridgeStatus();
10576
10965
  if (bridgeStatus.exists) {
10577
- console.log(` \u2713 Bridge ${BRIDGE_NAME2} (${bridgeStatus.ip})`);
10966
+ console.log(` \u2713 Bridge ${BRIDGE_NAME} (${bridgeStatus.ip})`);
10578
10967
  } else {
10579
- console.log(` \u2717 Bridge ${BRIDGE_NAME2} not found`);
10968
+ console.log(` \u2717 Bridge ${BRIDGE_NAME} not found`);
10580
10969
  warnings.push({
10581
- message: `Network bridge ${BRIDGE_NAME2} does not exist`
10970
+ message: `Network bridge ${BRIDGE_NAME} does not exist`
10582
10971
  });
10583
10972
  }
10584
10973
  const proxyPort = config.proxy.port;
@@ -10601,7 +10990,6 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10601
10990
  }
10602
10991
  console.log("");
10603
10992
  const processes = findFirecrackerProcesses();
10604
- const tapDevices = await listTapDevices();
10605
10993
  const workspaces = existsSync4(workspacesDir) ? readdirSync2(workspacesDir).filter((d) => d.startsWith("vm0-")) : [];
10606
10994
  const jobs = [];
10607
10995
  const statusVmIds = /* @__PURE__ */ new Set();
@@ -10624,9 +11012,11 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10624
11012
  }
10625
11013
  const ipToVmIds = /* @__PURE__ */ new Map();
10626
11014
  for (const [ip, allocation] of allocations) {
10627
- const existing = ipToVmIds.get(ip) ?? [];
10628
- existing.push(allocation.vmId);
10629
- ipToVmIds.set(ip, existing);
11015
+ if (allocation.vmId) {
11016
+ const existing = ipToVmIds.get(ip) ?? [];
11017
+ existing.push(allocation.vmId);
11018
+ ipToVmIds.set(ip, existing);
11019
+ }
10630
11020
  }
10631
11021
  const maxConcurrent = config.sandbox.max_concurrent;
10632
11022
  console.log(`Runs (${jobs.length} active, max ${maxConcurrent}):`);
@@ -10674,14 +11064,6 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
10674
11064
  });
10675
11065
  }
10676
11066
  }
10677
- for (const tap of tapDevices) {
10678
- const vmId = tap.replace("tap", "");
10679
- if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
10680
- warnings.push({
10681
- message: `Orphan TAP device: ${tap} (no matching job or process)`
10682
- });
10683
- }
10684
- }
10685
11067
  for (const ws of workspaces) {
10686
11068
  const vmId = ws.replace("vm0-", "");
10687
11069
  if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
@@ -10746,7 +11128,7 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
10746
11128
  const { vmId, runId } = resolveRunId(runIdArg, statusFilePath);
10747
11129
  console.log(`Killing run ${vmId}...`);
10748
11130
  const proc = findProcessByVmId(vmId);
10749
- const tapDevice = `tap${vmId}`;
11131
+ const guestIp = getIPForVm(vmId);
10750
11132
  const workspaceDir = join3(workspacesDir, `vm0-${vmId}`);
10751
11133
  console.log("");
10752
11134
  console.log("Resources to clean up:");
@@ -10755,7 +11137,9 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
10755
11137
  } else {
10756
11138
  console.log(" - Firecracker process: not found");
10757
11139
  }
10758
- console.log(` - TAP device: ${tapDevice}`);
11140
+ if (guestIp) {
11141
+ console.log(` - IP address: ${guestIp} (TAP/IP released by runner)`);
11142
+ }
10759
11143
  console.log(` - Workspace: ${workspaceDir}`);
10760
11144
  if (runId) {
10761
11145
  console.log(` - status.json entry: ${runId.substring(0, 12)}...`);
@@ -10783,20 +11167,6 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
10783
11167
  message: "Not running"
10784
11168
  });
10785
11169
  }
10786
- try {
10787
- await deleteTapDevice(tapDevice);
10788
- results.push({
10789
- step: "TAP device",
10790
- success: true,
10791
- message: `${tapDevice} deleted`
10792
- });
10793
- } catch (error) {
10794
- results.push({
10795
- step: "TAP device",
10796
- success: false,
10797
- message: error instanceof Error ? error.message : "Unknown error"
10798
- });
10799
- }
10800
11170
  if (existsSync5(workspaceDir)) {
10801
11171
  try {
10802
11172
  rmSync(workspaceDir, { recursive: true, force: true });
@@ -10969,6 +11339,8 @@ var benchmarkCommand = new Command4("benchmark").description(
10969
11339
  ).argument("<prompt>", "The bash command to execute in the VM").option("--config <path>", "Config file path", "./runner.yaml").option("--working-dir <path>", "Working directory in VM", "/home/user").option("--agent-type <type>", "Agent type", "claude-code").action(async (prompt, options) => {
10970
11340
  const timer = new Timer();
10971
11341
  setGlobalLogger(timer.log.bind(timer));
11342
+ let exitCode = 1;
11343
+ let poolsInitialized = false;
10972
11344
  try {
10973
11345
  timer.log("Loading configuration...");
10974
11346
  const config = loadDebugConfig(options.config);
@@ -10984,12 +11356,14 @@ var benchmarkCommand = new Command4("benchmark").description(
10984
11356
  }
10985
11357
  timer.log("Setting up network bridge...");
10986
11358
  await setupBridge();
10987
- timer.log("Initializing overlay pool...");
11359
+ timer.log("Initializing pools...");
10988
11360
  await initOverlayPool({
10989
11361
  size: 2,
10990
11362
  replenishThreshold: 1,
10991
11363
  poolDir: dataPaths.overlayPool(config.data_dir)
10992
11364
  });
11365
+ await initTapPool({ name: config.name, size: 2, replenishThreshold: 1 });
11366
+ poolsInitialized = true;
10993
11367
  timer.log(`Executing command: ${prompt}`);
10994
11368
  const context = createBenchmarkContext(prompt, options);
10995
11369
  const result = await executeJob(context, config, {
@@ -11000,17 +11374,22 @@ var benchmarkCommand = new Command4("benchmark").description(
11000
11374
  timer.log(`Error: ${result.error}`);
11001
11375
  }
11002
11376
  timer.log(`Total time: ${timer.totalSeconds().toFixed(1)}s`);
11003
- process.exit(result.exitCode);
11377
+ exitCode = result.exitCode;
11004
11378
  } catch (error) {
11005
11379
  timer.log(
11006
11380
  `Error: ${error instanceof Error ? error.message : "Unknown error"}`
11007
11381
  );
11008
- process.exit(1);
11382
+ } finally {
11383
+ if (poolsInitialized) {
11384
+ cleanupTapPool();
11385
+ cleanupOverlayPool();
11386
+ }
11009
11387
  }
11388
+ process.exit(exitCode);
11010
11389
  });
11011
11390
 
11012
11391
  // src/index.ts
11013
- var version = true ? "3.7.3" : "0.1.0";
11392
+ var version = true ? "3.8.1" : "0.1.0";
11014
11393
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11015
11394
  program.addCommand(startCommand);
11016
11395
  program.addCommand(doctorCommand);