@vm0/runner 3.8.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +1063 -684
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -16,7 +16,7 @@ import yaml from "yaml";
|
|
|
16
16
|
import path from "path";
|
|
17
17
|
var VM0_RUN_DIR = "/var/run/vm0";
|
|
18
18
|
var VM0_TMP_PREFIX = "/tmp/vm0";
|
|
19
|
-
var
|
|
19
|
+
var runtimePaths = {
|
|
20
20
|
/** Runner PID file for single-instance lock */
|
|
21
21
|
runnerPid: path.join(VM0_RUN_DIR, "runner.pid"),
|
|
22
22
|
/** IP pool lock file */
|
|
@@ -303,12 +303,12 @@ async function subscribeToJobs(server, group, onJob, onConnectionChange) {
|
|
|
303
303
|
}
|
|
304
304
|
|
|
305
305
|
// src/lib/executor.ts
|
|
306
|
-
import
|
|
306
|
+
import path6 from "path";
|
|
307
307
|
|
|
308
308
|
// src/lib/firecracker/vm.ts
|
|
309
309
|
import { spawn } from "child_process";
|
|
310
310
|
import fs4 from "fs";
|
|
311
|
-
import
|
|
311
|
+
import path4 from "path";
|
|
312
312
|
import readline from "readline";
|
|
313
313
|
|
|
314
314
|
// src/lib/firecracker/client.ts
|
|
@@ -321,7 +321,7 @@ var FirecrackerClient = class {
|
|
|
321
321
|
/**
|
|
322
322
|
* Make HTTP request to Firecracker API
|
|
323
323
|
*/
|
|
324
|
-
async request(method,
|
|
324
|
+
async request(method, path9, body) {
|
|
325
325
|
return new Promise((resolve, reject) => {
|
|
326
326
|
const bodyStr = body !== void 0 ? JSON.stringify(body) : void 0;
|
|
327
327
|
const headers = {
|
|
@@ -334,11 +334,11 @@ var FirecrackerClient = class {
|
|
|
334
334
|
headers["Content-Length"] = Buffer.byteLength(bodyStr);
|
|
335
335
|
}
|
|
336
336
|
console.log(
|
|
337
|
-
`[FC API] ${method} ${
|
|
337
|
+
`[FC API] ${method} ${path9}${bodyStr ? ` (${Buffer.byteLength(bodyStr)} bytes)` : ""}`
|
|
338
338
|
);
|
|
339
339
|
const options = {
|
|
340
340
|
socketPath: this.socketPath,
|
|
341
|
-
path:
|
|
341
|
+
path: path9,
|
|
342
342
|
method,
|
|
343
343
|
headers,
|
|
344
344
|
// Disable agent to ensure fresh connection for each request
|
|
@@ -472,13 +472,8 @@ var FirecrackerClient = class {
|
|
|
472
472
|
};
|
|
473
473
|
|
|
474
474
|
// src/lib/firecracker/network.ts
|
|
475
|
-
import { execSync, exec
|
|
476
|
-
import { promisify as promisify2 } from "util";
|
|
477
|
-
|
|
478
|
-
// src/lib/firecracker/ip-pool.ts
|
|
479
|
-
import { exec } from "child_process";
|
|
475
|
+
import { execSync, exec } from "child_process";
|
|
480
476
|
import { promisify } from "util";
|
|
481
|
-
import * as fs2 from "fs";
|
|
482
477
|
|
|
483
478
|
// src/lib/logger.ts
|
|
484
479
|
var _log = null;
|
|
@@ -500,204 +495,10 @@ function createLogger(prefix) {
|
|
|
500
495
|
};
|
|
501
496
|
}
|
|
502
497
|
|
|
503
|
-
// src/lib/firecracker/
|
|
498
|
+
// src/lib/firecracker/network.ts
|
|
504
499
|
var execAsync = promisify(exec);
|
|
505
|
-
var logger = createLogger("
|
|
506
|
-
var REGISTRY_FILE_PATH = paths.ipRegistry;
|
|
500
|
+
var logger = createLogger("Network");
|
|
507
501
|
var BRIDGE_NAME = "vm0br0";
|
|
508
|
-
var IP_PREFIX = "172.16.0.";
|
|
509
|
-
var IP_START = 2;
|
|
510
|
-
var IP_END = 254;
|
|
511
|
-
var LOCK_TIMEOUT_MS = 1e4;
|
|
512
|
-
var LOCK_RETRY_INTERVAL_MS = 100;
|
|
513
|
-
var ALLOCATION_GRACE_PERIOD_MS = 3e4;
|
|
514
|
-
async function ensureRunDir() {
|
|
515
|
-
if (!fs2.existsSync(VM0_RUN_DIR)) {
|
|
516
|
-
await execAsync(`sudo mkdir -p ${VM0_RUN_DIR}`);
|
|
517
|
-
await execAsync(`sudo chmod 777 ${VM0_RUN_DIR}`);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
async function withLock(fn) {
|
|
521
|
-
await ensureRunDir();
|
|
522
|
-
const lockMarker = paths.ipPoolLock;
|
|
523
|
-
const startTime = Date.now();
|
|
524
|
-
let lockAcquired = false;
|
|
525
|
-
while (Date.now() - startTime < LOCK_TIMEOUT_MS) {
|
|
526
|
-
try {
|
|
527
|
-
fs2.writeFileSync(lockMarker, process.pid.toString(), { flag: "wx" });
|
|
528
|
-
lockAcquired = true;
|
|
529
|
-
break;
|
|
530
|
-
} catch {
|
|
531
|
-
try {
|
|
532
|
-
const pidStr = fs2.readFileSync(lockMarker, "utf-8");
|
|
533
|
-
const pid = parseInt(pidStr, 10);
|
|
534
|
-
try {
|
|
535
|
-
process.kill(pid, 0);
|
|
536
|
-
} catch {
|
|
537
|
-
fs2.unlinkSync(lockMarker);
|
|
538
|
-
continue;
|
|
539
|
-
}
|
|
540
|
-
} catch {
|
|
541
|
-
}
|
|
542
|
-
await new Promise(
|
|
543
|
-
(resolve) => setTimeout(resolve, LOCK_RETRY_INTERVAL_MS)
|
|
544
|
-
);
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
if (!lockAcquired) {
|
|
548
|
-
throw new Error(
|
|
549
|
-
`Failed to acquire IP pool lock after ${LOCK_TIMEOUT_MS}ms`
|
|
550
|
-
);
|
|
551
|
-
}
|
|
552
|
-
try {
|
|
553
|
-
return await fn();
|
|
554
|
-
} finally {
|
|
555
|
-
try {
|
|
556
|
-
fs2.unlinkSync(lockMarker);
|
|
557
|
-
} catch {
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
}
|
|
561
|
-
function readRegistry() {
|
|
562
|
-
try {
|
|
563
|
-
if (fs2.existsSync(REGISTRY_FILE_PATH)) {
|
|
564
|
-
const content = fs2.readFileSync(REGISTRY_FILE_PATH, "utf-8");
|
|
565
|
-
return JSON.parse(content);
|
|
566
|
-
}
|
|
567
|
-
} catch {
|
|
568
|
-
}
|
|
569
|
-
return { allocations: {} };
|
|
570
|
-
}
|
|
571
|
-
function writeRegistry(registry) {
|
|
572
|
-
fs2.writeFileSync(REGISTRY_FILE_PATH, JSON.stringify(registry, null, 2));
|
|
573
|
-
}
|
|
574
|
-
function getAllocations() {
|
|
575
|
-
const registry = readRegistry();
|
|
576
|
-
return new Map(Object.entries(registry.allocations));
|
|
577
|
-
}
|
|
578
|
-
function getIPForVm(vmId) {
|
|
579
|
-
const registry = readRegistry();
|
|
580
|
-
for (const [ip, allocation] of Object.entries(registry.allocations)) {
|
|
581
|
-
if (allocation.vmId === vmId) {
|
|
582
|
-
return ip;
|
|
583
|
-
}
|
|
584
|
-
}
|
|
585
|
-
return void 0;
|
|
586
|
-
}
|
|
587
|
-
async function scanTapDevices() {
|
|
588
|
-
const tapDevices = /* @__PURE__ */ new Map();
|
|
589
|
-
try {
|
|
590
|
-
const { stdout } = await execAsync(
|
|
591
|
-
`ip link show master ${BRIDGE_NAME} 2>/dev/null || true`
|
|
592
|
-
);
|
|
593
|
-
const lines = stdout.split("\n");
|
|
594
|
-
for (const line of lines) {
|
|
595
|
-
const match = line.match(/^\d+:\s+(tap[a-f0-9]+):/);
|
|
596
|
-
if (match && match[1]) {
|
|
597
|
-
const tapName = match[1];
|
|
598
|
-
const vmIdPrefix = tapName.substring(3);
|
|
599
|
-
tapDevices.set(tapName, vmIdPrefix);
|
|
600
|
-
}
|
|
601
|
-
}
|
|
602
|
-
} catch {
|
|
603
|
-
}
|
|
604
|
-
return tapDevices;
|
|
605
|
-
}
|
|
606
|
-
function reconcileRegistry(registry, activeTaps) {
|
|
607
|
-
const reconciled = { allocations: {} };
|
|
608
|
-
const activeTapNames = new Set(activeTaps.keys());
|
|
609
|
-
const now = Date.now();
|
|
610
|
-
for (const [ip, allocation] of Object.entries(registry.allocations)) {
|
|
611
|
-
const allocatedTime = new Date(allocation.allocatedAt).getTime();
|
|
612
|
-
const isWithinGracePeriod = now - allocatedTime < ALLOCATION_GRACE_PERIOD_MS;
|
|
613
|
-
if (activeTapNames.has(allocation.tapDevice)) {
|
|
614
|
-
reconciled.allocations[ip] = allocation;
|
|
615
|
-
} else if (isWithinGracePeriod) {
|
|
616
|
-
reconciled.allocations[ip] = allocation;
|
|
617
|
-
} else {
|
|
618
|
-
logger.log(
|
|
619
|
-
`Removing stale allocation for ${ip} (TAP ${allocation.tapDevice} no longer exists)`
|
|
620
|
-
);
|
|
621
|
-
}
|
|
622
|
-
}
|
|
623
|
-
return reconciled;
|
|
624
|
-
}
|
|
625
|
-
function findFreeIP(registry) {
|
|
626
|
-
const allocatedIPs = new Set(Object.keys(registry.allocations));
|
|
627
|
-
for (let octet = IP_START; octet <= IP_END; octet++) {
|
|
628
|
-
const ip = `${IP_PREFIX}${octet}`;
|
|
629
|
-
if (!allocatedIPs.has(ip)) {
|
|
630
|
-
return ip;
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
return null;
|
|
634
|
-
}
|
|
635
|
-
async function allocateIP(vmId) {
|
|
636
|
-
const tapDevice = `tap${vmId.substring(0, 8)}`;
|
|
637
|
-
return withLock(async () => {
|
|
638
|
-
const registry = readRegistry();
|
|
639
|
-
const ip = findFreeIP(registry);
|
|
640
|
-
if (!ip) {
|
|
641
|
-
throw new Error(
|
|
642
|
-
"No free IP addresses available in pool (172.16.0.2-254)"
|
|
643
|
-
);
|
|
644
|
-
}
|
|
645
|
-
const allocatedCount = Object.keys(registry.allocations).length;
|
|
646
|
-
const allocatedIPs = Object.keys(registry.allocations).sort();
|
|
647
|
-
logger.log(
|
|
648
|
-
`Current state: ${allocatedCount} IPs allocated [${allocatedIPs.join(", ")}], assigning ${ip}`
|
|
649
|
-
);
|
|
650
|
-
registry.allocations[ip] = {
|
|
651
|
-
vmId,
|
|
652
|
-
tapDevice,
|
|
653
|
-
allocatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
654
|
-
};
|
|
655
|
-
writeRegistry(registry);
|
|
656
|
-
logger.log(`Allocated ${ip} for VM ${vmId} (TAP ${tapDevice})`);
|
|
657
|
-
return ip;
|
|
658
|
-
});
|
|
659
|
-
}
|
|
660
|
-
async function releaseIP(ip) {
|
|
661
|
-
return withLock(async () => {
|
|
662
|
-
const registry = readRegistry();
|
|
663
|
-
if (registry.allocations[ip]) {
|
|
664
|
-
const allocation = registry.allocations[ip];
|
|
665
|
-
delete registry.allocations[ip];
|
|
666
|
-
writeRegistry(registry);
|
|
667
|
-
logger.log(`Released ${ip} (was allocated to VM ${allocation.vmId})`);
|
|
668
|
-
} else {
|
|
669
|
-
logger.log(`IP ${ip} was not in registry, nothing to release`);
|
|
670
|
-
}
|
|
671
|
-
});
|
|
672
|
-
}
|
|
673
|
-
async function cleanupOrphanedAllocations() {
|
|
674
|
-
return withLock(async () => {
|
|
675
|
-
logger.log("Cleaning up orphaned allocations...");
|
|
676
|
-
const registry = readRegistry();
|
|
677
|
-
const beforeCount = Object.keys(registry.allocations).length;
|
|
678
|
-
if (beforeCount === 0) {
|
|
679
|
-
logger.log("No allocations in registry, nothing to clean up");
|
|
680
|
-
return;
|
|
681
|
-
}
|
|
682
|
-
const activeTaps = await scanTapDevices();
|
|
683
|
-
logger.log(`Found ${activeTaps.size} active TAP device(s) on bridge`);
|
|
684
|
-
const reconciled = reconcileRegistry(registry, activeTaps);
|
|
685
|
-
const afterCount = Object.keys(reconciled.allocations).length;
|
|
686
|
-
if (afterCount !== beforeCount) {
|
|
687
|
-
writeRegistry(reconciled);
|
|
688
|
-
logger.log(
|
|
689
|
-
`Cleaned up ${beforeCount - afterCount} orphaned allocation(s)`
|
|
690
|
-
);
|
|
691
|
-
} else {
|
|
692
|
-
logger.log("No orphaned allocations found");
|
|
693
|
-
}
|
|
694
|
-
});
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
// src/lib/firecracker/network.ts
|
|
698
|
-
var execAsync2 = promisify2(exec2);
|
|
699
|
-
var logger2 = createLogger("Network");
|
|
700
|
-
var BRIDGE_NAME2 = "vm0br0";
|
|
701
502
|
var BRIDGE_IP = "172.16.0.1";
|
|
702
503
|
var BRIDGE_NETMASK = "255.255.255.0";
|
|
703
504
|
var BRIDGE_CIDR = "172.16.0.0/24";
|
|
@@ -728,7 +529,7 @@ function commandExists(cmd) {
|
|
|
728
529
|
async function execCommand(cmd, sudo = true) {
|
|
729
530
|
const fullCmd = sudo ? `sudo ${cmd}` : cmd;
|
|
730
531
|
try {
|
|
731
|
-
const { stdout } = await
|
|
532
|
+
const { stdout } = await execAsync(fullCmd);
|
|
732
533
|
return stdout.trim();
|
|
733
534
|
} catch (error) {
|
|
734
535
|
const execError = error;
|
|
@@ -748,33 +549,33 @@ async function getDefaultInterface() {
|
|
|
748
549
|
}
|
|
749
550
|
async function setupForwardRules() {
|
|
750
551
|
const extIface = await getDefaultInterface();
|
|
751
|
-
|
|
552
|
+
logger.log(`Setting up FORWARD rules for ${BRIDGE_NAME} <-> ${extIface}`);
|
|
752
553
|
try {
|
|
753
554
|
await execCommand(
|
|
754
|
-
`iptables -C FORWARD -i ${
|
|
555
|
+
`iptables -C FORWARD -i ${BRIDGE_NAME} -o ${extIface} -j ACCEPT`
|
|
755
556
|
);
|
|
756
|
-
|
|
557
|
+
logger.log("FORWARD outbound rule already exists");
|
|
757
558
|
} catch {
|
|
758
559
|
await execCommand(
|
|
759
|
-
`iptables -I FORWARD -i ${
|
|
560
|
+
`iptables -I FORWARD -i ${BRIDGE_NAME} -o ${extIface} -j ACCEPT`
|
|
760
561
|
);
|
|
761
|
-
|
|
562
|
+
logger.log("FORWARD outbound rule added");
|
|
762
563
|
}
|
|
763
564
|
try {
|
|
764
565
|
await execCommand(
|
|
765
|
-
`iptables -C FORWARD -i ${extIface} -o ${
|
|
566
|
+
`iptables -C FORWARD -i ${extIface} -o ${BRIDGE_NAME} -m state --state RELATED,ESTABLISHED -j ACCEPT`
|
|
766
567
|
);
|
|
767
|
-
|
|
568
|
+
logger.log("FORWARD inbound rule already exists");
|
|
768
569
|
} catch {
|
|
769
570
|
await execCommand(
|
|
770
|
-
`iptables -I FORWARD -i ${extIface} -o ${
|
|
571
|
+
`iptables -I FORWARD -i ${extIface} -o ${BRIDGE_NAME} -m state --state RELATED,ESTABLISHED -j ACCEPT`
|
|
771
572
|
);
|
|
772
|
-
|
|
573
|
+
logger.log("FORWARD inbound rule added");
|
|
773
574
|
}
|
|
774
575
|
}
|
|
775
576
|
async function bridgeExists() {
|
|
776
577
|
try {
|
|
777
|
-
await execCommand(`ip link show ${
|
|
578
|
+
await execCommand(`ip link show ${BRIDGE_NAME}`, true);
|
|
778
579
|
return true;
|
|
779
580
|
} catch {
|
|
780
581
|
return false;
|
|
@@ -782,108 +583,30 @@ async function bridgeExists() {
|
|
|
782
583
|
}
|
|
783
584
|
async function setupBridge() {
|
|
784
585
|
if (await bridgeExists()) {
|
|
785
|
-
|
|
586
|
+
logger.log(`Bridge ${BRIDGE_NAME} already exists`);
|
|
786
587
|
await setupForwardRules();
|
|
787
588
|
return;
|
|
788
589
|
}
|
|
789
|
-
|
|
790
|
-
await execCommand(`ip link add name ${
|
|
590
|
+
logger.log(`Creating bridge ${BRIDGE_NAME}...`);
|
|
591
|
+
await execCommand(`ip link add name ${BRIDGE_NAME} type bridge`);
|
|
791
592
|
await execCommand(
|
|
792
|
-
`ip addr add ${BRIDGE_IP}/${BRIDGE_NETMASK} dev ${
|
|
593
|
+
`ip addr add ${BRIDGE_IP}/${BRIDGE_NETMASK} dev ${BRIDGE_NAME}`
|
|
793
594
|
);
|
|
794
|
-
await execCommand(`ip link set ${
|
|
595
|
+
await execCommand(`ip link set ${BRIDGE_NAME} up`);
|
|
795
596
|
await execCommand(`sysctl -w net.ipv4.ip_forward=1`);
|
|
796
597
|
try {
|
|
797
598
|
await execCommand(
|
|
798
599
|
`iptables -t nat -C POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
|
|
799
600
|
);
|
|
800
|
-
|
|
601
|
+
logger.log("NAT rule already exists");
|
|
801
602
|
} catch {
|
|
802
603
|
await execCommand(
|
|
803
604
|
`iptables -t nat -A POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
|
|
804
605
|
);
|
|
805
|
-
|
|
606
|
+
logger.log("NAT rule added");
|
|
806
607
|
}
|
|
807
608
|
await setupForwardRules();
|
|
808
|
-
|
|
809
|
-
}
|
|
810
|
-
async function tapDeviceExists(tapDevice) {
|
|
811
|
-
try {
|
|
812
|
-
await execCommand(`ip link show ${tapDevice}`, true);
|
|
813
|
-
return true;
|
|
814
|
-
} catch {
|
|
815
|
-
return false;
|
|
816
|
-
}
|
|
817
|
-
}
|
|
818
|
-
async function clearStaleIptablesRulesForIP(ip) {
|
|
819
|
-
try {
|
|
820
|
-
const { stdout } = await execAsync2(
|
|
821
|
-
"sudo iptables -t nat -S PREROUTING 2>/dev/null || true"
|
|
822
|
-
);
|
|
823
|
-
const lines = stdout.split("\n");
|
|
824
|
-
const rulesForIP = lines.filter((line) => line.includes(`-s ${ip}`));
|
|
825
|
-
if (rulesForIP.length === 0) {
|
|
826
|
-
return;
|
|
827
|
-
}
|
|
828
|
-
logger2.log(
|
|
829
|
-
`Clearing ${rulesForIP.length} stale iptables rule(s) for IP ${ip}`
|
|
830
|
-
);
|
|
831
|
-
for (const rule of rulesForIP) {
|
|
832
|
-
const deleteRule = rule.replace("-A ", "-D ");
|
|
833
|
-
try {
|
|
834
|
-
await execCommand(`iptables -t nat ${deleteRule}`);
|
|
835
|
-
} catch {
|
|
836
|
-
}
|
|
837
|
-
}
|
|
838
|
-
} catch {
|
|
839
|
-
}
|
|
840
|
-
}
|
|
841
|
-
async function createTapDevice(vmId) {
|
|
842
|
-
const tapDevice = `tap${vmId.substring(0, 8)}`;
|
|
843
|
-
const guestMac = generateMacAddress(vmId);
|
|
844
|
-
const guestIp = await allocateIP(vmId);
|
|
845
|
-
logger2.log(`[VM ${vmId}] IP allocated: ${guestIp}`);
|
|
846
|
-
await clearStaleIptablesRulesForIP(guestIp);
|
|
847
|
-
logger2.log(`[VM ${vmId}] Stale iptables cleared`);
|
|
848
|
-
if (await tapDeviceExists(tapDevice)) {
|
|
849
|
-
logger2.log(
|
|
850
|
-
`[VM ${vmId}] TAP device ${tapDevice} already exists, deleting...`
|
|
851
|
-
);
|
|
852
|
-
await deleteTapDevice(tapDevice);
|
|
853
|
-
}
|
|
854
|
-
await execCommand(`ip tuntap add ${tapDevice} mode tap`);
|
|
855
|
-
logger2.log(`[VM ${vmId}] TAP device created`);
|
|
856
|
-
await execCommand(`ip link set ${tapDevice} master ${BRIDGE_NAME2}`);
|
|
857
|
-
logger2.log(`[VM ${vmId}] TAP added to bridge`);
|
|
858
|
-
await execCommand(`ip link set ${tapDevice} up`);
|
|
859
|
-
logger2.log(
|
|
860
|
-
`[VM ${vmId}] TAP created: ${tapDevice}, MAC ${guestMac}, IP ${guestIp}`
|
|
861
|
-
);
|
|
862
|
-
return {
|
|
863
|
-
tapDevice,
|
|
864
|
-
guestMac,
|
|
865
|
-
guestIp,
|
|
866
|
-
gatewayIp: BRIDGE_IP,
|
|
867
|
-
netmask: BRIDGE_NETMASK
|
|
868
|
-
};
|
|
869
|
-
}
|
|
870
|
-
async function deleteTapDevice(tapDevice, guestIp) {
|
|
871
|
-
if (!await tapDeviceExists(tapDevice)) {
|
|
872
|
-
logger2.log(`TAP device ${tapDevice} does not exist, skipping delete`);
|
|
873
|
-
} else {
|
|
874
|
-
await execCommand(`ip link delete ${tapDevice}`);
|
|
875
|
-
logger2.log(`TAP device ${tapDevice} deleted`);
|
|
876
|
-
}
|
|
877
|
-
if (guestIp) {
|
|
878
|
-
try {
|
|
879
|
-
await execCommand(`ip neigh del ${guestIp} dev ${BRIDGE_NAME2}`, true);
|
|
880
|
-
logger2.log(`ARP entry cleared for ${guestIp}`);
|
|
881
|
-
} catch {
|
|
882
|
-
}
|
|
883
|
-
}
|
|
884
|
-
if (guestIp) {
|
|
885
|
-
await releaseIP(guestIp);
|
|
886
|
-
}
|
|
609
|
+
logger.log(`Bridge ${BRIDGE_NAME} configured with IP ${BRIDGE_IP}`);
|
|
887
610
|
}
|
|
888
611
|
function generateNetworkBootArgs(config) {
|
|
889
612
|
return `ip=${config.guestIp}::${config.gatewayIp}:${config.netmask}:vm0-guest:eth0:off`;
|
|
@@ -910,69 +633,53 @@ function checkNetworkPrerequisites() {
|
|
|
910
633
|
}
|
|
911
634
|
async function setupCIDRProxyRules(proxyPort) {
|
|
912
635
|
const comment = "vm0:cidr-proxy";
|
|
913
|
-
|
|
636
|
+
logger.log(
|
|
914
637
|
`Setting up CIDR proxy rules for ${BRIDGE_CIDR} -> port ${proxyPort}`
|
|
915
638
|
);
|
|
916
639
|
try {
|
|
917
640
|
await execCommand(
|
|
918
641
|
`iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
919
642
|
);
|
|
920
|
-
|
|
643
|
+
logger.log("CIDR proxy rule for port 80 already exists");
|
|
921
644
|
} catch {
|
|
922
645
|
await execCommand(
|
|
923
646
|
`iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
924
647
|
);
|
|
925
|
-
|
|
648
|
+
logger.log("CIDR proxy rule for port 80 added");
|
|
926
649
|
}
|
|
927
650
|
try {
|
|
928
651
|
await execCommand(
|
|
929
652
|
`iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
930
653
|
);
|
|
931
|
-
|
|
654
|
+
logger.log("CIDR proxy rule for port 443 already exists");
|
|
932
655
|
} catch {
|
|
933
656
|
await execCommand(
|
|
934
657
|
`iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
935
658
|
);
|
|
936
|
-
|
|
659
|
+
logger.log("CIDR proxy rule for port 443 added");
|
|
937
660
|
}
|
|
938
661
|
}
|
|
939
662
|
async function cleanupCIDRProxyRules(proxyPort) {
|
|
940
663
|
const comment = "vm0:cidr-proxy";
|
|
941
|
-
|
|
664
|
+
logger.log("Cleaning up CIDR proxy rules...");
|
|
942
665
|
try {
|
|
943
666
|
await execCommand(
|
|
944
667
|
`iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
945
668
|
);
|
|
946
|
-
|
|
669
|
+
logger.log("CIDR proxy rule for port 80 removed");
|
|
947
670
|
} catch {
|
|
948
671
|
}
|
|
949
672
|
try {
|
|
950
673
|
await execCommand(
|
|
951
674
|
`iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
952
675
|
);
|
|
953
|
-
|
|
676
|
+
logger.log("CIDR proxy rule for port 443 removed");
|
|
954
677
|
} catch {
|
|
955
678
|
}
|
|
956
679
|
}
|
|
957
|
-
async function listTapDevices() {
|
|
958
|
-
try {
|
|
959
|
-
const result = await execCommand("ip -o link show type tuntap", false);
|
|
960
|
-
const devices = [];
|
|
961
|
-
const lines = result.split("\n");
|
|
962
|
-
for (const line of lines) {
|
|
963
|
-
const match = line.match(/^\d+:\s+(tap[a-f0-9]{8}):/);
|
|
964
|
-
if (match && match[1]) {
|
|
965
|
-
devices.push(match[1]);
|
|
966
|
-
}
|
|
967
|
-
}
|
|
968
|
-
return devices;
|
|
969
|
-
} catch {
|
|
970
|
-
return [];
|
|
971
|
-
}
|
|
972
|
-
}
|
|
973
680
|
async function checkBridgeStatus() {
|
|
974
681
|
try {
|
|
975
|
-
const result = await execCommand(`ip -o addr show ${
|
|
682
|
+
const result = await execCommand(`ip -o addr show ${BRIDGE_NAME}`, false);
|
|
976
683
|
const ipMatch = result.match(/inet\s+(\d+\.\d+\.\d+\.\d+)/);
|
|
977
684
|
const upMatch = result.includes("UP") || result.includes("state UP");
|
|
978
685
|
return {
|
|
@@ -1029,17 +736,17 @@ async function findOrphanedIptablesRules(rules, activeVmIps, expectedProxyPort)
|
|
|
1029
736
|
return orphaned;
|
|
1030
737
|
}
|
|
1031
738
|
async function flushBridgeArpCache() {
|
|
1032
|
-
|
|
739
|
+
logger.log(`Flushing ARP cache on bridge ${BRIDGE_NAME}...`);
|
|
1033
740
|
try {
|
|
1034
741
|
if (!await bridgeExists()) {
|
|
1035
|
-
|
|
742
|
+
logger.log("Bridge does not exist, skipping ARP flush");
|
|
1036
743
|
return;
|
|
1037
744
|
}
|
|
1038
|
-
const { stdout } = await
|
|
1039
|
-
`ip neigh show dev ${
|
|
745
|
+
const { stdout } = await execAsync(
|
|
746
|
+
`ip neigh show dev ${BRIDGE_NAME} 2>/dev/null || true`
|
|
1040
747
|
);
|
|
1041
748
|
if (!stdout.trim()) {
|
|
1042
|
-
|
|
749
|
+
logger.log("No ARP entries on bridge");
|
|
1043
750
|
return;
|
|
1044
751
|
}
|
|
1045
752
|
const lines = stdout.split("\n").filter((line) => line.trim());
|
|
@@ -1049,104 +756,642 @@ async function flushBridgeArpCache() {
|
|
|
1049
756
|
if (match && match[1]) {
|
|
1050
757
|
const ip = match[1];
|
|
1051
758
|
try {
|
|
1052
|
-
await execCommand(`ip neigh del ${ip} dev ${
|
|
759
|
+
await execCommand(`ip neigh del ${ip} dev ${BRIDGE_NAME}`, true);
|
|
1053
760
|
cleared++;
|
|
1054
761
|
} catch {
|
|
1055
762
|
}
|
|
1056
763
|
}
|
|
1057
764
|
}
|
|
1058
|
-
|
|
765
|
+
logger.log(`Cleared ${cleared} ARP entries from bridge`);
|
|
1059
766
|
} catch (error) {
|
|
1060
|
-
|
|
767
|
+
logger.log(
|
|
1061
768
|
`Warning: Could not flush ARP cache: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1062
769
|
);
|
|
1063
770
|
}
|
|
1064
771
|
}
|
|
1065
772
|
async function cleanupOrphanedProxyRules(runnerName) {
|
|
1066
773
|
const comment = `vm0:runner:${runnerName}`;
|
|
1067
|
-
|
|
774
|
+
logger.log(`Cleaning up orphaned proxy rules for runner '${runnerName}'...`);
|
|
1068
775
|
try {
|
|
1069
776
|
const rules = await execCommand("iptables -t nat -S PREROUTING", false);
|
|
1070
777
|
const ourRules = rules.split("\n").filter((rule) => rule.includes(comment));
|
|
1071
778
|
if (ourRules.length === 0) {
|
|
1072
|
-
|
|
779
|
+
logger.log("No orphaned proxy rules found");
|
|
1073
780
|
return;
|
|
1074
781
|
}
|
|
1075
|
-
|
|
782
|
+
logger.log(`Found ${ourRules.length} orphaned rule(s) to clean up`);
|
|
1076
783
|
for (const rule of ourRules) {
|
|
1077
784
|
const deleteRule = rule.replace("-A ", "-D ");
|
|
1078
785
|
try {
|
|
1079
786
|
await execCommand(`iptables -t nat ${deleteRule}`);
|
|
1080
|
-
|
|
787
|
+
logger.log(`Deleted orphaned rule: ${rule.substring(0, 80)}...`);
|
|
1081
788
|
} catch {
|
|
1082
|
-
|
|
789
|
+
logger.log(
|
|
1083
790
|
`Failed to delete rule (may already be gone): ${rule.substring(0, 80)}...`
|
|
1084
791
|
);
|
|
1085
792
|
}
|
|
1086
793
|
}
|
|
1087
|
-
|
|
794
|
+
logger.log("Orphaned proxy rules cleanup complete");
|
|
1088
795
|
} catch (error) {
|
|
1089
|
-
|
|
796
|
+
logger.log(
|
|
1090
797
|
`Warning: Could not clean up orphaned rules: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1091
798
|
);
|
|
1092
799
|
}
|
|
1093
800
|
}
|
|
1094
801
|
|
|
1095
|
-
// src/lib/firecracker/overlay-pool.ts
|
|
1096
|
-
import { exec as
|
|
1097
|
-
import { randomUUID } from "crypto";
|
|
1098
|
-
import
|
|
1099
|
-
import path2 from "path";
|
|
1100
|
-
import { promisify as
|
|
1101
|
-
var
|
|
1102
|
-
var
|
|
1103
|
-
var OVERLAY_SIZE = 2 * 1024 * 1024 * 1024;
|
|
1104
|
-
async function defaultCreateFile(filePath) {
|
|
1105
|
-
const fd =
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
await
|
|
802
|
+
// src/lib/firecracker/overlay-pool.ts
|
|
803
|
+
import { exec as exec2 } from "child_process";
|
|
804
|
+
import { randomUUID } from "crypto";
|
|
805
|
+
import fs2 from "fs";
|
|
806
|
+
import path2 from "path";
|
|
807
|
+
import { promisify as promisify2 } from "util";
|
|
808
|
+
var execAsync2 = promisify2(exec2);
|
|
809
|
+
var logger2 = createLogger("OverlayPool");
|
|
810
|
+
var OVERLAY_SIZE = 2 * 1024 * 1024 * 1024;
|
|
811
|
+
async function defaultCreateFile(filePath) {
|
|
812
|
+
const fd = fs2.openSync(filePath, "w");
|
|
813
|
+
fs2.ftruncateSync(fd, OVERLAY_SIZE);
|
|
814
|
+
fs2.closeSync(fd);
|
|
815
|
+
await execAsync2(`mkfs.ext4 -F -q "${filePath}"`);
|
|
816
|
+
}
|
|
817
|
+
var OverlayPool = class {
|
|
818
|
+
initialized = false;
|
|
819
|
+
queue = [];
|
|
820
|
+
replenishing = false;
|
|
821
|
+
config;
|
|
822
|
+
constructor(config) {
|
|
823
|
+
this.config = {
|
|
824
|
+
size: config.size,
|
|
825
|
+
replenishThreshold: config.replenishThreshold,
|
|
826
|
+
poolDir: config.poolDir,
|
|
827
|
+
createFile: config.createFile ?? defaultCreateFile
|
|
828
|
+
};
|
|
829
|
+
}
|
|
830
|
+
/**
|
|
831
|
+
* Generate unique file name using UUID
|
|
832
|
+
*/
|
|
833
|
+
generateFileName() {
|
|
834
|
+
return `overlay-${randomUUID()}.ext4`;
|
|
835
|
+
}
|
|
836
|
+
/**
|
|
837
|
+
* Ensure the pool directory exists
|
|
838
|
+
*/
|
|
839
|
+
async ensurePoolDir() {
|
|
840
|
+
const parentDir = path2.dirname(this.config.poolDir);
|
|
841
|
+
if (!fs2.existsSync(parentDir)) {
|
|
842
|
+
await execAsync2(`sudo mkdir -p ${parentDir}`);
|
|
843
|
+
await execAsync2(`sudo chmod 777 ${parentDir}`);
|
|
844
|
+
}
|
|
845
|
+
if (!fs2.existsSync(this.config.poolDir)) {
|
|
846
|
+
fs2.mkdirSync(this.config.poolDir, { recursive: true });
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
/**
|
|
850
|
+
* Scan pool directory for overlay files
|
|
851
|
+
*/
|
|
852
|
+
scanPoolDir() {
|
|
853
|
+
if (!fs2.existsSync(this.config.poolDir)) {
|
|
854
|
+
return [];
|
|
855
|
+
}
|
|
856
|
+
return fs2.readdirSync(this.config.poolDir).filter((f) => f.startsWith("overlay-") && f.endsWith(".ext4")).map((f) => path2.join(this.config.poolDir, f));
|
|
857
|
+
}
|
|
858
|
+
/**
|
|
859
|
+
* Replenish the pool in background
|
|
860
|
+
*/
|
|
861
|
+
async replenish() {
|
|
862
|
+
if (this.replenishing || !this.initialized) {
|
|
863
|
+
return;
|
|
864
|
+
}
|
|
865
|
+
const needed = this.config.size - this.queue.length;
|
|
866
|
+
if (needed <= 0) {
|
|
867
|
+
return;
|
|
868
|
+
}
|
|
869
|
+
this.replenishing = true;
|
|
870
|
+
logger2.log(`Replenishing pool: creating ${needed} overlay(s)...`);
|
|
871
|
+
try {
|
|
872
|
+
const promises = [];
|
|
873
|
+
for (let i = 0; i < needed; i++) {
|
|
874
|
+
const filePath = path2.join(
|
|
875
|
+
this.config.poolDir,
|
|
876
|
+
this.generateFileName()
|
|
877
|
+
);
|
|
878
|
+
promises.push(
|
|
879
|
+
this.config.createFile(filePath).then(() => {
|
|
880
|
+
this.queue.push(filePath);
|
|
881
|
+
})
|
|
882
|
+
);
|
|
883
|
+
}
|
|
884
|
+
await Promise.all(promises);
|
|
885
|
+
logger2.log(`Pool replenished: ${this.queue.length} available`);
|
|
886
|
+
} catch (err) {
|
|
887
|
+
logger2.error(
|
|
888
|
+
`Replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
|
|
889
|
+
);
|
|
890
|
+
} finally {
|
|
891
|
+
this.replenishing = false;
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
/**
|
|
895
|
+
* Initialize the overlay pool
|
|
896
|
+
*/
|
|
897
|
+
async init() {
|
|
898
|
+
this.queue = [];
|
|
899
|
+
logger2.log(
|
|
900
|
+
`Initializing overlay pool (size=${this.config.size}, threshold=${this.config.replenishThreshold})...`
|
|
901
|
+
);
|
|
902
|
+
await this.ensurePoolDir();
|
|
903
|
+
const existing = this.scanPoolDir();
|
|
904
|
+
if (existing.length > 0) {
|
|
905
|
+
logger2.log(`Cleaning up ${existing.length} stale overlay(s)`);
|
|
906
|
+
for (const file of existing) {
|
|
907
|
+
fs2.unlinkSync(file);
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
this.initialized = true;
|
|
911
|
+
await this.replenish();
|
|
912
|
+
logger2.log("Overlay pool initialized");
|
|
913
|
+
}
|
|
914
|
+
/**
|
|
915
|
+
* Acquire an overlay file from the pool
|
|
916
|
+
*
|
|
917
|
+
* Returns the file path. Caller owns the file and must delete it when done.
|
|
918
|
+
* Falls back to on-demand creation if pool is exhausted.
|
|
919
|
+
*/
|
|
920
|
+
async acquire() {
|
|
921
|
+
if (!this.initialized) {
|
|
922
|
+
throw new Error("Overlay pool not initialized");
|
|
923
|
+
}
|
|
924
|
+
const filePath = this.queue.shift();
|
|
925
|
+
if (filePath) {
|
|
926
|
+
logger2.log(`Acquired overlay from pool (${this.queue.length} remaining)`);
|
|
927
|
+
if (this.queue.length < this.config.replenishThreshold) {
|
|
928
|
+
this.replenish().catch((err) => {
|
|
929
|
+
logger2.error(
|
|
930
|
+
`Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
|
|
931
|
+
);
|
|
932
|
+
});
|
|
933
|
+
}
|
|
934
|
+
return filePath;
|
|
935
|
+
}
|
|
936
|
+
logger2.log("Pool exhausted, creating overlay on-demand");
|
|
937
|
+
const newPath = path2.join(this.config.poolDir, this.generateFileName());
|
|
938
|
+
await this.config.createFile(newPath);
|
|
939
|
+
return newPath;
|
|
940
|
+
}
|
|
941
|
+
/**
|
|
942
|
+
* Clean up the overlay pool
|
|
943
|
+
*/
|
|
944
|
+
cleanup() {
|
|
945
|
+
if (!this.initialized) {
|
|
946
|
+
return;
|
|
947
|
+
}
|
|
948
|
+
logger2.log("Cleaning up overlay pool...");
|
|
949
|
+
for (const file of this.queue) {
|
|
950
|
+
try {
|
|
951
|
+
fs2.unlinkSync(file);
|
|
952
|
+
} catch (err) {
|
|
953
|
+
logger2.log(
|
|
954
|
+
`Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
|
|
955
|
+
);
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
this.queue = [];
|
|
959
|
+
for (const file of this.scanPoolDir()) {
|
|
960
|
+
try {
|
|
961
|
+
fs2.unlinkSync(file);
|
|
962
|
+
} catch (err) {
|
|
963
|
+
logger2.log(
|
|
964
|
+
`Failed to delete ${file}: ${err instanceof Error ? err.message : "Unknown"}`
|
|
965
|
+
);
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
this.initialized = false;
|
|
969
|
+
this.replenishing = false;
|
|
970
|
+
logger2.log("Overlay pool cleaned up");
|
|
971
|
+
}
|
|
972
|
+
};
|
|
973
|
+
var overlayPool = null;
|
|
974
|
+
async function initOverlayPool(config) {
|
|
975
|
+
if (overlayPool) {
|
|
976
|
+
overlayPool.cleanup();
|
|
977
|
+
}
|
|
978
|
+
overlayPool = new OverlayPool(config);
|
|
979
|
+
await overlayPool.init();
|
|
980
|
+
return overlayPool;
|
|
981
|
+
}
|
|
982
|
+
function acquireOverlay() {
|
|
983
|
+
if (!overlayPool) {
|
|
984
|
+
throw new Error(
|
|
985
|
+
"Overlay pool not initialized. Call initOverlayPool() first."
|
|
986
|
+
);
|
|
987
|
+
}
|
|
988
|
+
return overlayPool.acquire();
|
|
989
|
+
}
|
|
990
|
+
function cleanupOverlayPool() {
|
|
991
|
+
if (overlayPool) {
|
|
992
|
+
overlayPool.cleanup();
|
|
993
|
+
overlayPool = null;
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
// src/lib/firecracker/tap-pool.ts
|
|
998
|
+
import { createHash } from "crypto";
|
|
999
|
+
import { exec as exec4 } from "child_process";
|
|
1000
|
+
import { promisify as promisify4 } from "util";
|
|
1001
|
+
|
|
1002
|
+
// src/lib/firecracker/ip-registry.ts
|
|
1003
|
+
import { exec as exec3 } from "child_process";
|
|
1004
|
+
import { promisify as promisify3 } from "util";
|
|
1005
|
+
import * as fs3 from "fs";
|
|
1006
|
+
import path3 from "path";
|
|
1007
|
+
// Promise-returning form of child_process.exec used by this module.
var execAsync3 = promisify3(exec3);
var logger3 = createLogger("IPRegistry");
// Guest addresses are IP_PREFIX followed by a last octet in [IP_START, IP_END].
var IP_PREFIX = "172.16.0.";
var IP_START = 2;
var IP_END = 254;
// Lock acquisition: total wait budget and pause between attempts (milliseconds).
var LOCK_TIMEOUT_MS = 10000;
var LOCK_RETRY_INTERVAL_MS = 100;
|
|
1014
|
+
/**
 * Create the run directory (via sudo) if it does not exist yet.
 *
 * The path is single-quoted for the shell and preceded by `--`, so a path
 * containing spaces, quotes, shell metacharacters, or a leading dash is
 * passed to mkdir/chmod literally instead of being re-parsed.
 *
 * NOTE(review): the directory is made world-writable (777); kept as-is
 * because other code may rely on it — confirm whether that is still needed.
 *
 * @param {string} runDir Directory to create.
 * @returns {Promise<void>}
 */
async function defaultEnsureRunDir(runDir) {
  if (fs3.existsSync(runDir)) {
    return;
  }
  // POSIX sh quoting: wrap in single quotes, and turn each embedded ' into '\''.
  const quotedDir = `'${String(runDir).replace(/'/g, "'\\''")}'`;
  await execAsync3(`sudo mkdir -p -- ${quotedDir}`);
  await execAsync3(`sudo chmod 777 -- ${quotedDir}`);
}
|
|
1020
|
+
/**
 * List tuntap interface names reported by `ip -o link show type tuntap`.
 *
 * Best effort: any failure yields whatever names were collected so far
 * (normally an empty set).
 *
 * @returns {Promise<Set<string>>} Interface names parsed from the output.
 */
async function defaultScanTapDevices() {
  const names = /* @__PURE__ */ new Set();
  try {
    const result = await execAsync3(
      `ip -o link show type tuntap 2>/dev/null || true`
    );
    // Each line is matched as "<index>: <name>:"; only lowercase
    // alphanumeric names are recognised.
    result.stdout.split("\n").forEach((row) => {
      const hit = /^\d+:\s+([a-z0-9]+):/.exec(row);
      if (hit && hit[1]) {
        names.add(hit[1]);
      }
    });
  } catch {
  }
  return names;
}
|
|
1037
|
+
/**
 * Check whether a network interface exists by running `ip link show <name>`.
 *
 * @param {string} tapDevice Interface name.
 * @returns {Promise<boolean>} true when the command succeeds, false when it fails.
 */
async function defaultCheckTapExists(tapDevice) {
  let present = true;
  try {
    await execAsync3(`ip link show ${tapDevice} 2>/dev/null`);
  } catch {
    present = false;
  }
  return present;
}
|
|
1045
|
+
/**
 * Report whether a process with the given PID exists.
 *
 * @param {number} pid Candidate process id.
 * @returns {boolean} false for anything that is not a positive integer;
 *   otherwise true when signal 0 can be delivered, or when delivery is
 *   refused with EPERM (the process exists but we may not signal it).
 */
function isProcessRunning(pid) {
  const looksLikePid = Number.isInteger(pid) && pid > 0;
  if (!looksLikePid) {
    return false;
  }
  try {
    // Signal 0 performs the existence/permission check without sending anything.
    process.kill(pid, 0);
  } catch (err) {
    return err.code === "EPERM";
  }
  return true;
}
|
|
1056
|
+
/**
 * File-backed registry mapping guest IPs to the TAP device, runner PID and
 * (optionally) VM id that hold them. Mutations are serialised through a
 * PID lock file next to the registry.
 */
var IPRegistry = class {
  config;
  /**
   * @param config Optional overrides: runDir, lockPath, registryPath, and the
   *   ensureRunDir / scanTapDevices / checkTapExists hooks. Anything omitted
   *   falls back to the module defaults.
   */
  constructor(config = {}) {
    const runDir = config.runDir ?? VM0_RUN_DIR;
    this.config = {
      runDir,
      lockPath: config.lockPath ?? path3.join(runDir, "ip-pool.lock.active"),
      registryPath: config.registryPath ?? path3.join(runDir, "ip-registry.json"),
      ensureRunDir: config.ensureRunDir ?? (() => defaultEnsureRunDir(runDir)),
      scanTapDevices: config.scanTapDevices ?? defaultScanTapDevices,
      checkTapExists: config.checkTapExists ?? defaultCheckTapExists
    };
  }
  // ============ File Lock ============
  /**
   * Execute a function while holding an exclusive lock on the IP pool.
   *
   * The lock is a file created with the exclusive "wx" flag containing the
   * owner's PID. Acquisition is retried every LOCK_RETRY_INTERVAL_MS until
   * LOCK_TIMEOUT_MS has elapsed; stale locks are removed along the way.
   *
   * @param fn Async callback to run under the lock.
   * @returns Whatever `fn` resolves to.
   * @throws {Error} If the lock cannot be acquired in time (the last
   *   underlying error is attached as `cause`), or whatever `fn` throws.
   */
  async withIPLock(fn) {
    await this.config.ensureRunDir();
    const ownPid = process.pid.toString();
    const startTime = Date.now();
    let lockAcquired = false;
    let lastError;
    while (Date.now() - startTime < LOCK_TIMEOUT_MS) {
      try {
        fs3.writeFileSync(this.config.lockPath, ownPid, {
          flag: "wx"
        });
        lockAcquired = true;
        break;
      } catch (err) {
        lastError = err;
        // A stale lock was removed: retry immediately instead of sleeping.
        if (this.removeLockIfStale()) {
          continue;
        }
        await new Promise(
          (resolve) => setTimeout(resolve, LOCK_RETRY_INTERVAL_MS)
        );
      }
    }
    if (!lockAcquired) {
      throw new Error(
        `Failed to acquire IP pool lock after ${LOCK_TIMEOUT_MS}ms`,
        { cause: lastError }
      );
    }
    try {
      return await fn();
    } finally {
      this.releaseOwnLock(ownPid);
    }
  }
  /**
   * Remove the lock file if its owner is gone.
   *
   * A lock with a readable PID is stale when that process is not running.
   * A lock with no readable PID may belong to an owner that has created the
   * file but not yet written its PID, so it is only treated as stale once
   * the file is older than a short grace period.
   *
   * @returns {boolean} true if the lock file was removed, false otherwise
   *   (including when the file could not be read or removed).
   */
  removeLockIfStale() {
    const { lockPath } = this.config;
    try {
      const pid = parseInt(fs3.readFileSync(lockPath, "utf-8"), 10);
      let stale;
      if (Number.isInteger(pid) && pid > 0) {
        stale = !isProcessRunning(pid);
      } else {
        const graceMs = LOCK_RETRY_INTERVAL_MS * 10;
        stale = Date.now() - fs3.statSync(lockPath).mtimeMs > graceMs;
      }
      if (stale) {
        fs3.unlinkSync(lockPath);
      }
      return stale;
    } catch {
      // File vanished or is unreadable: let the caller wait and retry.
      return false;
    }
  }
  /**
   * Remove the lock file, but only if it still carries our PID, so a lock
   * that has since been taken over by another process is left alone.
   *
   * @param {string} ownPid PID string written when the lock was acquired.
   */
  releaseOwnLock(ownPid) {
    const { lockPath } = this.config;
    try {
      if (fs3.readFileSync(lockPath, "utf-8").trim() === ownPid) {
        fs3.unlinkSync(lockPath);
      }
    } catch {
      // Lock already gone; nothing to release.
    }
  }
  // ============ Registry CRUD ============
  /**
   * Read the IP registry from file.
   *
   * @returns The parsed registry, or an empty registry when the file is
   *   missing, unreadable, not valid JSON, or lacks an `allocations` object.
   */
  readRegistry() {
    try {
      if (fs3.existsSync(this.config.registryPath)) {
        const content = fs3.readFileSync(this.config.registryPath, "utf-8");
        const parsed = JSON.parse(content);
        const allocations = parsed?.allocations;
        if (allocations !== null && typeof allocations === "object" && !Array.isArray(allocations)) {
          return parsed;
        }
        logger3.log("IP registry has unexpected shape, treating as empty");
      }
    } catch (err) {
      logger3.log(
        `Failed to read IP registry, treating as empty: ${err instanceof Error ? err.message : "Unknown"}`
      );
    }
    return { allocations: {} };
  }
  /**
   * Write the IP registry to file.
   *
   * The JSON is written to a temporary sibling file and renamed over the
   * registry, so readers never observe a partially written file.
   *
   * @param registry Registry object to persist.
   * @throws Whatever the underlying write or rename throws.
   */
  writeRegistry(registry) {
    const tmpPath = `${this.config.registryPath}.${process.pid}.tmp`;
    try {
      fs3.writeFileSync(tmpPath, JSON.stringify(registry, null, 2));
      fs3.renameSync(tmpPath, this.config.registryPath);
    } catch (err) {
      try {
        fs3.unlinkSync(tmpPath);
      } catch {
        // Temp file was never created or is already gone.
      }
      throw err;
    }
  }
  /**
   * Find the first available IP in the range.
   *
   * @param registry Registry whose allocation keys are the taken IPs.
   * @returns {string|null} Lowest free address, or null when the range is full.
   */
  findFreeIP(registry) {
    const allocatedIPs = new Set(Object.keys(registry.allocations));
    for (let octet = IP_START; octet <= IP_END; octet++) {
      const ip = `${IP_PREFIX}${octet}`;
      if (!allocatedIPs.has(ip)) {
        return ip;
      }
    }
    return null;
  }
  // ============ IP Allocation ============
  /**
   * Allocate an IP address for a TAP device.
   *
   * @param {string} tapDevice TAP device the address is recorded against.
   * @returns {Promise<string>} The allocated address.
   * @throws {Error} When no address is free or the lock cannot be acquired.
   */
  async allocateIP(tapDevice) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      const ip = this.findFreeIP(registry);
      if (!ip) {
        throw new Error(
          "No free IP addresses available in pool (172.16.0.2-254)"
        );
      }
      registry.allocations[ip] = {
        runnerPid: process.pid,
        tapDevice,
        vmId: null
      };
      this.writeRegistry(registry);
      logger3.log(`Allocated IP ${ip} for TAP ${tapDevice}`);
      return ip;
    });
  }
  /**
   * Release an IP address back to the pool. Unknown addresses are ignored.
   *
   * @param {string} ip Address to release.
   */
  async releaseIP(ip) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      const allocation = registry.allocations[ip];
      if (allocation) {
        delete registry.allocations[ip];
        this.writeRegistry(registry);
        logger3.log(
          `Released IP ${ip} (was allocated to TAP ${allocation.tapDevice})`
        );
      }
    });
  }
  // ============ Cleanup ============
  /**
   * Clean up orphaned IP allocations
   *
   * An allocation is orphaned if:
   * 1. TAP device no longer exists on the system, OR
   * 2. Runner process that created it is no longer running
   *
   * @returns List of orphaned TAP devices that should be deleted by caller
   */
  async cleanupOrphanedIPs() {
    // The scan happens before the lock is taken; entries missing from it
    // are re-checked individually below.
    const activeTaps = await this.config.scanTapDevices();
    logger3.log(`Found ${activeTaps.size} TAP device(s) on system`);
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      const beforeCount = Object.keys(registry.allocations).length;
      if (beforeCount === 0) {
        return [];
      }
      const cleanedRegistry = { allocations: {} };
      const orphanedTaps = [];
      for (const [ip, allocation] of Object.entries(registry.allocations)) {
        const tapInScan = activeTaps.has(allocation.tapDevice);
        const runnerAlive = isProcessRunning(allocation.runnerPid);
        if (!runnerAlive) {
          logger3.log(
            `Removing orphaned IP ${ip} (runner PID ${allocation.runnerPid} not running)`
          );
          // Only TAPs that actually exist are handed back for deletion.
          if (tapInScan) {
            orphanedTaps.push(allocation.tapDevice);
          }
          continue;
        }
        if (tapInScan) {
          cleanedRegistry.allocations[ip] = allocation;
        } else {
          const exists = await this.config.checkTapExists(allocation.tapDevice);
          if (exists) {
            cleanedRegistry.allocations[ip] = allocation;
          } else {
            logger3.log(
              `Removing orphaned IP ${ip} (TAP ${allocation.tapDevice} not found)`
            );
          }
        }
      }
      const afterCount = Object.keys(cleanedRegistry.allocations).length;
      if (afterCount !== beforeCount) {
        this.writeRegistry(cleanedRegistry);
        logger3.log(`Cleaned up ${beforeCount - afterCount} orphaned IP(s)`);
      }
      return orphanedTaps;
    });
  }
  // ============ VM ID Tracking ============
  /**
   * Assign a vmId to an IP allocation (called when VM acquires the pair).
   * Does nothing when the IP has no allocation.
   */
  async assignVmIdToIP(ip, vmId) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      if (registry.allocations[ip]) {
        registry.allocations[ip].vmId = vmId;
        this.writeRegistry(registry);
      }
    });
  }
  /**
   * Clear vmId from an IP allocation (called when pair is returned to pool)
   * Only clears if the current vmId matches expectedVmId to prevent race conditions
   * where a new VM's vmId could be cleared by the previous VM's release.
   */
  async clearVmIdFromIP(ip, expectedVmId) {
    return this.withIPLock(async () => {
      const registry = this.readRegistry();
      if (registry.allocations[ip] && registry.allocations[ip].vmId === expectedVmId) {
        registry.allocations[ip].vmId = null;
        this.writeRegistry(registry);
      }
    });
  }
  // ============ Diagnostic Functions ============
  /**
   * Get all current IP allocations (for diagnostic purposes)
   * Used by the doctor command to display allocated IPs.
   * Reads the registry without taking the lock.
   *
   * @returns {Map} IP address -> allocation record.
   */
  getAllocations() {
    const registry = this.readRegistry();
    return new Map(Object.entries(registry.allocations));
  }
  /**
   * Get IP allocation for a specific VM ID (for diagnostic purposes).
   * Reads the registry without taking the lock.
   *
   * @returns {string|undefined} The IP recorded for vmId, if any.
   */
  getIPForVm(vmId) {
    const registry = this.readRegistry();
    for (const [ip, allocation] of Object.entries(registry.allocations)) {
      if (allocation.vmId === vmId) {
        return ip;
      }
    }
    return void 0;
  }
};
|
|
1291
|
+
// Lazily created IPRegistry shared by the module-level helpers below.
var globalRegistry = null;
/**
 * Return the shared IPRegistry, creating it with default settings on first use.
 */
function getRegistry() {
  globalRegistry ??= new IPRegistry();
  return globalRegistry;
}
|
|
1298
|
+
/** Allocate an IP for `tapDevice` through the shared registry. */
async function allocateIP(tapDevice) {
  const registry = getRegistry();
  return registry.allocateIP(tapDevice);
}
|
|
1301
|
+
/** Release `ip` through the shared registry. */
async function releaseIP(ip) {
  const registry = getRegistry();
  return registry.releaseIP(ip);
}
|
|
1304
|
+
/** Run orphaned-allocation cleanup on the shared registry. */
async function cleanupOrphanedIPs() {
  const registry = getRegistry();
  return registry.cleanupOrphanedIPs();
}
|
|
1307
|
+
/** Record `vmId` against `ip` in the shared registry. */
async function assignVmIdToIP(ip, vmId) {
  const registry = getRegistry();
  return registry.assignVmIdToIP(ip, vmId);
}
|
|
1310
|
+
/** Clear the vmId on `ip` in the shared registry if it equals `expectedVmId`. */
async function clearVmIdFromIP(ip, expectedVmId) {
  const registry = getRegistry();
  return registry.clearVmIdFromIP(ip, expectedVmId);
}
|
|
1313
|
+
/** Return all allocations from the shared registry. */
function getAllocations() {
  const registry = getRegistry();
  return registry.getAllocations();
}
|
|
1316
|
+
/** Look up the IP recorded for `vmId` in the shared registry. */
function getIPForVm(vmId) {
  const registry = getRegistry();
  return registry.getIPForVm(vmId);
}
|
|
1319
|
+
|
|
1320
|
+
// src/lib/firecracker/tap-pool.ts
|
|
1321
|
+
// Promise-returning form of child_process.exec used by the TAP pool.
var execAsync4 = promisify4(exec4);
var logger4 = createLogger("TapPool");
|
|
1323
|
+
/**
 * Derive a TAP name prefix from a pool name: "vm0" followed by the first
 * 8 hex characters of the name's MD5 digest.
 *
 * @param {string} name Pool name to hash.
 * @returns {string} 11-character prefix.
 */
function generateTapPrefix(name) {
  const digest = createHash("md5").update(name).digest("hex");
  return "vm0" + digest.slice(0, 8);
}
|
|
1110
|
-
|
|
1327
|
+
/**
 * Run a shell command under sudo.
 *
 * @param {string} cmd Command line (without the leading "sudo").
 * @returns {Promise<string>} The command's stdout with surrounding whitespace trimmed.
 */
async function execCommand2(cmd) {
  const result = await execAsync4(`sudo ${cmd}`);
  return result.stdout.trim();
}
|
|
1332
|
+
/**
 * Create a TAP device, attach it to the bridge and bring it up.
 *
 * If attaching or bringing the device up fails after it was created, the
 * device is deleted again (best effort) so a half-configured TAP is not
 * left behind; the original error is rethrown.
 *
 * @param {string} name TAP device name.
 * @returns {Promise<void>}
 * @throws The error from the first failing `ip` command.
 */
async function defaultCreateTap(name) {
  await execCommand2(`ip tuntap add ${name} mode tap`);
  try {
    await execCommand2(`ip link set ${name} master ${BRIDGE_NAME}`);
    await execCommand2(`ip link set ${name} up`);
  } catch (err) {
    // Roll back; a failure here must not mask the original error.
    await execCommand2(`ip link delete ${name}`).catch(() => {
    });
    throw err;
  }
}
|
|
1337
|
+
/** Delete the network interface `name` with `ip link delete`. */
async function defaultDeleteTap(name) {
  const command = `ip link delete ${name}`;
  await execCommand2(command);
}
|
|
1340
|
+
/** Set the link-layer address of interface `tap` to `mac`. */
async function defaultSetMac(tap, mac) {
  const command = `ip link set dev ${tap} address ${mac}`;
  await execCommand2(command);
}
|
|
1343
|
+
/**
 * Remove the neighbour (ARP) entry for `ip` on the bridge.
 * Best effort: any failure is ignored.
 */
async function clearArpEntry(ip) {
  try {
    const command = `ip neigh del ${ip} dev ${BRIDGE_NAME}`;
    await execCommand2(command);
  } catch {
  }
}
|
|
1349
|
+
var TapPool = class {
|
|
1111
1350
|
initialized = false;
|
|
1112
1351
|
queue = [];
|
|
1113
1352
|
replenishing = false;
|
|
1353
|
+
nextIndex = 0;
|
|
1354
|
+
prefix;
|
|
1114
1355
|
config;
|
|
1115
1356
|
constructor(config) {
|
|
1357
|
+
this.prefix = generateTapPrefix(config.name);
|
|
1116
1358
|
this.config = {
|
|
1359
|
+
name: config.name,
|
|
1117
1360
|
size: config.size,
|
|
1118
1361
|
replenishThreshold: config.replenishThreshold,
|
|
1119
|
-
|
|
1120
|
-
|
|
1362
|
+
createTap: config.createTap ?? defaultCreateTap,
|
|
1363
|
+
deleteTap: config.deleteTap ?? defaultDeleteTap,
|
|
1364
|
+
setMac: config.setMac ?? defaultSetMac
|
|
1121
1365
|
};
|
|
1122
1366
|
}
|
|
1123
1367
|
/**
|
|
1124
|
-
* Generate
|
|
1368
|
+
* Generate TAP device name
|
|
1369
|
+
* Format: {prefix}{index} (e.g., vm01a2b3c4d000)
|
|
1125
1370
|
*/
|
|
1126
|
-
|
|
1127
|
-
return
|
|
1371
|
+
generateTapName(index) {
|
|
1372
|
+
return `${this.prefix}${index.toString().padStart(3, "0")}`;
|
|
1128
1373
|
}
|
|
1129
1374
|
/**
|
|
1130
|
-
*
|
|
1375
|
+
* Check if a TAP name belongs to this pool instance
|
|
1131
1376
|
*/
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
if (!fs3.existsSync(parentDir)) {
|
|
1135
|
-
await execAsync3(`sudo mkdir -p ${parentDir}`);
|
|
1136
|
-
await execAsync3(`sudo chmod 777 ${parentDir}`);
|
|
1137
|
-
}
|
|
1138
|
-
if (!fs3.existsSync(this.config.poolDir)) {
|
|
1139
|
-
fs3.mkdirSync(this.config.poolDir, { recursive: true });
|
|
1140
|
-
}
|
|
1377
|
+
isOwnTap(name) {
|
|
1378
|
+
return name.startsWith(this.prefix);
|
|
1141
1379
|
}
|
|
1142
1380
|
/**
|
|
1143
|
-
*
|
|
1381
|
+
* Create a {TAP, IP} pair
|
|
1144
1382
|
*/
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1383
|
+
async createPair() {
|
|
1384
|
+
const tapDevice = this.generateTapName(this.nextIndex++);
|
|
1385
|
+
await this.config.createTap(tapDevice);
|
|
1386
|
+
let guestIp;
|
|
1387
|
+
try {
|
|
1388
|
+
guestIp = await allocateIP(tapDevice);
|
|
1389
|
+
} catch (err) {
|
|
1390
|
+
await this.config.deleteTap(tapDevice).catch(() => {
|
|
1391
|
+
});
|
|
1392
|
+
throw err;
|
|
1148
1393
|
}
|
|
1149
|
-
return
|
|
1394
|
+
return { tapDevice, guestIp };
|
|
1150
1395
|
}
|
|
1151
1396
|
/**
|
|
1152
1397
|
* Replenish the pool in background
|
|
@@ -1160,135 +1405,267 @@ var OverlayPool = class {
|
|
|
1160
1405
|
return;
|
|
1161
1406
|
}
|
|
1162
1407
|
this.replenishing = true;
|
|
1163
|
-
|
|
1408
|
+
logger4.log(`Replenishing pool: creating up to ${needed} pair(s)...`);
|
|
1164
1409
|
try {
|
|
1165
|
-
const promises = [];
|
|
1166
1410
|
for (let i = 0; i < needed; i++) {
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1411
|
+
if (!this.initialized) {
|
|
1412
|
+
logger4.log("Pool shutdown detected, stopping replenish");
|
|
1413
|
+
break;
|
|
1414
|
+
}
|
|
1415
|
+
if (this.queue.length >= this.config.size) {
|
|
1416
|
+
break;
|
|
1417
|
+
}
|
|
1418
|
+
try {
|
|
1419
|
+
const pair = await this.createPair();
|
|
1420
|
+
if (!this.initialized) {
|
|
1421
|
+
await releaseIP(pair.guestIp).catch(() => {
|
|
1422
|
+
});
|
|
1423
|
+
await this.config.deleteTap(pair.tapDevice).catch(() => {
|
|
1424
|
+
});
|
|
1425
|
+
logger4.log("Pool shutdown detected, cleaned up in-flight pair");
|
|
1426
|
+
break;
|
|
1427
|
+
}
|
|
1428
|
+
this.queue.push(pair);
|
|
1429
|
+
} catch (err) {
|
|
1430
|
+
logger4.error(
|
|
1431
|
+
`Failed to create pair: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1432
|
+
);
|
|
1433
|
+
}
|
|
1176
1434
|
}
|
|
1177
|
-
|
|
1178
|
-
logger3.log(`Pool replenished: ${this.queue.length} available`);
|
|
1179
|
-
} catch (err) {
|
|
1180
|
-
logger3.error(
|
|
1181
|
-
`Replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1182
|
-
);
|
|
1435
|
+
logger4.log(`Pool replenished: ${this.queue.length} available`);
|
|
1183
1436
|
} finally {
|
|
1184
1437
|
this.replenishing = false;
|
|
1185
1438
|
}
|
|
1186
1439
|
}
|
|
1187
1440
|
/**
|
|
1188
|
-
*
|
|
1441
|
+
* Scan for orphaned TAP devices from previous runs (matching this pool's prefix)
|
|
1442
|
+
*/
|
|
1443
|
+
async scanOrphanedTaps() {
|
|
1444
|
+
try {
|
|
1445
|
+
const { stdout } = await execAsync4(
|
|
1446
|
+
`ip -o link show type tuntap 2>/dev/null || true`
|
|
1447
|
+
);
|
|
1448
|
+
const orphaned = [];
|
|
1449
|
+
const lines = stdout.split("\n");
|
|
1450
|
+
for (const line of lines) {
|
|
1451
|
+
const match = line.match(/^\d+:\s+([a-z0-9]+):/);
|
|
1452
|
+
if (match && match[1] && this.isOwnTap(match[1])) {
|
|
1453
|
+
orphaned.push(match[1]);
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
return orphaned;
|
|
1457
|
+
} catch {
|
|
1458
|
+
return [];
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
/**
|
|
1462
|
+
* Initialize the TAP pool
|
|
1189
1463
|
*/
|
|
1190
1464
|
async init() {
|
|
1191
1465
|
this.queue = [];
|
|
1192
|
-
|
|
1193
|
-
|
|
1466
|
+
this.nextIndex = 0;
|
|
1467
|
+
logger4.log(
|
|
1468
|
+
`Initializing TAP pool (size=${this.config.size}, threshold=${this.config.replenishThreshold})...`
|
|
1194
1469
|
);
|
|
1195
|
-
await this.
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1470
|
+
const orphanedTaps = await this.scanOrphanedTaps();
|
|
1471
|
+
if (orphanedTaps.length > 0) {
|
|
1472
|
+
logger4.log(`Cleaning up ${orphanedTaps.length} orphaned TAP(s)`);
|
|
1473
|
+
for (const tap of orphanedTaps) {
|
|
1474
|
+
try {
|
|
1475
|
+
await execCommand2(`ip link delete ${tap}`);
|
|
1476
|
+
} catch {
|
|
1477
|
+
}
|
|
1478
|
+
}
|
|
1479
|
+
}
|
|
1480
|
+
const orphanedTapsFromRegistry = await cleanupOrphanedIPs();
|
|
1481
|
+
for (const tap of orphanedTapsFromRegistry) {
|
|
1482
|
+
try {
|
|
1483
|
+
await execCommand2(`ip link delete ${tap}`);
|
|
1484
|
+
logger4.log(`Deleted orphaned TAP ${tap} (runner dead)`);
|
|
1485
|
+
} catch {
|
|
1201
1486
|
}
|
|
1202
1487
|
}
|
|
1203
1488
|
this.initialized = true;
|
|
1204
1489
|
await this.replenish();
|
|
1205
|
-
|
|
1490
|
+
logger4.log("TAP pool initialized");
|
|
1206
1491
|
}
|
|
1207
1492
|
/**
|
|
1208
|
-
* Acquire
|
|
1493
|
+
* Acquire a {TAP, IP} pair from the pool
|
|
1209
1494
|
*
|
|
1210
|
-
* Returns
|
|
1495
|
+
* Returns VMNetworkConfig with TAP device, IP, and MAC.
|
|
1211
1496
|
* Falls back to on-demand creation if pool is exhausted.
|
|
1212
1497
|
*/
|
|
1213
|
-
async acquire() {
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1498
|
+
async acquire(vmId) {
|
|
1499
|
+
let resource;
|
|
1500
|
+
let fromPool;
|
|
1501
|
+
const pooled = this.queue.shift();
|
|
1502
|
+
if (pooled) {
|
|
1503
|
+
resource = pooled;
|
|
1504
|
+
fromPool = true;
|
|
1505
|
+
logger4.log(`Acquired pair from pool (${this.queue.length} remaining)`);
|
|
1220
1506
|
if (this.queue.length < this.config.replenishThreshold) {
|
|
1221
1507
|
this.replenish().catch((err) => {
|
|
1222
|
-
|
|
1508
|
+
logger4.error(
|
|
1509
|
+
`Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1510
|
+
);
|
|
1511
|
+
});
|
|
1512
|
+
}
|
|
1513
|
+
} else {
|
|
1514
|
+
logger4.log("Pool exhausted, creating pair on-demand");
|
|
1515
|
+
resource = await this.createPair();
|
|
1516
|
+
fromPool = false;
|
|
1517
|
+
if (this.config.replenishThreshold > 0) {
|
|
1518
|
+
this.replenish().catch((err) => {
|
|
1519
|
+
logger4.error(
|
|
1223
1520
|
`Background replenish failed: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1224
1521
|
);
|
|
1225
1522
|
});
|
|
1226
1523
|
}
|
|
1227
|
-
return filePath;
|
|
1228
1524
|
}
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1525
|
+
const guestMac = generateMacAddress(vmId);
|
|
1526
|
+
try {
|
|
1527
|
+
await this.config.setMac(resource.tapDevice, guestMac);
|
|
1528
|
+
} catch (err) {
|
|
1529
|
+
if (fromPool) {
|
|
1530
|
+
this.queue.push(resource);
|
|
1531
|
+
logger4.log(
|
|
1532
|
+
`Returned pair to pool after MAC set failure: ${resource.tapDevice}`
|
|
1533
|
+
);
|
|
1534
|
+
} else {
|
|
1535
|
+
await releaseIP(resource.guestIp).catch(() => {
|
|
1536
|
+
});
|
|
1537
|
+
await this.config.deleteTap(resource.tapDevice).catch(() => {
|
|
1538
|
+
});
|
|
1539
|
+
}
|
|
1540
|
+
throw err;
|
|
1541
|
+
}
|
|
1542
|
+
await clearArpEntry(resource.guestIp);
|
|
1543
|
+
try {
|
|
1544
|
+
await assignVmIdToIP(resource.guestIp, vmId);
|
|
1545
|
+
} catch (err) {
|
|
1546
|
+
logger4.error(
|
|
1547
|
+
`Failed to assign vmId to IP registry: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1548
|
+
);
|
|
1549
|
+
}
|
|
1550
|
+
logger4.log(
|
|
1551
|
+
`Acquired: TAP ${resource.tapDevice}, MAC ${guestMac}, IP ${resource.guestIp}`
|
|
1552
|
+
);
|
|
1553
|
+
return {
|
|
1554
|
+
tapDevice: resource.tapDevice,
|
|
1555
|
+
guestMac,
|
|
1556
|
+
guestIp: resource.guestIp,
|
|
1557
|
+
gatewayIp: BRIDGE_IP,
|
|
1558
|
+
netmask: BRIDGE_NETMASK
|
|
1559
|
+
};
|
|
1233
1560
|
}
|
|
1234
1561
|
/**
|
|
1235
|
-
*
|
|
1562
|
+
* Release a {TAP, IP} pair back to the pool
|
|
1563
|
+
* @param vmId The VM ID that is releasing this pair (for registry cleanup)
|
|
1236
1564
|
*/
|
|
1237
|
-
|
|
1565
|
+
async release(tapDevice, guestIp, vmId) {
|
|
1566
|
+
await clearArpEntry(guestIp);
|
|
1238
1567
|
if (!this.initialized) {
|
|
1568
|
+
await releaseIP(guestIp).catch(() => {
|
|
1569
|
+
});
|
|
1570
|
+
try {
|
|
1571
|
+
await this.config.deleteTap(tapDevice);
|
|
1572
|
+
logger4.log(`Pair deleted (pool shutdown): ${tapDevice}, ${guestIp}`);
|
|
1573
|
+
} catch (err) {
|
|
1574
|
+
logger4.log(
|
|
1575
|
+
`Failed to delete TAP ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1576
|
+
);
|
|
1577
|
+
}
|
|
1239
1578
|
return;
|
|
1240
1579
|
}
|
|
1241
|
-
|
|
1242
|
-
|
|
1580
|
+
if (this.isOwnTap(tapDevice)) {
|
|
1581
|
+
const alreadyInQueue = this.queue.some((r) => r.tapDevice === tapDevice);
|
|
1582
|
+
if (alreadyInQueue) {
|
|
1583
|
+
logger4.log(
|
|
1584
|
+
`Pair ${tapDevice} already in pool, ignoring duplicate release`
|
|
1585
|
+
);
|
|
1586
|
+
return;
|
|
1587
|
+
}
|
|
1588
|
+
this.queue.push({ tapDevice, guestIp });
|
|
1589
|
+
logger4.log(
|
|
1590
|
+
`Pair released: ${tapDevice}, ${guestIp} (${this.queue.length} available)`
|
|
1591
|
+
);
|
|
1243
1592
|
try {
|
|
1244
|
-
|
|
1593
|
+
await clearVmIdFromIP(guestIp, vmId);
|
|
1245
1594
|
} catch (err) {
|
|
1246
|
-
|
|
1247
|
-
`Failed to
|
|
1595
|
+
logger4.error(
|
|
1596
|
+
`Failed to clear vmId from IP registry: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1248
1597
|
);
|
|
1249
1598
|
}
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
|
|
1599
|
+
} else {
|
|
1600
|
+
await releaseIP(guestIp).catch(() => {
|
|
1601
|
+
});
|
|
1253
1602
|
try {
|
|
1254
|
-
|
|
1603
|
+
await this.config.deleteTap(tapDevice);
|
|
1604
|
+
logger4.log(`Non-pooled pair deleted: ${tapDevice}, ${guestIp}`);
|
|
1255
1605
|
} catch (err) {
|
|
1256
|
-
|
|
1257
|
-
`Failed to delete ${
|
|
1606
|
+
logger4.log(
|
|
1607
|
+
`Failed to delete non-pooled TAP ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1258
1608
|
);
|
|
1259
1609
|
}
|
|
1260
1610
|
}
|
|
1611
|
+
}
|
|
1612
|
+
/**
|
|
1613
|
+
* Clean up the TAP pool
|
|
1614
|
+
*
|
|
1615
|
+
* Note: This is a sync function for compatibility with process cleanup.
|
|
1616
|
+
* Resources are cleaned up asynchronously (fire-and-forget).
|
|
1617
|
+
* Any remaining resources will be cleaned up by init() on next startup.
|
|
1618
|
+
*/
|
|
1619
|
+
cleanup() {
|
|
1620
|
+
if (!this.initialized) {
|
|
1621
|
+
return;
|
|
1622
|
+
}
|
|
1623
|
+
logger4.log(`Cleaning up TAP pool (${this.queue.length} pairs)...`);
|
|
1624
|
+
for (const { tapDevice, guestIp } of this.queue) {
|
|
1625
|
+
releaseIP(guestIp).catch(() => {
|
|
1626
|
+
});
|
|
1627
|
+
this.config.deleteTap(tapDevice).catch((err) => {
|
|
1628
|
+
logger4.log(
|
|
1629
|
+
`Failed to delete ${tapDevice}: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1630
|
+
);
|
|
1631
|
+
});
|
|
1632
|
+
}
|
|
1633
|
+
this.queue = [];
|
|
1261
1634
|
this.initialized = false;
|
|
1262
1635
|
this.replenishing = false;
|
|
1263
|
-
|
|
1636
|
+
logger4.log("TAP pool cleanup initiated");
|
|
1264
1637
|
}
|
|
1265
1638
|
};
|
|
1266
|
-
var
|
|
1267
|
-
async function
|
|
1268
|
-
if (
|
|
1269
|
-
|
|
1639
|
+
// Module-wide TAP pool instance; null until initTapPool() runs.
var tapPool = null;
/**
 * Replace the module-wide TAP pool with a fresh one built from `config`.
 * Any previous pool is cleaned up first.
 *
 * @param config Options passed straight to the TapPool constructor.
 * @returns The module-wide pool after its init() has resolved.
 */
async function initTapPool(config) {
  tapPool?.cleanup();
  const fresh = new TapPool(config);
  tapPool = fresh;
  await fresh.init();
  // Return the module-level binding (not `fresh`) so the result reflects
  // any reassignment that happened while init() was pending.
  return tapPool;
}
|
|
1648
|
+
/**
 * Acquire a network configuration for `vmId` from the module-wide TAP pool.
 *
 * @throws {Error} (as a rejection) if initTapPool() has not been called yet.
 */
async function acquireTap(vmId) {
  if (tapPool) {
    return tapPool.acquire(vmId);
  }
  throw new Error("TAP pool not initialized. Call initTapPool() first.");
}
|
|
1275
|
-
function
|
|
1276
|
-
if (!
|
|
1277
|
-
throw new Error(
|
|
1278
|
-
"Overlay pool not initialized. Call initOverlayPool() first."
|
|
1279
|
-
);
|
|
1654
|
+
/**
 * Hand a {TAP, IP} pair back to the module-wide TAP pool.
 *
 * @throws {Error} (as a rejection) if initTapPool() has not been called yet.
 */
async function releaseTap(tapDevice, guestIp, vmId) {
  if (tapPool) {
    return tapPool.release(tapDevice, guestIp, vmId);
  }
  throw new Error("TAP pool not initialized. Call initTapPool() first.");
}
|
|
1283
|
-
function
|
|
1284
|
-
if (
|
|
1285
|
-
|
|
1286
|
-
|
|
1660
|
+
/**
 * Clean up and drop the module-wide TAP pool. No-op when none exists.
 */
function cleanupTapPool() {
  if (!tapPool) {
    return;
  }
  tapPool.cleanup();
  tapPool = null;
}
|
|
1289
1666
|
|
|
1290
1667
|
// src/lib/firecracker/vm.ts
|
|
1291
|
-
var
|
|
1668
|
+
var logger5 = createLogger("VM");
|
|
1292
1669
|
var FirecrackerVM = class {
|
|
1293
1670
|
config;
|
|
1294
1671
|
process = null;
|
|
@@ -1304,8 +1681,8 @@ var FirecrackerVM = class {
|
|
|
1304
1681
|
constructor(config) {
|
|
1305
1682
|
this.config = config;
|
|
1306
1683
|
this.workDir = config.workDir || tempPaths.vmWorkDir(config.vmId);
|
|
1307
|
-
this.socketPath =
|
|
1308
|
-
this.vsockPath =
|
|
1684
|
+
this.socketPath = path4.join(this.workDir, "firecracker.sock");
|
|
1685
|
+
this.vsockPath = path4.join(this.workDir, "vsock.sock");
|
|
1309
1686
|
}
|
|
1310
1687
|
/**
|
|
1311
1688
|
* Get current VM state
|
|
@@ -1350,19 +1727,13 @@ var FirecrackerVM = class {
|
|
|
1350
1727
|
if (fs4.existsSync(this.socketPath)) {
|
|
1351
1728
|
fs4.unlinkSync(this.socketPath);
|
|
1352
1729
|
}
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
};
|
|
1360
|
-
const [, networkConfig] = await Promise.all([
|
|
1361
|
-
setupOverlay(),
|
|
1362
|
-
createTapDevice(this.config.vmId)
|
|
1363
|
-
]);
|
|
1364
|
-
this.networkConfig = networkConfig;
|
|
1365
|
-
logger4.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
|
|
1730
|
+
logger5.log(`[VM ${this.config.vmId}] Acquiring overlay...`);
|
|
1731
|
+
this.vmOverlayPath = await acquireOverlay();
|
|
1732
|
+
logger5.log(`[VM ${this.config.vmId}] Overlay acquired`);
|
|
1733
|
+
logger5.log(`[VM ${this.config.vmId}] Acquiring TAP+IP...`);
|
|
1734
|
+
this.networkConfig = await acquireTap(this.config.vmId);
|
|
1735
|
+
logger5.log(`[VM ${this.config.vmId}] TAP+IP acquired`);
|
|
1736
|
+
logger5.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
|
|
1366
1737
|
this.process = spawn(
|
|
1367
1738
|
this.config.firecrackerBinary,
|
|
1368
1739
|
["--api-sock", this.socketPath],
|
|
@@ -1373,11 +1744,11 @@ var FirecrackerVM = class {
|
|
|
1373
1744
|
}
|
|
1374
1745
|
);
|
|
1375
1746
|
this.process.on("error", (err) => {
|
|
1376
|
-
|
|
1747
|
+
logger5.log(`[VM ${this.config.vmId}] Firecracker error: ${err}`);
|
|
1377
1748
|
this.state = "error";
|
|
1378
1749
|
});
|
|
1379
1750
|
this.process.on("exit", (code, signal) => {
|
|
1380
|
-
|
|
1751
|
+
logger5.log(
|
|
1381
1752
|
`[VM ${this.config.vmId}] Firecracker exited: code=${code}, signal=${signal}`
|
|
1382
1753
|
);
|
|
1383
1754
|
if (this.state !== "stopped") {
|
|
@@ -1390,7 +1761,7 @@ var FirecrackerVM = class {
|
|
|
1390
1761
|
});
|
|
1391
1762
|
stdoutRL.on("line", (line) => {
|
|
1392
1763
|
if (line.trim()) {
|
|
1393
|
-
|
|
1764
|
+
logger5.log(`[VM ${this.config.vmId}] ${line}`);
|
|
1394
1765
|
}
|
|
1395
1766
|
});
|
|
1396
1767
|
}
|
|
@@ -1400,19 +1771,19 @@ var FirecrackerVM = class {
|
|
|
1400
1771
|
});
|
|
1401
1772
|
stderrRL.on("line", (line) => {
|
|
1402
1773
|
if (line.trim()) {
|
|
1403
|
-
|
|
1774
|
+
logger5.log(`[VM ${this.config.vmId}] stderr: ${line}`);
|
|
1404
1775
|
}
|
|
1405
1776
|
});
|
|
1406
1777
|
}
|
|
1407
1778
|
this.client = new FirecrackerClient(this.socketPath);
|
|
1408
|
-
|
|
1779
|
+
logger5.log(`[VM ${this.config.vmId}] Waiting for API...`);
|
|
1409
1780
|
await this.client.waitUntilReady(1e4, 100);
|
|
1410
1781
|
this.state = "configuring";
|
|
1411
1782
|
await this.configure();
|
|
1412
|
-
|
|
1783
|
+
logger5.log(`[VM ${this.config.vmId}] Booting...`);
|
|
1413
1784
|
await this.client.start();
|
|
1414
1785
|
this.state = "running";
|
|
1415
|
-
|
|
1786
|
+
logger5.log(
|
|
1416
1787
|
`[VM ${this.config.vmId}] Running at ${this.networkConfig.guestIp}`
|
|
1417
1788
|
);
|
|
1418
1789
|
} catch (error) {
|
|
@@ -1428,7 +1799,7 @@ var FirecrackerVM = class {
|
|
|
1428
1799
|
if (!this.client || !this.networkConfig || !this.vmOverlayPath) {
|
|
1429
1800
|
throw new Error("VM not properly initialized");
|
|
1430
1801
|
}
|
|
1431
|
-
|
|
1802
|
+
logger5.log(
|
|
1432
1803
|
`[VM ${this.config.vmId}] Configuring: ${this.config.vcpus} vCPUs, ${this.config.memoryMb}MB RAM`
|
|
1433
1804
|
);
|
|
1434
1805
|
await this.client.setMachineConfig({
|
|
@@ -1437,12 +1808,12 @@ var FirecrackerVM = class {
|
|
|
1437
1808
|
});
|
|
1438
1809
|
const networkBootArgs = generateNetworkBootArgs(this.networkConfig);
|
|
1439
1810
|
const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off nomodules random.trust_cpu=on quiet loglevel=0 nokaslr audit=0 numa=off mitigations=off noresume init=/sbin/vm-init ${networkBootArgs}`;
|
|
1440
|
-
|
|
1811
|
+
logger5.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
|
|
1441
1812
|
await this.client.setBootSource({
|
|
1442
1813
|
kernel_image_path: this.config.kernelPath,
|
|
1443
1814
|
boot_args: bootArgs
|
|
1444
1815
|
});
|
|
1445
|
-
|
|
1816
|
+
logger5.log(
|
|
1446
1817
|
`[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
|
|
1447
1818
|
);
|
|
1448
1819
|
await this.client.setDrive({
|
|
@@ -1451,14 +1822,14 @@ var FirecrackerVM = class {
|
|
|
1451
1822
|
is_root_device: true,
|
|
1452
1823
|
is_read_only: true
|
|
1453
1824
|
});
|
|
1454
|
-
|
|
1825
|
+
logger5.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
|
|
1455
1826
|
await this.client.setDrive({
|
|
1456
1827
|
drive_id: "overlay",
|
|
1457
1828
|
path_on_host: this.vmOverlayPath,
|
|
1458
1829
|
is_root_device: false,
|
|
1459
1830
|
is_read_only: false
|
|
1460
1831
|
});
|
|
1461
|
-
|
|
1832
|
+
logger5.log(
|
|
1462
1833
|
`[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
|
|
1463
1834
|
);
|
|
1464
1835
|
await this.client.setNetworkInterface({
|
|
@@ -1466,7 +1837,7 @@ var FirecrackerVM = class {
|
|
|
1466
1837
|
guest_mac: this.networkConfig.guestMac,
|
|
1467
1838
|
host_dev_name: this.networkConfig.tapDevice
|
|
1468
1839
|
});
|
|
1469
|
-
|
|
1840
|
+
logger5.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
|
|
1470
1841
|
await this.client.setVsock({
|
|
1471
1842
|
vsock_id: "vsock0",
|
|
1472
1843
|
guest_cid: 3,
|
|
@@ -1478,15 +1849,15 @@ var FirecrackerVM = class {
|
|
|
1478
1849
|
*/
|
|
1479
1850
|
async stop() {
|
|
1480
1851
|
if (this.state !== "running") {
|
|
1481
|
-
|
|
1852
|
+
logger5.log(`[VM ${this.config.vmId}] Not running, state: ${this.state}`);
|
|
1482
1853
|
return;
|
|
1483
1854
|
}
|
|
1484
1855
|
this.state = "stopping";
|
|
1485
|
-
|
|
1856
|
+
logger5.log(`[VM ${this.config.vmId}] Stopping...`);
|
|
1486
1857
|
try {
|
|
1487
1858
|
if (this.client) {
|
|
1488
1859
|
await this.client.sendCtrlAltDel().catch((error) => {
|
|
1489
|
-
|
|
1860
|
+
logger5.log(
|
|
1490
1861
|
`[VM ${this.config.vmId}] Graceful shutdown signal failed (VM may already be stopping): ${error instanceof Error ? error.message : error}`
|
|
1491
1862
|
);
|
|
1492
1863
|
});
|
|
@@ -1499,7 +1870,7 @@ var FirecrackerVM = class {
|
|
|
1499
1870
|
* Force kill the VM
|
|
1500
1871
|
*/
|
|
1501
1872
|
async kill() {
|
|
1502
|
-
|
|
1873
|
+
logger5.log(`[VM ${this.config.vmId}] Force killing...`);
|
|
1503
1874
|
await this.cleanup();
|
|
1504
1875
|
}
|
|
1505
1876
|
/**
|
|
@@ -1513,10 +1884,17 @@ var FirecrackerVM = class {
|
|
|
1513
1884
|
this.process = null;
|
|
1514
1885
|
}
|
|
1515
1886
|
if (this.networkConfig) {
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1887
|
+
try {
|
|
1888
|
+
await releaseTap(
|
|
1889
|
+
this.networkConfig.tapDevice,
|
|
1890
|
+
this.networkConfig.guestIp,
|
|
1891
|
+
this.config.vmId
|
|
1892
|
+
);
|
|
1893
|
+
} catch (err) {
|
|
1894
|
+
logger5.log(
|
|
1895
|
+
`[VM ${this.config.vmId}] Failed to release TAP: ${err instanceof Error ? err.message : "Unknown"}`
|
|
1896
|
+
);
|
|
1897
|
+
}
|
|
1520
1898
|
this.networkConfig = null;
|
|
1521
1899
|
}
|
|
1522
1900
|
if (this.vmOverlayPath && fs4.existsSync(this.vmOverlayPath)) {
|
|
@@ -1528,7 +1906,7 @@ var FirecrackerVM = class {
|
|
|
1528
1906
|
}
|
|
1529
1907
|
this.client = null;
|
|
1530
1908
|
this.state = "stopped";
|
|
1531
|
-
|
|
1909
|
+
logger5.log(`[VM ${this.config.vmId}] Stopped`);
|
|
1532
1910
|
}
|
|
1533
1911
|
/**
|
|
1534
1912
|
* Wait for the VM process to exit
|
|
@@ -1591,8 +1969,8 @@ function encodeExecPayload(command, timeoutMs) {
|
|
|
1591
1969
|
cmdBuf.copy(payload, 8);
|
|
1592
1970
|
return payload;
|
|
1593
1971
|
}
|
|
1594
|
-
function encodeWriteFilePayload(
|
|
1595
|
-
const pathBuf = Buffer.from(
|
|
1972
|
+
function encodeWriteFilePayload(path9, content, sudo) {
|
|
1973
|
+
const pathBuf = Buffer.from(path9, "utf-8");
|
|
1596
1974
|
if (pathBuf.length > 65535) {
|
|
1597
1975
|
throw new Error(`Path too long: ${pathBuf.length} bytes (max 65535)`);
|
|
1598
1976
|
}
|
|
@@ -2449,8 +2827,8 @@ function getErrorMap() {
|
|
|
2449
2827
|
return overrideErrorMap;
|
|
2450
2828
|
}
|
|
2451
2829
|
var makeIssue = (params) => {
|
|
2452
|
-
const { data, path:
|
|
2453
|
-
const fullPath = [...
|
|
2830
|
+
const { data, path: path9, errorMaps, issueData } = params;
|
|
2831
|
+
const fullPath = [...path9, ...issueData.path || []];
|
|
2454
2832
|
const fullIssue = {
|
|
2455
2833
|
...issueData,
|
|
2456
2834
|
path: fullPath
|
|
@@ -2549,11 +2927,11 @@ var errorUtil;
|
|
|
2549
2927
|
errorUtil2.toString = (message) => typeof message === "string" ? message : message === null || message === void 0 ? void 0 : message.message;
|
|
2550
2928
|
})(errorUtil || (errorUtil = {}));
|
|
2551
2929
|
var ParseInputLazyPath = class {
|
|
2552
|
-
constructor(parent, value,
|
|
2930
|
+
constructor(parent, value, path9, key) {
|
|
2553
2931
|
this._cachedPath = [];
|
|
2554
2932
|
this.parent = parent;
|
|
2555
2933
|
this.data = value;
|
|
2556
|
-
this._path =
|
|
2934
|
+
this._path = path9;
|
|
2557
2935
|
this._key = key;
|
|
2558
2936
|
}
|
|
2559
2937
|
get path() {
|
|
@@ -8729,7 +9107,7 @@ var ENV_LOADER_PATH = "/usr/local/bin/vm0-agent/env-loader.mjs";
|
|
|
8729
9107
|
|
|
8730
9108
|
// src/lib/proxy/vm-registry.ts
|
|
8731
9109
|
import fs6 from "fs";
|
|
8732
|
-
var
|
|
9110
|
+
var logger6 = createLogger("VMRegistry");
|
|
8733
9111
|
var DEFAULT_REGISTRY_PATH = tempPaths.vmRegistry;
|
|
8734
9112
|
var VMRegistry = class {
|
|
8735
9113
|
registryPath;
|
|
@@ -8776,7 +9154,7 @@ var VMRegistry = class {
|
|
|
8776
9154
|
this.save();
|
|
8777
9155
|
const firewallInfo = options?.firewallRules ? ` with ${options.firewallRules.length} firewall rules` : "";
|
|
8778
9156
|
const mitmInfo = options?.mitmEnabled ? ", MITM enabled" : "";
|
|
8779
|
-
|
|
9157
|
+
logger6.log(
|
|
8780
9158
|
`Registered VM ${vmIp} for run ${runId}${firewallInfo}${mitmInfo}`
|
|
8781
9159
|
);
|
|
8782
9160
|
}
|
|
@@ -8788,7 +9166,7 @@ var VMRegistry = class {
|
|
|
8788
9166
|
const registration = this.data.vms[vmIp];
|
|
8789
9167
|
delete this.data.vms[vmIp];
|
|
8790
9168
|
this.save();
|
|
8791
|
-
|
|
9169
|
+
logger6.log(`Unregistered VM ${vmIp} (run ${registration.runId})`);
|
|
8792
9170
|
}
|
|
8793
9171
|
}
|
|
8794
9172
|
/**
|
|
@@ -8809,7 +9187,7 @@ var VMRegistry = class {
|
|
|
8809
9187
|
clear() {
|
|
8810
9188
|
this.data.vms = {};
|
|
8811
9189
|
this.save();
|
|
8812
|
-
|
|
9190
|
+
logger6.log("Cleared all registrations");
|
|
8813
9191
|
}
|
|
8814
9192
|
/**
|
|
8815
9193
|
* Get the path to the registry file
|
|
@@ -8818,22 +9196,22 @@ var VMRegistry = class {
|
|
|
8818
9196
|
return this.registryPath;
|
|
8819
9197
|
}
|
|
8820
9198
|
};
|
|
8821
|
-
var
|
|
9199
|
+
var globalRegistry2 = null;
|
|
8822
9200
|
function getVMRegistry() {
|
|
8823
|
-
if (!
|
|
8824
|
-
|
|
9201
|
+
if (!globalRegistry2) {
|
|
9202
|
+
globalRegistry2 = new VMRegistry();
|
|
8825
9203
|
}
|
|
8826
|
-
return
|
|
9204
|
+
return globalRegistry2;
|
|
8827
9205
|
}
|
|
8828
9206
|
function initVMRegistry(registryPath) {
|
|
8829
|
-
|
|
8830
|
-
return
|
|
9207
|
+
globalRegistry2 = new VMRegistry(registryPath);
|
|
9208
|
+
return globalRegistry2;
|
|
8831
9209
|
}
|
|
8832
9210
|
|
|
8833
9211
|
// src/lib/proxy/proxy-manager.ts
|
|
8834
9212
|
import { spawn as spawn2 } from "child_process";
|
|
8835
9213
|
import fs7 from "fs";
|
|
8836
|
-
import
|
|
9214
|
+
import path5 from "path";
|
|
8837
9215
|
|
|
8838
9216
|
// src/lib/proxy/mitm-addon-script.ts
|
|
8839
9217
|
var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
|
|
@@ -9319,7 +9697,7 @@ addons = [tls_clienthello, request, response]
|
|
|
9319
9697
|
`;
|
|
9320
9698
|
|
|
9321
9699
|
// src/lib/proxy/proxy-manager.ts
|
|
9322
|
-
var
|
|
9700
|
+
var logger7 = createLogger("ProxyManager");
|
|
9323
9701
|
var DEFAULT_PROXY_OPTIONS = {
|
|
9324
9702
|
port: 8080,
|
|
9325
9703
|
registryPath: DEFAULT_REGISTRY_PATH
|
|
@@ -9329,7 +9707,7 @@ var ProxyManager = class {
|
|
|
9329
9707
|
process = null;
|
|
9330
9708
|
isRunning = false;
|
|
9331
9709
|
constructor(config) {
|
|
9332
|
-
const addonPath =
|
|
9710
|
+
const addonPath = path5.join(config.caDir, "mitm_addon.py");
|
|
9333
9711
|
this.config = {
|
|
9334
9712
|
...DEFAULT_PROXY_OPTIONS,
|
|
9335
9713
|
...config,
|
|
@@ -9356,14 +9734,14 @@ var ProxyManager = class {
|
|
|
9356
9734
|
* Ensure the addon script exists at the configured path
|
|
9357
9735
|
*/
|
|
9358
9736
|
ensureAddonScript() {
|
|
9359
|
-
const addonDir =
|
|
9737
|
+
const addonDir = path5.dirname(this.config.addonPath);
|
|
9360
9738
|
if (!fs7.existsSync(addonDir)) {
|
|
9361
9739
|
fs7.mkdirSync(addonDir, { recursive: true });
|
|
9362
9740
|
}
|
|
9363
9741
|
fs7.writeFileSync(this.config.addonPath, RUNNER_MITM_ADDON_SCRIPT, {
|
|
9364
9742
|
mode: 493
|
|
9365
9743
|
});
|
|
9366
|
-
|
|
9744
|
+
logger7.log(`Addon script written to ${this.config.addonPath}`);
|
|
9367
9745
|
}
|
|
9368
9746
|
/**
|
|
9369
9747
|
* Validate proxy configuration
|
|
@@ -9372,7 +9750,7 @@ var ProxyManager = class {
|
|
|
9372
9750
|
if (!fs7.existsSync(this.config.caDir)) {
|
|
9373
9751
|
throw new Error(`Proxy CA directory not found: ${this.config.caDir}`);
|
|
9374
9752
|
}
|
|
9375
|
-
const caCertPath =
|
|
9753
|
+
const caCertPath = path5.join(this.config.caDir, "mitmproxy-ca.pem");
|
|
9376
9754
|
if (!fs7.existsSync(caCertPath)) {
|
|
9377
9755
|
throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
|
|
9378
9756
|
}
|
|
@@ -9383,7 +9761,7 @@ var ProxyManager = class {
|
|
|
9383
9761
|
*/
|
|
9384
9762
|
async start() {
|
|
9385
9763
|
if (this.isRunning) {
|
|
9386
|
-
|
|
9764
|
+
logger7.log("Proxy already running");
|
|
9387
9765
|
return;
|
|
9388
9766
|
}
|
|
9389
9767
|
const mitmproxyInstalled = await this.checkMitmproxyInstalled();
|
|
@@ -9394,11 +9772,11 @@ var ProxyManager = class {
|
|
|
9394
9772
|
}
|
|
9395
9773
|
this.validateConfig();
|
|
9396
9774
|
getVMRegistry();
|
|
9397
|
-
|
|
9398
|
-
|
|
9399
|
-
|
|
9400
|
-
|
|
9401
|
-
|
|
9775
|
+
logger7.log("Starting mitmproxy...");
|
|
9776
|
+
logger7.log(` Port: ${this.config.port}`);
|
|
9777
|
+
logger7.log(` CA Dir: ${this.config.caDir}`);
|
|
9778
|
+
logger7.log(` Addon: ${this.config.addonPath}`);
|
|
9779
|
+
logger7.log(` Registry: ${this.config.registryPath}`);
|
|
9402
9780
|
const args = [
|
|
9403
9781
|
"--mode",
|
|
9404
9782
|
"transparent",
|
|
@@ -9428,18 +9806,18 @@ var ProxyManager = class {
|
|
|
9428
9806
|
mitmLogger.log(data.toString().trim());
|
|
9429
9807
|
});
|
|
9430
9808
|
this.process.on("close", (code) => {
|
|
9431
|
-
|
|
9809
|
+
logger7.log(`mitmproxy exited with code ${code}`);
|
|
9432
9810
|
this.isRunning = false;
|
|
9433
9811
|
this.process = null;
|
|
9434
9812
|
});
|
|
9435
9813
|
this.process.on("error", (err) => {
|
|
9436
|
-
|
|
9814
|
+
logger7.error(`mitmproxy error: ${err.message}`);
|
|
9437
9815
|
this.isRunning = false;
|
|
9438
9816
|
this.process = null;
|
|
9439
9817
|
});
|
|
9440
9818
|
await this.waitForReady();
|
|
9441
9819
|
this.isRunning = true;
|
|
9442
|
-
|
|
9820
|
+
logger7.log("mitmproxy started successfully");
|
|
9443
9821
|
process.on("exit", () => {
|
|
9444
9822
|
if (this.process && !this.process.killed) {
|
|
9445
9823
|
this.process.kill("SIGKILL");
|
|
@@ -9470,24 +9848,24 @@ var ProxyManager = class {
|
|
|
9470
9848
|
*/
|
|
9471
9849
|
async stop() {
|
|
9472
9850
|
if (!this.process || !this.isRunning) {
|
|
9473
|
-
|
|
9851
|
+
logger7.log("Proxy not running");
|
|
9474
9852
|
return;
|
|
9475
9853
|
}
|
|
9476
|
-
|
|
9854
|
+
logger7.log("Stopping mitmproxy...");
|
|
9477
9855
|
return new Promise((resolve) => {
|
|
9478
9856
|
if (!this.process) {
|
|
9479
9857
|
resolve();
|
|
9480
9858
|
return;
|
|
9481
9859
|
}
|
|
9482
9860
|
const timeout = setTimeout(() => {
|
|
9483
|
-
|
|
9861
|
+
logger7.log("Force killing mitmproxy...");
|
|
9484
9862
|
this.process?.kill("SIGKILL");
|
|
9485
9863
|
}, 5e3);
|
|
9486
9864
|
this.process.on("close", () => {
|
|
9487
9865
|
clearTimeout(timeout);
|
|
9488
9866
|
this.isRunning = false;
|
|
9489
9867
|
this.process = null;
|
|
9490
|
-
|
|
9868
|
+
logger7.log("mitmproxy stopped");
|
|
9491
9869
|
resolve();
|
|
9492
9870
|
});
|
|
9493
9871
|
this.process.kill("SIGTERM");
|
|
@@ -9632,15 +10010,15 @@ async function withSandboxTiming(actionType, fn) {
|
|
|
9632
10010
|
}
|
|
9633
10011
|
|
|
9634
10012
|
// src/lib/vm-setup/vm-setup.ts
|
|
9635
|
-
var
|
|
10013
|
+
var logger8 = createLogger("VMSetup");
|
|
9636
10014
|
var VM_PROXY_CA_PATH = "/usr/local/share/ca-certificates/vm0-proxy-ca.crt";
|
|
9637
10015
|
async function downloadStorages(guest, manifest) {
|
|
9638
10016
|
const totalArchives = manifest.storages.filter((s) => s.archiveUrl).length + (manifest.artifact?.archiveUrl ? 1 : 0);
|
|
9639
10017
|
if (totalArchives === 0) {
|
|
9640
|
-
|
|
10018
|
+
logger8.log(`No archives to download`);
|
|
9641
10019
|
return;
|
|
9642
10020
|
}
|
|
9643
|
-
|
|
10021
|
+
logger8.log(`Downloading ${totalArchives} archive(s)...`);
|
|
9644
10022
|
const manifestJson = JSON.stringify(manifest);
|
|
9645
10023
|
await guest.writeFile("/tmp/storage-manifest.json", manifestJson);
|
|
9646
10024
|
const result = await guest.exec(
|
|
@@ -9649,23 +10027,23 @@ async function downloadStorages(guest, manifest) {
|
|
|
9649
10027
|
if (result.exitCode !== 0) {
|
|
9650
10028
|
throw new Error(`Storage download failed: ${result.stderr}`);
|
|
9651
10029
|
}
|
|
9652
|
-
|
|
10030
|
+
logger8.log(`Storage download completed`);
|
|
9653
10031
|
}
|
|
9654
10032
|
async function restoreSessionHistory(guest, resumeSession, workingDir, cliAgentType) {
|
|
9655
10033
|
const { sessionId, sessionHistory } = resumeSession;
|
|
9656
10034
|
let sessionPath;
|
|
9657
10035
|
if (cliAgentType === "codex") {
|
|
9658
|
-
|
|
10036
|
+
logger8.log(`Codex resume session will be handled by checkpoint.py`);
|
|
9659
10037
|
return;
|
|
9660
10038
|
} else {
|
|
9661
10039
|
const projectName = workingDir.replace(/^\//, "").replace(/\//g, "-");
|
|
9662
10040
|
sessionPath = `/home/user/.claude/projects/-${projectName}/${sessionId}.jsonl`;
|
|
9663
10041
|
}
|
|
9664
|
-
|
|
10042
|
+
logger8.log(`Restoring session history to ${sessionPath}`);
|
|
9665
10043
|
const dirPath = sessionPath.substring(0, sessionPath.lastIndexOf("/"));
|
|
9666
10044
|
await guest.execOrThrow(`mkdir -p "${dirPath}"`);
|
|
9667
10045
|
await guest.writeFile(sessionPath, sessionHistory);
|
|
9668
|
-
|
|
10046
|
+
logger8.log(
|
|
9669
10047
|
`Session history restored (${sessionHistory.split("\n").length} lines)`
|
|
9670
10048
|
);
|
|
9671
10049
|
}
|
|
@@ -9719,7 +10097,7 @@ function buildEnvironmentVariables(context, apiUrl) {
|
|
|
9719
10097
|
|
|
9720
10098
|
// src/lib/network-logs/network-logs.ts
|
|
9721
10099
|
import fs8 from "fs";
|
|
9722
|
-
var
|
|
10100
|
+
var logger9 = createLogger("NetworkLogs");
|
|
9723
10101
|
function getNetworkLogPath(runId) {
|
|
9724
10102
|
return tempPaths.networkLog(runId);
|
|
9725
10103
|
}
|
|
@@ -9733,7 +10111,7 @@ function readNetworkLogs(runId) {
|
|
|
9733
10111
|
const lines = content.split("\n").filter((line) => line.trim());
|
|
9734
10112
|
return lines.map((line) => JSON.parse(line));
|
|
9735
10113
|
} catch (err) {
|
|
9736
|
-
|
|
10114
|
+
logger9.error(
|
|
9737
10115
|
`Failed to read network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9738
10116
|
);
|
|
9739
10117
|
return [];
|
|
@@ -9746,7 +10124,7 @@ function cleanupNetworkLogs(runId) {
|
|
|
9746
10124
|
fs8.unlinkSync(logPath);
|
|
9747
10125
|
}
|
|
9748
10126
|
} catch (err) {
|
|
9749
|
-
|
|
10127
|
+
logger9.error(
|
|
9750
10128
|
`Failed to cleanup network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9751
10129
|
);
|
|
9752
10130
|
}
|
|
@@ -9754,10 +10132,10 @@ function cleanupNetworkLogs(runId) {
|
|
|
9754
10132
|
async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
|
|
9755
10133
|
const networkLogs = readNetworkLogs(runId);
|
|
9756
10134
|
if (networkLogs.length === 0) {
|
|
9757
|
-
|
|
10135
|
+
logger9.log(`No network logs to upload for ${runId}`);
|
|
9758
10136
|
return;
|
|
9759
10137
|
}
|
|
9760
|
-
|
|
10138
|
+
logger9.log(
|
|
9761
10139
|
`Uploading ${networkLogs.length} network log entries for ${runId}`
|
|
9762
10140
|
);
|
|
9763
10141
|
const headers = {
|
|
@@ -9778,15 +10156,15 @@ async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
|
|
|
9778
10156
|
});
|
|
9779
10157
|
if (!response.ok) {
|
|
9780
10158
|
const errorText = await response.text();
|
|
9781
|
-
|
|
10159
|
+
logger9.error(`Failed to upload network logs: ${errorText}`);
|
|
9782
10160
|
return;
|
|
9783
10161
|
}
|
|
9784
|
-
|
|
10162
|
+
logger9.log(`Network logs uploaded successfully for ${runId}`);
|
|
9785
10163
|
cleanupNetworkLogs(runId);
|
|
9786
10164
|
}
|
|
9787
10165
|
|
|
9788
10166
|
// src/lib/executor.ts
|
|
9789
|
-
var
|
|
10167
|
+
var logger10 = createLogger("Executor");
|
|
9790
10168
|
function getVmIdFromRunId(runId) {
|
|
9791
10169
|
return runId.split("-")[0] || runId.substring(0, 8);
|
|
9792
10170
|
}
|
|
@@ -9806,9 +10184,9 @@ async function executeJob(context, config, options = {}) {
|
|
|
9806
10184
|
const vmId = getVmIdFromRunId(context.runId);
|
|
9807
10185
|
let vm = null;
|
|
9808
10186
|
let guestIp = null;
|
|
9809
|
-
|
|
10187
|
+
logger10.log(`Starting job ${context.runId} in VM ${vmId}`);
|
|
9810
10188
|
try {
|
|
9811
|
-
const workspacesDir =
|
|
10189
|
+
const workspacesDir = path6.join(process.cwd(), "workspaces");
|
|
9812
10190
|
const vmConfig = {
|
|
9813
10191
|
vmId,
|
|
9814
10192
|
vcpus: config.sandbox.vcpu,
|
|
@@ -9816,30 +10194,30 @@ async function executeJob(context, config, options = {}) {
|
|
|
9816
10194
|
kernelPath: config.firecracker.kernel,
|
|
9817
10195
|
rootfsPath: config.firecracker.rootfs,
|
|
9818
10196
|
firecrackerBinary: config.firecracker.binary,
|
|
9819
|
-
workDir:
|
|
10197
|
+
workDir: path6.join(workspacesDir, `vm0-${vmId}`)
|
|
9820
10198
|
};
|
|
9821
|
-
|
|
10199
|
+
logger10.log(`Creating VM ${vmId}...`);
|
|
9822
10200
|
vm = new FirecrackerVM(vmConfig);
|
|
9823
10201
|
await withSandboxTiming("vm_create", () => vm.start());
|
|
9824
10202
|
guestIp = vm.getGuestIp();
|
|
9825
10203
|
if (!guestIp) {
|
|
9826
10204
|
throw new Error("VM started but no IP address available");
|
|
9827
10205
|
}
|
|
9828
|
-
|
|
10206
|
+
logger10.log(`VM ${vmId} started, guest IP: ${guestIp}`);
|
|
9829
10207
|
const vsockPath = vm.getVsockPath();
|
|
9830
10208
|
const guest = new VsockClient(vsockPath);
|
|
9831
|
-
|
|
9832
|
-
|
|
10209
|
+
logger10.log(`Using vsock for guest communication: ${vsockPath}`);
|
|
10210
|
+
logger10.log(`Waiting for guest connection...`);
|
|
9833
10211
|
await withSandboxTiming(
|
|
9834
10212
|
"guest_wait",
|
|
9835
10213
|
() => guest.waitForGuestConnection(3e4)
|
|
9836
10214
|
);
|
|
9837
|
-
|
|
10215
|
+
logger10.log(`Guest client ready`);
|
|
9838
10216
|
const firewallConfig = context.experimentalFirewall;
|
|
9839
10217
|
if (firewallConfig?.enabled) {
|
|
9840
10218
|
const mitmEnabled = firewallConfig.experimental_mitm ?? false;
|
|
9841
10219
|
const sealSecretsEnabled = firewallConfig.experimental_seal_secrets ?? false;
|
|
9842
|
-
|
|
10220
|
+
logger10.log(
|
|
9843
10221
|
`Setting up network security for VM ${guestIp} (mitm=${mitmEnabled}, sealSecrets=${sealSecretsEnabled})`
|
|
9844
10222
|
);
|
|
9845
10223
|
await withSandboxTiming("network_setup", async () => {
|
|
@@ -9874,7 +10252,7 @@ async function executeJob(context, config, options = {}) {
|
|
|
9874
10252
|
}
|
|
9875
10253
|
const envVars = buildEnvironmentVariables(context, config.server.url);
|
|
9876
10254
|
const envJson = JSON.stringify(envVars);
|
|
9877
|
-
|
|
10255
|
+
logger10.log(
|
|
9878
10256
|
`Writing env JSON (${envJson.length} bytes) to ${ENV_JSON_PATH}`
|
|
9879
10257
|
);
|
|
9880
10258
|
await guest.writeFile(ENV_JSON_PATH, envJson);
|
|
@@ -9883,14 +10261,14 @@ async function executeJob(context, config, options = {}) {
|
|
|
9883
10261
|
const maxWaitMs = 2 * 60 * 60 * 1e3;
|
|
9884
10262
|
let command;
|
|
9885
10263
|
if (options.benchmarkMode) {
|
|
9886
|
-
|
|
10264
|
+
logger10.log(`Running command directly (benchmark mode)...`);
|
|
9887
10265
|
command = `${context.prompt} > ${systemLogFile} 2>&1`;
|
|
9888
10266
|
} else {
|
|
9889
|
-
|
|
10267
|
+
logger10.log(`Running agent via env-loader...`);
|
|
9890
10268
|
command = `node ${ENV_LOADER_PATH} > ${systemLogFile} 2>&1`;
|
|
9891
10269
|
}
|
|
9892
10270
|
const { pid } = await guest.spawnAndWatch(command, maxWaitMs);
|
|
9893
|
-
|
|
10271
|
+
logger10.log(`Process started with pid=${pid}`);
|
|
9894
10272
|
let exitCode = 1;
|
|
9895
10273
|
let exitEvent;
|
|
9896
10274
|
try {
|
|
@@ -9899,7 +10277,7 @@ async function executeJob(context, config, options = {}) {
|
|
|
9899
10277
|
} catch {
|
|
9900
10278
|
const durationMs2 = Date.now() - startTime;
|
|
9901
10279
|
const duration2 = Math.round(durationMs2 / 1e3);
|
|
9902
|
-
|
|
10280
|
+
logger10.log(`Agent timed out after ${duration2}s`);
|
|
9903
10281
|
recordOperation({
|
|
9904
10282
|
actionType: "agent_execute",
|
|
9905
10283
|
durationMs: durationMs2,
|
|
@@ -9917,7 +10295,7 @@ async function executeJob(context, config, options = {}) {
|
|
|
9917
10295
|
`dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
|
|
9918
10296
|
);
|
|
9919
10297
|
if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
|
|
9920
|
-
|
|
10298
|
+
logger10.log(`OOM detected: ${dmesgCheck.stdout}`);
|
|
9921
10299
|
recordOperation({
|
|
9922
10300
|
actionType: "agent_execute",
|
|
9923
10301
|
durationMs,
|
|
@@ -9934,9 +10312,9 @@ async function executeJob(context, config, options = {}) {
|
|
|
9934
10312
|
durationMs,
|
|
9935
10313
|
success: exitCode === 0
|
|
9936
10314
|
});
|
|
9937
|
-
|
|
10315
|
+
logger10.log(`Agent finished in ${duration}s with exit code ${exitCode}`);
|
|
9938
10316
|
if (exitEvent.stderr) {
|
|
9939
|
-
|
|
10317
|
+
logger10.log(
|
|
9940
10318
|
`Stderr (${exitEvent.stderr.length} chars): ${exitEvent.stderr.substring(0, 500)}`
|
|
9941
10319
|
);
|
|
9942
10320
|
}
|
|
@@ -9946,14 +10324,14 @@ async function executeJob(context, config, options = {}) {
|
|
|
9946
10324
|
};
|
|
9947
10325
|
} catch (error) {
|
|
9948
10326
|
const errorMsg = error instanceof Error ? error.message : "Unknown error";
|
|
9949
|
-
|
|
10327
|
+
logger10.error(`Job ${context.runId} failed: ${errorMsg}`);
|
|
9950
10328
|
return {
|
|
9951
10329
|
exitCode: 1,
|
|
9952
10330
|
error: errorMsg
|
|
9953
10331
|
};
|
|
9954
10332
|
} finally {
|
|
9955
10333
|
if (context.experimentalFirewall?.enabled && guestIp) {
|
|
9956
|
-
|
|
10334
|
+
logger10.log(`Cleaning up network security for VM ${guestIp}`);
|
|
9957
10335
|
getVMRegistry().unregister(guestIp);
|
|
9958
10336
|
if (!options.benchmarkMode) {
|
|
9959
10337
|
try {
|
|
@@ -9963,14 +10341,14 @@ async function executeJob(context, config, options = {}) {
|
|
|
9963
10341
|
context.runId
|
|
9964
10342
|
);
|
|
9965
10343
|
} catch (err) {
|
|
9966
|
-
|
|
10344
|
+
logger10.error(
|
|
9967
10345
|
`Failed to upload network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9968
10346
|
);
|
|
9969
10347
|
}
|
|
9970
10348
|
}
|
|
9971
10349
|
}
|
|
9972
10350
|
if (vm) {
|
|
9973
|
-
|
|
10351
|
+
logger10.log(`Cleaning up VM ${vmId}...`);
|
|
9974
10352
|
await withSandboxTiming("cleanup", () => vm.kill());
|
|
9975
10353
|
}
|
|
9976
10354
|
await clearSandboxContext();
|
|
@@ -9979,7 +10357,7 @@ async function executeJob(context, config, options = {}) {
|
|
|
9979
10357
|
|
|
9980
10358
|
// src/lib/runner/status.ts
|
|
9981
10359
|
import { writeFileSync as writeFileSync2 } from "fs";
|
|
9982
|
-
var
|
|
10360
|
+
var logger11 = createLogger("Runner");
|
|
9983
10361
|
function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
|
|
9984
10362
|
const status = {
|
|
9985
10363
|
mode,
|
|
@@ -9991,7 +10369,7 @@ function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
|
|
|
9991
10369
|
try {
|
|
9992
10370
|
writeFileSync2(statusFilePath, JSON.stringify(status, null, 2));
|
|
9993
10371
|
} catch (err) {
|
|
9994
|
-
|
|
10372
|
+
logger11.error(
|
|
9995
10373
|
`Failed to write status file: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9996
10374
|
);
|
|
9997
10375
|
}
|
|
@@ -10008,25 +10386,25 @@ function createStatusUpdater(statusFilePath, state) {
|
|
|
10008
10386
|
}
|
|
10009
10387
|
|
|
10010
10388
|
// src/lib/runner/runner-lock.ts
|
|
10011
|
-
import { exec as
|
|
10389
|
+
import { exec as exec5 } from "child_process";
|
|
10012
10390
|
import fs9 from "fs";
|
|
10013
|
-
import
|
|
10014
|
-
import { promisify as
|
|
10015
|
-
var
|
|
10016
|
-
var
|
|
10017
|
-
var DEFAULT_PID_FILE =
|
|
10391
|
+
import path7 from "path";
|
|
10392
|
+
import { promisify as promisify5 } from "util";
|
|
10393
|
+
var execAsync5 = promisify5(exec5);
|
|
10394
|
+
var logger12 = createLogger("RunnerLock");
|
|
10395
|
+
var DEFAULT_PID_FILE = runtimePaths.runnerPid;
|
|
10018
10396
|
var currentPidFile = null;
|
|
10019
|
-
async function
|
|
10397
|
+
async function ensureRunDir(dirPath, skipSudo) {
|
|
10020
10398
|
if (!fs9.existsSync(dirPath)) {
|
|
10021
10399
|
if (skipSudo) {
|
|
10022
10400
|
fs9.mkdirSync(dirPath, { recursive: true });
|
|
10023
10401
|
} else {
|
|
10024
|
-
await
|
|
10025
|
-
await
|
|
10402
|
+
await execAsync5(`sudo mkdir -p ${dirPath}`);
|
|
10403
|
+
await execAsync5(`sudo chmod 777 ${dirPath}`);
|
|
10026
10404
|
}
|
|
10027
10405
|
}
|
|
10028
10406
|
}
|
|
10029
|
-
function
|
|
10407
|
+
function isProcessRunning2(pid) {
|
|
10030
10408
|
try {
|
|
10031
10409
|
process.kill(pid, 0);
|
|
10032
10410
|
return true;
|
|
@@ -10040,64 +10418,68 @@ function isProcessRunning(pid) {
|
|
|
10040
10418
|
async function acquireRunnerLock(options = {}) {
|
|
10041
10419
|
const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
|
|
10042
10420
|
const skipSudo = options.skipSudo ?? false;
|
|
10043
|
-
const runDir =
|
|
10044
|
-
await
|
|
10421
|
+
const runDir = path7.dirname(pidFile);
|
|
10422
|
+
await ensureRunDir(runDir, skipSudo);
|
|
10045
10423
|
if (fs9.existsSync(pidFile)) {
|
|
10046
10424
|
const pidStr = fs9.readFileSync(pidFile, "utf-8").trim();
|
|
10047
10425
|
const pid = parseInt(pidStr, 10);
|
|
10048
|
-
if (!isNaN(pid) &&
|
|
10049
|
-
|
|
10050
|
-
|
|
10426
|
+
if (!isNaN(pid) && isProcessRunning2(pid)) {
|
|
10427
|
+
logger12.error(`Error: Another runner is already running (PID ${pid})`);
|
|
10428
|
+
logger12.error(`If this is incorrect, remove ${pidFile} and try again.`);
|
|
10051
10429
|
process.exit(1);
|
|
10052
10430
|
}
|
|
10053
10431
|
if (isNaN(pid)) {
|
|
10054
|
-
|
|
10432
|
+
logger12.log("Cleaning up invalid PID file");
|
|
10055
10433
|
} else {
|
|
10056
|
-
|
|
10434
|
+
logger12.log(`Cleaning up stale PID file (PID ${pid} not running)`);
|
|
10057
10435
|
}
|
|
10058
10436
|
fs9.unlinkSync(pidFile);
|
|
10059
10437
|
}
|
|
10060
10438
|
fs9.writeFileSync(pidFile, process.pid.toString());
|
|
10061
10439
|
currentPidFile = pidFile;
|
|
10062
|
-
|
|
10440
|
+
logger12.log(`Runner lock acquired (PID ${process.pid})`);
|
|
10063
10441
|
}
|
|
10064
10442
|
function releaseRunnerLock() {
|
|
10065
10443
|
const pidFile = currentPidFile ?? DEFAULT_PID_FILE;
|
|
10066
10444
|
if (fs9.existsSync(pidFile)) {
|
|
10067
10445
|
fs9.unlinkSync(pidFile);
|
|
10068
|
-
|
|
10446
|
+
logger12.log("Runner lock released");
|
|
10069
10447
|
}
|
|
10070
10448
|
currentPidFile = null;
|
|
10071
10449
|
}
|
|
10072
10450
|
|
|
10073
10451
|
// src/lib/runner/setup.ts
|
|
10074
|
-
var
|
|
10452
|
+
var logger13 = createLogger("Runner");
|
|
10075
10453
|
async function setupEnvironment(options) {
|
|
10076
10454
|
const { config } = options;
|
|
10077
10455
|
await acquireRunnerLock();
|
|
10078
10456
|
const networkCheck = checkNetworkPrerequisites();
|
|
10079
10457
|
if (!networkCheck.ok) {
|
|
10080
|
-
|
|
10458
|
+
logger13.error("Network prerequisites not met:");
|
|
10081
10459
|
for (const error of networkCheck.errors) {
|
|
10082
|
-
|
|
10460
|
+
logger13.error(` - ${error}`);
|
|
10083
10461
|
}
|
|
10084
10462
|
process.exit(1);
|
|
10085
10463
|
}
|
|
10086
|
-
|
|
10464
|
+
logger13.log("Setting up network bridge...");
|
|
10087
10465
|
await setupBridge();
|
|
10088
|
-
|
|
10466
|
+
logger13.log("Flushing bridge ARP cache...");
|
|
10089
10467
|
await flushBridgeArpCache();
|
|
10090
|
-
|
|
10468
|
+
logger13.log("Cleaning up orphaned proxy rules...");
|
|
10091
10469
|
await cleanupOrphanedProxyRules(config.name);
|
|
10092
|
-
|
|
10093
|
-
await cleanupOrphanedAllocations();
|
|
10094
|
-
logger12.log("Initializing overlay pool...");
|
|
10470
|
+
logger13.log("Initializing overlay pool...");
|
|
10095
10471
|
await initOverlayPool({
|
|
10096
10472
|
size: config.sandbox.max_concurrent + 2,
|
|
10097
10473
|
replenishThreshold: config.sandbox.max_concurrent,
|
|
10098
10474
|
poolDir: dataPaths.overlayPool(config.data_dir)
|
|
10099
10475
|
});
|
|
10100
|
-
|
|
10476
|
+
logger13.log("Initializing TAP pool...");
|
|
10477
|
+
await initTapPool({
|
|
10478
|
+
name: config.name,
|
|
10479
|
+
size: config.sandbox.max_concurrent + 2,
|
|
10480
|
+
replenishThreshold: config.sandbox.max_concurrent
|
|
10481
|
+
});
|
|
10482
|
+
logger13.log("Initializing network proxy...");
|
|
10101
10483
|
initVMRegistry();
|
|
10102
10484
|
const proxyManager = initProxyManager({
|
|
10103
10485
|
apiUrl: config.server.url,
|
|
@@ -10108,14 +10490,14 @@ async function setupEnvironment(options) {
|
|
|
10108
10490
|
try {
|
|
10109
10491
|
await proxyManager.start();
|
|
10110
10492
|
proxyEnabled = true;
|
|
10111
|
-
|
|
10112
|
-
|
|
10493
|
+
logger13.log("Network proxy initialized successfully");
|
|
10494
|
+
logger13.log("Setting up CIDR proxy rules...");
|
|
10113
10495
|
await setupCIDRProxyRules(config.proxy.port);
|
|
10114
10496
|
} catch (err) {
|
|
10115
|
-
|
|
10497
|
+
logger13.log(
|
|
10116
10498
|
`Network proxy not available: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
10117
10499
|
);
|
|
10118
|
-
|
|
10500
|
+
logger13.log(
|
|
10119
10501
|
"Jobs with experimentalFirewall enabled will run without network interception"
|
|
10120
10502
|
);
|
|
10121
10503
|
}
|
|
@@ -10125,22 +10507,22 @@ async function cleanupEnvironment(resources) {
|
|
|
10125
10507
|
const errors = [];
|
|
10126
10508
|
if (resources.proxyEnabled) {
|
|
10127
10509
|
try {
|
|
10128
|
-
|
|
10510
|
+
logger13.log("Cleaning up CIDR proxy rules...");
|
|
10129
10511
|
await cleanupCIDRProxyRules(resources.proxyPort);
|
|
10130
10512
|
} catch (err) {
|
|
10131
10513
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
10132
10514
|
errors.push(error);
|
|
10133
|
-
|
|
10515
|
+
logger13.error(`Failed to cleanup CIDR proxy rules: ${error.message}`);
|
|
10134
10516
|
}
|
|
10135
10517
|
}
|
|
10136
10518
|
if (resources.proxyEnabled) {
|
|
10137
10519
|
try {
|
|
10138
|
-
|
|
10520
|
+
logger13.log("Stopping network proxy...");
|
|
10139
10521
|
await getProxyManager().stop();
|
|
10140
10522
|
} catch (err) {
|
|
10141
10523
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
10142
10524
|
errors.push(error);
|
|
10143
|
-
|
|
10525
|
+
logger13.error(`Failed to stop network proxy: ${error.message}`);
|
|
10144
10526
|
}
|
|
10145
10527
|
}
|
|
10146
10528
|
try {
|
|
@@ -10148,39 +10530,46 @@ async function cleanupEnvironment(resources) {
|
|
|
10148
10530
|
} catch (err) {
|
|
10149
10531
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
10150
10532
|
errors.push(error);
|
|
10151
|
-
|
|
10533
|
+
logger13.error(`Failed to cleanup overlay pool: ${error.message}`);
|
|
10534
|
+
}
|
|
10535
|
+
try {
|
|
10536
|
+
cleanupTapPool();
|
|
10537
|
+
} catch (err) {
|
|
10538
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
10539
|
+
errors.push(error);
|
|
10540
|
+
logger13.error(`Failed to cleanup TAP pool: ${error.message}`);
|
|
10152
10541
|
}
|
|
10153
10542
|
try {
|
|
10154
10543
|
releaseRunnerLock();
|
|
10155
10544
|
} catch (err) {
|
|
10156
10545
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
10157
10546
|
errors.push(error);
|
|
10158
|
-
|
|
10547
|
+
logger13.error(`Failed to release runner lock: ${error.message}`);
|
|
10159
10548
|
}
|
|
10160
10549
|
if (errors.length > 0) {
|
|
10161
|
-
|
|
10550
|
+
logger13.error(`Cleanup completed with ${errors.length} error(s)`);
|
|
10162
10551
|
}
|
|
10163
10552
|
}
|
|
10164
10553
|
|
|
10165
10554
|
// src/lib/runner/signals.ts
|
|
10166
|
-
var
|
|
10555
|
+
var logger14 = createLogger("Runner");
|
|
10167
10556
|
function setupSignalHandlers(state, handlers) {
|
|
10168
10557
|
process.on("SIGINT", () => {
|
|
10169
|
-
|
|
10558
|
+
logger14.log("\nShutting down...");
|
|
10170
10559
|
state.mode = "stopping";
|
|
10171
10560
|
handlers.updateStatus();
|
|
10172
10561
|
handlers.onShutdown();
|
|
10173
10562
|
});
|
|
10174
10563
|
process.on("SIGTERM", () => {
|
|
10175
|
-
|
|
10564
|
+
logger14.log("\nShutting down...");
|
|
10176
10565
|
state.mode = "stopping";
|
|
10177
10566
|
handlers.updateStatus();
|
|
10178
10567
|
handlers.onShutdown();
|
|
10179
10568
|
});
|
|
10180
10569
|
process.on("SIGUSR1", () => {
|
|
10181
10570
|
if (state.mode === "running") {
|
|
10182
|
-
|
|
10183
|
-
|
|
10571
|
+
logger14.log("\n[Maintenance] Entering drain mode...");
|
|
10572
|
+
logger14.log(
|
|
10184
10573
|
`[Maintenance] Active jobs: ${state.activeRuns.size} (will wait for completion)`
|
|
10185
10574
|
);
|
|
10186
10575
|
state.mode = "draining";
|
|
@@ -10191,7 +10580,7 @@ function setupSignalHandlers(state, handlers) {
|
|
|
10191
10580
|
}
|
|
10192
10581
|
|
|
10193
10582
|
// src/lib/runner/runner.ts
|
|
10194
|
-
var
|
|
10583
|
+
var logger15 = createLogger("Runner");
|
|
10195
10584
|
var Runner = class _Runner {
|
|
10196
10585
|
config;
|
|
10197
10586
|
statusFilePath;
|
|
@@ -10230,7 +10619,7 @@ var Runner = class _Runner {
|
|
|
10230
10619
|
onDrain: () => {
|
|
10231
10620
|
this.pendingJobs.length = 0;
|
|
10232
10621
|
if (this.state.activeRuns.size === 0) {
|
|
10233
|
-
|
|
10622
|
+
logger15.log("[Maintenance] No active jobs, exiting immediately");
|
|
10234
10623
|
this.state.mode = "stopping";
|
|
10235
10624
|
this.updateStatus();
|
|
10236
10625
|
this.resolveShutdown?.();
|
|
@@ -10238,35 +10627,35 @@ var Runner = class _Runner {
|
|
|
10238
10627
|
},
|
|
10239
10628
|
updateStatus: this.updateStatus
|
|
10240
10629
|
});
|
|
10241
|
-
|
|
10630
|
+
logger15.log(
|
|
10242
10631
|
`Starting runner '${this.config.name}' for group '${this.config.group}'...`
|
|
10243
10632
|
);
|
|
10244
|
-
|
|
10245
|
-
|
|
10246
|
-
|
|
10247
|
-
|
|
10633
|
+
logger15.log(`Max concurrent jobs: ${this.config.sandbox.max_concurrent}`);
|
|
10634
|
+
logger15.log(`Status file: ${this.statusFilePath}`);
|
|
10635
|
+
logger15.log("Press Ctrl+C to stop");
|
|
10636
|
+
logger15.log("");
|
|
10248
10637
|
this.updateStatus();
|
|
10249
|
-
|
|
10638
|
+
logger15.log("Checking for pending jobs...");
|
|
10250
10639
|
await this.pollFallback();
|
|
10251
|
-
|
|
10640
|
+
logger15.log("Connecting to realtime job notifications...");
|
|
10252
10641
|
this.subscription = await subscribeToJobs(
|
|
10253
10642
|
this.config.server,
|
|
10254
10643
|
this.config.group,
|
|
10255
10644
|
(notification) => {
|
|
10256
|
-
|
|
10645
|
+
logger15.log(`Ably notification: ${notification.runId}`);
|
|
10257
10646
|
this.processJob(notification.runId).catch(console.error);
|
|
10258
10647
|
},
|
|
10259
10648
|
(connectionState, reason) => {
|
|
10260
|
-
|
|
10649
|
+
logger15.log(
|
|
10261
10650
|
`Ably connection: ${connectionState}${reason ? ` (${reason})` : ""}`
|
|
10262
10651
|
);
|
|
10263
10652
|
}
|
|
10264
10653
|
);
|
|
10265
|
-
|
|
10654
|
+
logger15.log("Connected to realtime job notifications");
|
|
10266
10655
|
this.pollInterval = setInterval(() => {
|
|
10267
10656
|
this.pollFallback().catch(console.error);
|
|
10268
10657
|
}, this.config.sandbox.poll_interval_ms);
|
|
10269
|
-
|
|
10658
|
+
logger15.log(
|
|
10270
10659
|
`Polling fallback enabled (every ${this.config.sandbox.poll_interval_ms / 1e3}s)`
|
|
10271
10660
|
);
|
|
10272
10661
|
await shutdownPromise;
|
|
@@ -10277,7 +10666,7 @@ var Runner = class _Runner {
|
|
|
10277
10666
|
this.subscription.cleanup();
|
|
10278
10667
|
}
|
|
10279
10668
|
if (this.state.jobPromises.size > 0) {
|
|
10280
|
-
|
|
10669
|
+
logger15.log(
|
|
10281
10670
|
`Waiting for ${this.state.jobPromises.size} active job(s) to complete...`
|
|
10282
10671
|
);
|
|
10283
10672
|
await Promise.all(this.state.jobPromises);
|
|
@@ -10285,7 +10674,7 @@ var Runner = class _Runner {
|
|
|
10285
10674
|
await cleanupEnvironment(this.resources);
|
|
10286
10675
|
this.state.mode = "stopped";
|
|
10287
10676
|
this.updateStatus();
|
|
10288
|
-
|
|
10677
|
+
logger15.log("Runner stopped");
|
|
10289
10678
|
process.exit(0);
|
|
10290
10679
|
}
|
|
10291
10680
|
/**
|
|
@@ -10302,11 +10691,11 @@ var Runner = class _Runner {
|
|
|
10302
10691
|
() => pollForJob(this.config.server, this.config.group)
|
|
10303
10692
|
);
|
|
10304
10693
|
if (job) {
|
|
10305
|
-
|
|
10694
|
+
logger15.log(`Poll fallback found job: ${job.runId}`);
|
|
10306
10695
|
await this.processJob(job.runId);
|
|
10307
10696
|
}
|
|
10308
10697
|
} catch (error) {
|
|
10309
|
-
|
|
10698
|
+
logger15.error(
|
|
10310
10699
|
`Poll fallback error: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10311
10700
|
);
|
|
10312
10701
|
}
|
|
@@ -10316,7 +10705,7 @@ var Runner = class _Runner {
|
|
|
10316
10705
|
*/
|
|
10317
10706
|
async processJob(runId) {
|
|
10318
10707
|
if (this.state.mode !== "running") {
|
|
10319
|
-
|
|
10708
|
+
logger15.log(`Not running (${this.state.mode}), ignoring job ${runId}`);
|
|
10320
10709
|
return;
|
|
10321
10710
|
}
|
|
10322
10711
|
if (this.state.activeRuns.has(runId)) {
|
|
@@ -10324,10 +10713,10 @@ var Runner = class _Runner {
|
|
|
10324
10713
|
}
|
|
10325
10714
|
if (this.state.activeRuns.size >= this.config.sandbox.max_concurrent) {
|
|
10326
10715
|
if (!this.pendingJobs.includes(runId) && this.pendingJobs.length < _Runner.MAX_PENDING_QUEUE_SIZE) {
|
|
10327
|
-
|
|
10716
|
+
logger15.log(`At capacity, queueing job ${runId}`);
|
|
10328
10717
|
this.pendingJobs.push(runId);
|
|
10329
10718
|
} else if (this.pendingJobs.length >= _Runner.MAX_PENDING_QUEUE_SIZE) {
|
|
10330
|
-
|
|
10719
|
+
logger15.log(
|
|
10331
10720
|
`Pending queue full (${_Runner.MAX_PENDING_QUEUE_SIZE}), dropping job ${runId}`
|
|
10332
10721
|
);
|
|
10333
10722
|
}
|
|
@@ -10338,11 +10727,11 @@ var Runner = class _Runner {
|
|
|
10338
10727
|
"claim",
|
|
10339
10728
|
() => claimJob(this.config.server, runId)
|
|
10340
10729
|
);
|
|
10341
|
-
|
|
10730
|
+
logger15.log(`Claimed job: ${context.runId}`);
|
|
10342
10731
|
this.state.activeRuns.add(context.runId);
|
|
10343
10732
|
this.updateStatus();
|
|
10344
10733
|
const jobPromise = this.executeJob(context).catch((error) => {
|
|
10345
|
-
|
|
10734
|
+
logger15.error(
|
|
10346
10735
|
`Job ${context.runId} failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10347
10736
|
);
|
|
10348
10737
|
}).finally(() => {
|
|
@@ -10350,7 +10739,7 @@ var Runner = class _Runner {
|
|
|
10350
10739
|
this.state.jobPromises.delete(jobPromise);
|
|
10351
10740
|
this.updateStatus();
|
|
10352
10741
|
if (this.state.mode === "draining" && this.state.activeRuns.size === 0) {
|
|
10353
|
-
|
|
10742
|
+
logger15.log("[Maintenance] All jobs completed, exiting");
|
|
10354
10743
|
this.state.mode = "stopping";
|
|
10355
10744
|
this.updateStatus();
|
|
10356
10745
|
this.resolveShutdown?.();
|
|
@@ -10365,33 +10754,33 @@ var Runner = class _Runner {
|
|
|
10365
10754
|
});
|
|
10366
10755
|
this.state.jobPromises.add(jobPromise);
|
|
10367
10756
|
} catch (error) {
|
|
10368
|
-
|
|
10757
|
+
logger15.log(
|
|
10369
10758
|
`Could not claim job ${runId}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10370
10759
|
);
|
|
10371
10760
|
}
|
|
10372
10761
|
}
|
|
10373
10762
|
async executeJob(context) {
|
|
10374
|
-
|
|
10375
|
-
|
|
10376
|
-
|
|
10763
|
+
logger15.log(` Executing job ${context.runId}...`);
|
|
10764
|
+
logger15.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
|
|
10765
|
+
logger15.log(` Compose version: ${context.agentComposeVersionId}`);
|
|
10377
10766
|
try {
|
|
10378
10767
|
const result = await executeJob(context, this.config);
|
|
10379
|
-
|
|
10768
|
+
logger15.log(
|
|
10380
10769
|
` Job ${context.runId} execution completed with exit code ${result.exitCode}`
|
|
10381
10770
|
);
|
|
10382
10771
|
if (result.exitCode !== 0 && result.error) {
|
|
10383
|
-
|
|
10772
|
+
logger15.error(` Job ${context.runId} failed: ${result.error}`);
|
|
10384
10773
|
}
|
|
10385
10774
|
} catch (err) {
|
|
10386
10775
|
const error = err instanceof Error ? err.message : "Unknown execution error";
|
|
10387
|
-
|
|
10776
|
+
logger15.error(` Job ${context.runId} execution failed: ${error}`);
|
|
10388
10777
|
const result = await completeJob(
|
|
10389
10778
|
this.config.server.url,
|
|
10390
10779
|
context,
|
|
10391
10780
|
1,
|
|
10392
10781
|
error
|
|
10393
10782
|
);
|
|
10394
|
-
|
|
10783
|
+
logger15.log(` Job ${context.runId} reported as ${result.status}`);
|
|
10395
10784
|
}
|
|
10396
10785
|
}
|
|
10397
10786
|
};
|
|
@@ -10422,7 +10811,7 @@ import { dirname as dirname2, join as join2 } from "path";
|
|
|
10422
10811
|
|
|
10423
10812
|
// src/lib/firecracker/process.ts
|
|
10424
10813
|
import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3 } from "fs";
|
|
10425
|
-
import
|
|
10814
|
+
import path8 from "path";
|
|
10426
10815
|
function parseFirecrackerCmdline(cmdline) {
|
|
10427
10816
|
const args = cmdline.split("\0");
|
|
10428
10817
|
if (!args[0]?.includes("firecracker")) return null;
|
|
@@ -10455,7 +10844,7 @@ function findFirecrackerProcesses() {
|
|
|
10455
10844
|
for (const entry of entries) {
|
|
10456
10845
|
if (!/^\d+$/.test(entry)) continue;
|
|
10457
10846
|
const pid = parseInt(entry, 10);
|
|
10458
|
-
const cmdlinePath =
|
|
10847
|
+
const cmdlinePath = path8.join(procDir, entry, "cmdline");
|
|
10459
10848
|
if (!existsSync3(cmdlinePath)) continue;
|
|
10460
10849
|
try {
|
|
10461
10850
|
const cmdline = readFileSync2(cmdlinePath, "utf-8");
|
|
@@ -10473,7 +10862,7 @@ function findProcessByVmId(vmId) {
|
|
|
10473
10862
|
const processes = findFirecrackerProcesses();
|
|
10474
10863
|
return processes.find((p) => p.vmId === vmId) || null;
|
|
10475
10864
|
}
|
|
10476
|
-
function
|
|
10865
|
+
function isProcessRunning3(pid) {
|
|
10477
10866
|
try {
|
|
10478
10867
|
process.kill(pid, 0);
|
|
10479
10868
|
return true;
|
|
@@ -10482,24 +10871,24 @@ function isProcessRunning2(pid) {
|
|
|
10482
10871
|
}
|
|
10483
10872
|
}
|
|
10484
10873
|
async function killProcess(pid, timeoutMs = 5e3) {
|
|
10485
|
-
if (!
|
|
10874
|
+
if (!isProcessRunning3(pid)) return true;
|
|
10486
10875
|
try {
|
|
10487
10876
|
process.kill(pid, "SIGTERM");
|
|
10488
10877
|
} catch {
|
|
10489
|
-
return !
|
|
10878
|
+
return !isProcessRunning3(pid);
|
|
10490
10879
|
}
|
|
10491
10880
|
const startTime = Date.now();
|
|
10492
10881
|
while (Date.now() - startTime < timeoutMs) {
|
|
10493
|
-
if (!
|
|
10882
|
+
if (!isProcessRunning3(pid)) return true;
|
|
10494
10883
|
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
10495
10884
|
}
|
|
10496
|
-
if (
|
|
10885
|
+
if (isProcessRunning3(pid)) {
|
|
10497
10886
|
try {
|
|
10498
10887
|
process.kill(pid, "SIGKILL");
|
|
10499
10888
|
} catch {
|
|
10500
10889
|
}
|
|
10501
10890
|
}
|
|
10502
|
-
return !
|
|
10891
|
+
return !isProcessRunning3(pid);
|
|
10503
10892
|
}
|
|
10504
10893
|
function findMitmproxyProcess() {
|
|
10505
10894
|
const procDir = "/proc";
|
|
@@ -10512,7 +10901,7 @@ function findMitmproxyProcess() {
|
|
|
10512
10901
|
for (const entry of entries) {
|
|
10513
10902
|
if (!/^\d+$/.test(entry)) continue;
|
|
10514
10903
|
const pid = parseInt(entry, 10);
|
|
10515
|
-
const cmdlinePath =
|
|
10904
|
+
const cmdlinePath = path8.join(procDir, entry, "cmdline");
|
|
10516
10905
|
if (!existsSync3(cmdlinePath)) continue;
|
|
10517
10906
|
try {
|
|
10518
10907
|
const cmdline = readFileSync2(cmdlinePath, "utf-8");
|
|
@@ -10574,11 +10963,11 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
|
|
|
10574
10963
|
const warnings = [];
|
|
10575
10964
|
const bridgeStatus = await checkBridgeStatus();
|
|
10576
10965
|
if (bridgeStatus.exists) {
|
|
10577
|
-
console.log(` \u2713 Bridge ${
|
|
10966
|
+
console.log(` \u2713 Bridge ${BRIDGE_NAME} (${bridgeStatus.ip})`);
|
|
10578
10967
|
} else {
|
|
10579
|
-
console.log(` \u2717 Bridge ${
|
|
10968
|
+
console.log(` \u2717 Bridge ${BRIDGE_NAME} not found`);
|
|
10580
10969
|
warnings.push({
|
|
10581
|
-
message: `Network bridge ${
|
|
10970
|
+
message: `Network bridge ${BRIDGE_NAME} does not exist`
|
|
10582
10971
|
});
|
|
10583
10972
|
}
|
|
10584
10973
|
const proxyPort = config.proxy.port;
|
|
@@ -10601,7 +10990,6 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
|
|
|
10601
10990
|
}
|
|
10602
10991
|
console.log("");
|
|
10603
10992
|
const processes = findFirecrackerProcesses();
|
|
10604
|
-
const tapDevices = await listTapDevices();
|
|
10605
10993
|
const workspaces = existsSync4(workspacesDir) ? readdirSync2(workspacesDir).filter((d) => d.startsWith("vm0-")) : [];
|
|
10606
10994
|
const jobs = [];
|
|
10607
10995
|
const statusVmIds = /* @__PURE__ */ new Set();
|
|
@@ -10624,9 +11012,11 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
|
|
|
10624
11012
|
}
|
|
10625
11013
|
const ipToVmIds = /* @__PURE__ */ new Map();
|
|
10626
11014
|
for (const [ip, allocation] of allocations) {
|
|
10627
|
-
|
|
10628
|
-
|
|
10629
|
-
|
|
11015
|
+
if (allocation.vmId) {
|
|
11016
|
+
const existing = ipToVmIds.get(ip) ?? [];
|
|
11017
|
+
existing.push(allocation.vmId);
|
|
11018
|
+
ipToVmIds.set(ip, existing);
|
|
11019
|
+
}
|
|
10630
11020
|
}
|
|
10631
11021
|
const maxConcurrent = config.sandbox.max_concurrent;
|
|
10632
11022
|
console.log(`Runs (${jobs.length} active, max ${maxConcurrent}):`);
|
|
@@ -10674,14 +11064,6 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
|
|
|
10674
11064
|
});
|
|
10675
11065
|
}
|
|
10676
11066
|
}
|
|
10677
|
-
for (const tap of tapDevices) {
|
|
10678
|
-
const vmId = tap.replace("tap", "");
|
|
10679
|
-
if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
|
|
10680
|
-
warnings.push({
|
|
10681
|
-
message: `Orphan TAP device: ${tap} (no matching job or process)`
|
|
10682
|
-
});
|
|
10683
|
-
}
|
|
10684
|
-
}
|
|
10685
11067
|
for (const ws of workspaces) {
|
|
10686
11068
|
const vmId = ws.replace("vm0-", "");
|
|
10687
11069
|
if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
|
|
@@ -10746,7 +11128,7 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
|
|
|
10746
11128
|
const { vmId, runId } = resolveRunId(runIdArg, statusFilePath);
|
|
10747
11129
|
console.log(`Killing run ${vmId}...`);
|
|
10748
11130
|
const proc = findProcessByVmId(vmId);
|
|
10749
|
-
const
|
|
11131
|
+
const guestIp = getIPForVm(vmId);
|
|
10750
11132
|
const workspaceDir = join3(workspacesDir, `vm0-${vmId}`);
|
|
10751
11133
|
console.log("");
|
|
10752
11134
|
console.log("Resources to clean up:");
|
|
@@ -10755,7 +11137,9 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
|
|
|
10755
11137
|
} else {
|
|
10756
11138
|
console.log(" - Firecracker process: not found");
|
|
10757
11139
|
}
|
|
10758
|
-
|
|
11140
|
+
if (guestIp) {
|
|
11141
|
+
console.log(` - IP address: ${guestIp} (TAP/IP released by runner)`);
|
|
11142
|
+
}
|
|
10759
11143
|
console.log(` - Workspace: ${workspaceDir}`);
|
|
10760
11144
|
if (runId) {
|
|
10761
11145
|
console.log(` - status.json entry: ${runId.substring(0, 12)}...`);
|
|
@@ -10783,20 +11167,6 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
|
|
|
10783
11167
|
message: "Not running"
|
|
10784
11168
|
});
|
|
10785
11169
|
}
|
|
10786
|
-
try {
|
|
10787
|
-
await deleteTapDevice(tapDevice);
|
|
10788
|
-
results.push({
|
|
10789
|
-
step: "TAP device",
|
|
10790
|
-
success: true,
|
|
10791
|
-
message: `${tapDevice} deleted`
|
|
10792
|
-
});
|
|
10793
|
-
} catch (error) {
|
|
10794
|
-
results.push({
|
|
10795
|
-
step: "TAP device",
|
|
10796
|
-
success: false,
|
|
10797
|
-
message: error instanceof Error ? error.message : "Unknown error"
|
|
10798
|
-
});
|
|
10799
|
-
}
|
|
10800
11170
|
if (existsSync5(workspaceDir)) {
|
|
10801
11171
|
try {
|
|
10802
11172
|
rmSync(workspaceDir, { recursive: true, force: true });
|
|
@@ -10969,6 +11339,8 @@ var benchmarkCommand = new Command4("benchmark").description(
|
|
|
10969
11339
|
).argument("<prompt>", "The bash command to execute in the VM").option("--config <path>", "Config file path", "./runner.yaml").option("--working-dir <path>", "Working directory in VM", "/home/user").option("--agent-type <type>", "Agent type", "claude-code").action(async (prompt, options) => {
|
|
10970
11340
|
const timer = new Timer();
|
|
10971
11341
|
setGlobalLogger(timer.log.bind(timer));
|
|
11342
|
+
let exitCode = 1;
|
|
11343
|
+
let poolsInitialized = false;
|
|
10972
11344
|
try {
|
|
10973
11345
|
timer.log("Loading configuration...");
|
|
10974
11346
|
const config = loadDebugConfig(options.config);
|
|
@@ -10984,12 +11356,14 @@ var benchmarkCommand = new Command4("benchmark").description(
|
|
|
10984
11356
|
}
|
|
10985
11357
|
timer.log("Setting up network bridge...");
|
|
10986
11358
|
await setupBridge();
|
|
10987
|
-
timer.log("Initializing
|
|
11359
|
+
timer.log("Initializing pools...");
|
|
10988
11360
|
await initOverlayPool({
|
|
10989
11361
|
size: 2,
|
|
10990
11362
|
replenishThreshold: 1,
|
|
10991
11363
|
poolDir: dataPaths.overlayPool(config.data_dir)
|
|
10992
11364
|
});
|
|
11365
|
+
await initTapPool({ name: config.name, size: 2, replenishThreshold: 1 });
|
|
11366
|
+
poolsInitialized = true;
|
|
10993
11367
|
timer.log(`Executing command: ${prompt}`);
|
|
10994
11368
|
const context = createBenchmarkContext(prompt, options);
|
|
10995
11369
|
const result = await executeJob(context, config, {
|
|
@@ -11000,17 +11374,22 @@ var benchmarkCommand = new Command4("benchmark").description(
|
|
|
11000
11374
|
timer.log(`Error: ${result.error}`);
|
|
11001
11375
|
}
|
|
11002
11376
|
timer.log(`Total time: ${timer.totalSeconds().toFixed(1)}s`);
|
|
11003
|
-
|
|
11377
|
+
exitCode = result.exitCode;
|
|
11004
11378
|
} catch (error) {
|
|
11005
11379
|
timer.log(
|
|
11006
11380
|
`Error: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
11007
11381
|
);
|
|
11008
|
-
|
|
11382
|
+
} finally {
|
|
11383
|
+
if (poolsInitialized) {
|
|
11384
|
+
cleanupTapPool();
|
|
11385
|
+
cleanupOverlayPool();
|
|
11386
|
+
}
|
|
11009
11387
|
}
|
|
11388
|
+
process.exit(exitCode);
|
|
11010
11389
|
});
|
|
11011
11390
|
|
|
11012
11391
|
// src/index.ts
|
|
11013
|
-
var version = true ? "3.8.
|
|
11392
|
+
var version = true ? "3.8.1" : "0.1.0";
|
|
11014
11393
|
program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
|
|
11015
11394
|
program.addCommand(startCommand);
|
|
11016
11395
|
program.addCommand(doctorCommand);
|