@vm0/runner 3.5.0 → 3.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +592 -556
- package/package.json +1 -6
package/index.js
CHANGED
|
@@ -450,7 +450,30 @@ import { exec } from "child_process";
|
|
|
450
450
|
import { promisify } from "util";
|
|
451
451
|
import * as fs2 from "fs";
|
|
452
452
|
import * as path from "path";
|
|
453
|
+
|
|
454
|
+
// src/lib/logger.ts
|
|
455
|
+
var _log = null;
|
|
456
|
+
var _error = null;
|
|
457
|
+
function getLog() {
|
|
458
|
+
return _log ?? console.log.bind(console);
|
|
459
|
+
}
|
|
460
|
+
function getError() {
|
|
461
|
+
return _error ?? console.error.bind(console);
|
|
462
|
+
}
|
|
463
|
+
function setGlobalLogger(log, error) {
|
|
464
|
+
_log = log;
|
|
465
|
+
_error = error ?? log;
|
|
466
|
+
}
|
|
467
|
+
function createLogger(prefix) {
|
|
468
|
+
return {
|
|
469
|
+
log: (message) => getLog()(`[${prefix}] ${message}`),
|
|
470
|
+
error: (message) => getError()(`[${prefix}] ${message}`)
|
|
471
|
+
};
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
// src/lib/firecracker/ip-pool.ts
|
|
453
475
|
var execAsync = promisify(exec);
|
|
476
|
+
var logger = createLogger("IP Pool");
|
|
454
477
|
var VM0_RUN_DIR = "/var/run/vm0";
|
|
455
478
|
var REGISTRY_FILE_PATH = path.join(VM0_RUN_DIR, "ip-registry.json");
|
|
456
479
|
var BRIDGE_NAME = "vm0br0";
|
|
@@ -564,8 +587,8 @@ function reconcileRegistry(registry, activeTaps) {
|
|
|
564
587
|
} else if (isWithinGracePeriod) {
|
|
565
588
|
reconciled.allocations[ip] = allocation;
|
|
566
589
|
} else {
|
|
567
|
-
|
|
568
|
-
`
|
|
590
|
+
logger.log(
|
|
591
|
+
`Removing stale allocation for ${ip} (TAP ${allocation.tapDevice} no longer exists)`
|
|
569
592
|
);
|
|
570
593
|
}
|
|
571
594
|
}
|
|
@@ -593,8 +616,8 @@ async function allocateIP(vmId) {
|
|
|
593
616
|
}
|
|
594
617
|
const allocatedCount = Object.keys(registry.allocations).length;
|
|
595
618
|
const allocatedIPs = Object.keys(registry.allocations).sort();
|
|
596
|
-
|
|
597
|
-
`
|
|
619
|
+
logger.log(
|
|
620
|
+
`Current state: ${allocatedCount} IPs allocated [${allocatedIPs.join(", ")}], assigning ${ip}`
|
|
598
621
|
);
|
|
599
622
|
registry.allocations[ip] = {
|
|
600
623
|
vmId,
|
|
@@ -602,7 +625,7 @@ async function allocateIP(vmId) {
|
|
|
602
625
|
allocatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
603
626
|
};
|
|
604
627
|
writeRegistry(registry);
|
|
605
|
-
|
|
628
|
+
logger.log(`Allocated ${ip} for VM ${vmId} (TAP ${tapDevice})`);
|
|
606
629
|
return ip;
|
|
607
630
|
});
|
|
608
631
|
}
|
|
@@ -613,42 +636,39 @@ async function releaseIP(ip) {
|
|
|
613
636
|
const allocation = registry.allocations[ip];
|
|
614
637
|
delete registry.allocations[ip];
|
|
615
638
|
writeRegistry(registry);
|
|
616
|
-
|
|
617
|
-
`[IP Pool] Released ${ip} (was allocated to VM ${allocation.vmId})`
|
|
618
|
-
);
|
|
639
|
+
logger.log(`Released ${ip} (was allocated to VM ${allocation.vmId})`);
|
|
619
640
|
} else {
|
|
620
|
-
|
|
641
|
+
logger.log(`IP ${ip} was not in registry, nothing to release`);
|
|
621
642
|
}
|
|
622
643
|
});
|
|
623
644
|
}
|
|
624
645
|
async function cleanupOrphanedAllocations() {
|
|
625
646
|
return withLock(async () => {
|
|
626
|
-
|
|
647
|
+
logger.log("Cleaning up orphaned allocations...");
|
|
627
648
|
const registry = readRegistry();
|
|
628
649
|
const beforeCount = Object.keys(registry.allocations).length;
|
|
629
650
|
if (beforeCount === 0) {
|
|
630
|
-
|
|
651
|
+
logger.log("No allocations in registry, nothing to clean up");
|
|
631
652
|
return;
|
|
632
653
|
}
|
|
633
654
|
const activeTaps = await scanTapDevices();
|
|
634
|
-
|
|
635
|
-
`[IP Pool] Found ${activeTaps.size} active TAP device(s) on bridge`
|
|
636
|
-
);
|
|
655
|
+
logger.log(`Found ${activeTaps.size} active TAP device(s) on bridge`);
|
|
637
656
|
const reconciled = reconcileRegistry(registry, activeTaps);
|
|
638
657
|
const afterCount = Object.keys(reconciled.allocations).length;
|
|
639
658
|
if (afterCount !== beforeCount) {
|
|
640
659
|
writeRegistry(reconciled);
|
|
641
|
-
|
|
642
|
-
`
|
|
660
|
+
logger.log(
|
|
661
|
+
`Cleaned up ${beforeCount - afterCount} orphaned allocation(s)`
|
|
643
662
|
);
|
|
644
663
|
} else {
|
|
645
|
-
|
|
664
|
+
logger.log("No orphaned allocations found");
|
|
646
665
|
}
|
|
647
666
|
});
|
|
648
667
|
}
|
|
649
668
|
|
|
650
669
|
// src/lib/firecracker/network.ts
|
|
651
670
|
var execAsync2 = promisify2(exec2);
|
|
671
|
+
var logger2 = createLogger("Network");
|
|
652
672
|
var BRIDGE_NAME2 = "vm0br0";
|
|
653
673
|
var BRIDGE_IP = "172.16.0.1";
|
|
654
674
|
var BRIDGE_NETMASK = "255.255.255.0";
|
|
@@ -700,28 +720,28 @@ async function getDefaultInterface() {
|
|
|
700
720
|
}
|
|
701
721
|
async function setupForwardRules() {
|
|
702
722
|
const extIface = await getDefaultInterface();
|
|
703
|
-
|
|
723
|
+
logger2.log(`Setting up FORWARD rules for ${BRIDGE_NAME2} <-> ${extIface}`);
|
|
704
724
|
try {
|
|
705
725
|
await execCommand(
|
|
706
726
|
`iptables -C FORWARD -i ${BRIDGE_NAME2} -o ${extIface} -j ACCEPT`
|
|
707
727
|
);
|
|
708
|
-
|
|
728
|
+
logger2.log("FORWARD outbound rule already exists");
|
|
709
729
|
} catch {
|
|
710
730
|
await execCommand(
|
|
711
731
|
`iptables -I FORWARD -i ${BRIDGE_NAME2} -o ${extIface} -j ACCEPT`
|
|
712
732
|
);
|
|
713
|
-
|
|
733
|
+
logger2.log("FORWARD outbound rule added");
|
|
714
734
|
}
|
|
715
735
|
try {
|
|
716
736
|
await execCommand(
|
|
717
737
|
`iptables -C FORWARD -i ${extIface} -o ${BRIDGE_NAME2} -m state --state RELATED,ESTABLISHED -j ACCEPT`
|
|
718
738
|
);
|
|
719
|
-
|
|
739
|
+
logger2.log("FORWARD inbound rule already exists");
|
|
720
740
|
} catch {
|
|
721
741
|
await execCommand(
|
|
722
742
|
`iptables -I FORWARD -i ${extIface} -o ${BRIDGE_NAME2} -m state --state RELATED,ESTABLISHED -j ACCEPT`
|
|
723
743
|
);
|
|
724
|
-
|
|
744
|
+
logger2.log("FORWARD inbound rule added");
|
|
725
745
|
}
|
|
726
746
|
}
|
|
727
747
|
async function bridgeExists() {
|
|
@@ -734,11 +754,11 @@ async function bridgeExists() {
|
|
|
734
754
|
}
|
|
735
755
|
async function setupBridge() {
|
|
736
756
|
if (await bridgeExists()) {
|
|
737
|
-
|
|
757
|
+
logger2.log(`Bridge ${BRIDGE_NAME2} already exists`);
|
|
738
758
|
await setupForwardRules();
|
|
739
759
|
return;
|
|
740
760
|
}
|
|
741
|
-
|
|
761
|
+
logger2.log(`Creating bridge ${BRIDGE_NAME2}...`);
|
|
742
762
|
await execCommand(`ip link add name ${BRIDGE_NAME2} type bridge`);
|
|
743
763
|
await execCommand(
|
|
744
764
|
`ip addr add ${BRIDGE_IP}/${BRIDGE_NETMASK} dev ${BRIDGE_NAME2}`
|
|
@@ -749,15 +769,15 @@ async function setupBridge() {
|
|
|
749
769
|
await execCommand(
|
|
750
770
|
`iptables -t nat -C POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
|
|
751
771
|
);
|
|
752
|
-
|
|
772
|
+
logger2.log("NAT rule already exists");
|
|
753
773
|
} catch {
|
|
754
774
|
await execCommand(
|
|
755
775
|
`iptables -t nat -A POSTROUTING -s ${BRIDGE_CIDR} -j MASQUERADE`
|
|
756
776
|
);
|
|
757
|
-
|
|
777
|
+
logger2.log("NAT rule added");
|
|
758
778
|
}
|
|
759
779
|
await setupForwardRules();
|
|
760
|
-
|
|
780
|
+
logger2.log(`Bridge ${BRIDGE_NAME2} configured with IP ${BRIDGE_IP}`);
|
|
761
781
|
}
|
|
762
782
|
async function tapDeviceExists(tapDevice) {
|
|
763
783
|
try {
|
|
@@ -777,7 +797,7 @@ async function clearStaleIptablesRulesForIP(ip) {
|
|
|
777
797
|
if (rulesForIP.length === 0) {
|
|
778
798
|
return;
|
|
779
799
|
}
|
|
780
|
-
|
|
800
|
+
logger2.log(
|
|
781
801
|
`Clearing ${rulesForIP.length} stale iptables rule(s) for IP ${ip}`
|
|
782
802
|
);
|
|
783
803
|
for (const rule of rulesForIP) {
|
|
@@ -790,24 +810,27 @@ async function clearStaleIptablesRulesForIP(ip) {
|
|
|
790
810
|
} catch {
|
|
791
811
|
}
|
|
792
812
|
}
|
|
793
|
-
async function createTapDevice(vmId
|
|
794
|
-
const log = logger ?? console.log;
|
|
813
|
+
async function createTapDevice(vmId) {
|
|
795
814
|
const tapDevice = `tap${vmId.substring(0, 8)}`;
|
|
796
815
|
const guestMac = generateMacAddress(vmId);
|
|
797
816
|
const guestIp = await allocateIP(vmId);
|
|
798
|
-
log(`[VM ${vmId}] IP allocated: ${guestIp}`);
|
|
817
|
+
logger2.log(`[VM ${vmId}] IP allocated: ${guestIp}`);
|
|
799
818
|
await clearStaleIptablesRulesForIP(guestIp);
|
|
800
|
-
log(`[VM ${vmId}] Stale iptables cleared`);
|
|
819
|
+
logger2.log(`[VM ${vmId}] Stale iptables cleared`);
|
|
801
820
|
if (await tapDeviceExists(tapDevice)) {
|
|
802
|
-
log(
|
|
821
|
+
logger2.log(
|
|
822
|
+
`[VM ${vmId}] TAP device ${tapDevice} already exists, deleting...`
|
|
823
|
+
);
|
|
803
824
|
await deleteTapDevice(tapDevice);
|
|
804
825
|
}
|
|
805
826
|
await execCommand(`ip tuntap add ${tapDevice} mode tap`);
|
|
806
|
-
log(`[VM ${vmId}] TAP device created`);
|
|
827
|
+
logger2.log(`[VM ${vmId}] TAP device created`);
|
|
807
828
|
await execCommand(`ip link set ${tapDevice} master ${BRIDGE_NAME2}`);
|
|
808
|
-
log(`[VM ${vmId}] TAP added to bridge`);
|
|
829
|
+
logger2.log(`[VM ${vmId}] TAP added to bridge`);
|
|
809
830
|
await execCommand(`ip link set ${tapDevice} up`);
|
|
810
|
-
log(
|
|
831
|
+
logger2.log(
|
|
832
|
+
`[VM ${vmId}] TAP created: ${tapDevice}, MAC ${guestMac}, IP ${guestIp}`
|
|
833
|
+
);
|
|
811
834
|
return {
|
|
812
835
|
tapDevice,
|
|
813
836
|
guestMac,
|
|
@@ -818,15 +841,15 @@ async function createTapDevice(vmId, logger) {
|
|
|
818
841
|
}
|
|
819
842
|
async function deleteTapDevice(tapDevice, guestIp) {
|
|
820
843
|
if (!await tapDeviceExists(tapDevice)) {
|
|
821
|
-
|
|
844
|
+
logger2.log(`TAP device ${tapDevice} does not exist, skipping delete`);
|
|
822
845
|
} else {
|
|
823
846
|
await execCommand(`ip link delete ${tapDevice}`);
|
|
824
|
-
|
|
847
|
+
logger2.log(`TAP device ${tapDevice} deleted`);
|
|
825
848
|
}
|
|
826
849
|
if (guestIp) {
|
|
827
850
|
try {
|
|
828
851
|
await execCommand(`ip neigh del ${guestIp} dev ${BRIDGE_NAME2}`, true);
|
|
829
|
-
|
|
852
|
+
logger2.log(`ARP entry cleared for ${guestIp}`);
|
|
830
853
|
} catch {
|
|
831
854
|
}
|
|
832
855
|
}
|
|
@@ -857,53 +880,51 @@ function checkNetworkPrerequisites() {
|
|
|
857
880
|
errors
|
|
858
881
|
};
|
|
859
882
|
}
|
|
860
|
-
async function
|
|
861
|
-
const comment =
|
|
862
|
-
|
|
863
|
-
`Setting up proxy rules for
|
|
883
|
+
async function setupCIDRProxyRules(proxyPort) {
|
|
884
|
+
const comment = "vm0:cidr-proxy";
|
|
885
|
+
logger2.log(
|
|
886
|
+
`Setting up CIDR proxy rules for ${BRIDGE_CIDR} -> port ${proxyPort}`
|
|
864
887
|
);
|
|
865
888
|
try {
|
|
866
889
|
await execCommand(
|
|
867
|
-
`iptables -t nat -C PREROUTING -s ${
|
|
890
|
+
`iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
868
891
|
);
|
|
869
|
-
|
|
892
|
+
logger2.log("CIDR proxy rule for port 80 already exists");
|
|
870
893
|
} catch {
|
|
871
894
|
await execCommand(
|
|
872
|
-
`iptables -t nat -A PREROUTING -s ${
|
|
895
|
+
`iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
873
896
|
);
|
|
874
|
-
|
|
897
|
+
logger2.log("CIDR proxy rule for port 80 added");
|
|
875
898
|
}
|
|
876
899
|
try {
|
|
877
900
|
await execCommand(
|
|
878
|
-
`iptables -t nat -C PREROUTING -s ${
|
|
901
|
+
`iptables -t nat -C PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
879
902
|
);
|
|
880
|
-
|
|
903
|
+
logger2.log("CIDR proxy rule for port 443 already exists");
|
|
881
904
|
} catch {
|
|
882
905
|
await execCommand(
|
|
883
|
-
`iptables -t nat -A PREROUTING -s ${
|
|
906
|
+
`iptables -t nat -A PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
884
907
|
);
|
|
885
|
-
|
|
908
|
+
logger2.log("CIDR proxy rule for port 443 added");
|
|
886
909
|
}
|
|
887
|
-
console.log(`Proxy rules configured for VM ${vmIp}`);
|
|
888
910
|
}
|
|
889
|
-
async function
|
|
890
|
-
const comment =
|
|
891
|
-
|
|
911
|
+
async function cleanupCIDRProxyRules(proxyPort) {
|
|
912
|
+
const comment = "vm0:cidr-proxy";
|
|
913
|
+
logger2.log("Cleaning up CIDR proxy rules...");
|
|
892
914
|
try {
|
|
893
915
|
await execCommand(
|
|
894
|
-
`iptables -t nat -D PREROUTING -s ${
|
|
916
|
+
`iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 80 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
895
917
|
);
|
|
896
|
-
|
|
918
|
+
logger2.log("CIDR proxy rule for port 80 removed");
|
|
897
919
|
} catch {
|
|
898
920
|
}
|
|
899
921
|
try {
|
|
900
922
|
await execCommand(
|
|
901
|
-
`iptables -t nat -D PREROUTING -s ${
|
|
923
|
+
`iptables -t nat -D PREROUTING -s ${BRIDGE_CIDR} -p tcp --dport 443 -j REDIRECT --to-port ${proxyPort} -m comment --comment "${comment}"`
|
|
902
924
|
);
|
|
903
|
-
|
|
925
|
+
logger2.log("CIDR proxy rule for port 443 removed");
|
|
904
926
|
} catch {
|
|
905
927
|
}
|
|
906
|
-
console.log(`Proxy rules cleanup complete for VM ${vmIp}`);
|
|
907
928
|
}
|
|
908
929
|
async function listTapDevices() {
|
|
909
930
|
try {
|
|
@@ -980,17 +1001,17 @@ async function findOrphanedIptablesRules(rules, activeVmIps, expectedProxyPort)
|
|
|
980
1001
|
return orphaned;
|
|
981
1002
|
}
|
|
982
1003
|
async function flushBridgeArpCache() {
|
|
983
|
-
|
|
1004
|
+
logger2.log(`Flushing ARP cache on bridge ${BRIDGE_NAME2}...`);
|
|
984
1005
|
try {
|
|
985
1006
|
if (!await bridgeExists()) {
|
|
986
|
-
|
|
1007
|
+
logger2.log("Bridge does not exist, skipping ARP flush");
|
|
987
1008
|
return;
|
|
988
1009
|
}
|
|
989
1010
|
const { stdout } = await execAsync2(
|
|
990
1011
|
`ip neigh show dev ${BRIDGE_NAME2} 2>/dev/null || true`
|
|
991
1012
|
);
|
|
992
1013
|
if (!stdout.trim()) {
|
|
993
|
-
|
|
1014
|
+
logger2.log("No ARP entries on bridge");
|
|
994
1015
|
return;
|
|
995
1016
|
}
|
|
996
1017
|
const lines = stdout.split("\n").filter((line) => line.trim());
|
|
@@ -1006,38 +1027,38 @@ async function flushBridgeArpCache() {
|
|
|
1006
1027
|
}
|
|
1007
1028
|
}
|
|
1008
1029
|
}
|
|
1009
|
-
|
|
1030
|
+
logger2.log(`Cleared ${cleared} ARP entries from bridge`);
|
|
1010
1031
|
} catch (error) {
|
|
1011
|
-
|
|
1032
|
+
logger2.log(
|
|
1012
1033
|
`Warning: Could not flush ARP cache: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1013
1034
|
);
|
|
1014
1035
|
}
|
|
1015
1036
|
}
|
|
1016
1037
|
async function cleanupOrphanedProxyRules(runnerName) {
|
|
1017
1038
|
const comment = `vm0:runner:${runnerName}`;
|
|
1018
|
-
|
|
1039
|
+
logger2.log(`Cleaning up orphaned proxy rules for runner '${runnerName}'...`);
|
|
1019
1040
|
try {
|
|
1020
1041
|
const rules = await execCommand("iptables -t nat -S PREROUTING", false);
|
|
1021
1042
|
const ourRules = rules.split("\n").filter((rule) => rule.includes(comment));
|
|
1022
1043
|
if (ourRules.length === 0) {
|
|
1023
|
-
|
|
1044
|
+
logger2.log("No orphaned proxy rules found");
|
|
1024
1045
|
return;
|
|
1025
1046
|
}
|
|
1026
|
-
|
|
1047
|
+
logger2.log(`Found ${ourRules.length} orphaned rule(s) to clean up`);
|
|
1027
1048
|
for (const rule of ourRules) {
|
|
1028
1049
|
const deleteRule = rule.replace("-A ", "-D ");
|
|
1029
1050
|
try {
|
|
1030
1051
|
await execCommand(`iptables -t nat ${deleteRule}`);
|
|
1031
|
-
|
|
1052
|
+
logger2.log(`Deleted orphaned rule: ${rule.substring(0, 80)}...`);
|
|
1032
1053
|
} catch {
|
|
1033
|
-
|
|
1054
|
+
logger2.log(
|
|
1034
1055
|
`Failed to delete rule (may already be gone): ${rule.substring(0, 80)}...`
|
|
1035
1056
|
);
|
|
1036
1057
|
}
|
|
1037
1058
|
}
|
|
1038
|
-
|
|
1059
|
+
logger2.log("Orphaned proxy rules cleanup complete");
|
|
1039
1060
|
} catch (error) {
|
|
1040
|
-
|
|
1061
|
+
logger2.log(
|
|
1041
1062
|
`Warning: Could not clean up orphaned rules: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1042
1063
|
);
|
|
1043
1064
|
}
|
|
@@ -1045,6 +1066,7 @@ async function cleanupOrphanedProxyRules(runnerName) {
|
|
|
1045
1066
|
|
|
1046
1067
|
// src/lib/firecracker/vm.ts
|
|
1047
1068
|
var execAsync3 = promisify3(exec3);
|
|
1069
|
+
var logger3 = createLogger("VM");
|
|
1048
1070
|
var FirecrackerVM = class {
|
|
1049
1071
|
config;
|
|
1050
1072
|
process = null;
|
|
@@ -1064,9 +1086,6 @@ var FirecrackerVM = class {
|
|
|
1064
1086
|
this.vmOverlayPath = path2.join(this.workDir, "overlay.ext4");
|
|
1065
1087
|
this.vsockPath = path2.join(this.workDir, "vsock.sock");
|
|
1066
1088
|
}
|
|
1067
|
-
log(msg) {
|
|
1068
|
-
(this.config.logger ?? console.log)(msg);
|
|
1069
|
-
}
|
|
1070
1089
|
/**
|
|
1071
1090
|
* Get current VM state
|
|
1072
1091
|
*/
|
|
@@ -1110,21 +1129,21 @@ var FirecrackerVM = class {
|
|
|
1110
1129
|
if (fs3.existsSync(this.socketPath)) {
|
|
1111
1130
|
fs3.unlinkSync(this.socketPath);
|
|
1112
1131
|
}
|
|
1113
|
-
|
|
1132
|
+
logger3.log(`[VM ${this.config.vmId}] Setting up overlay and network...`);
|
|
1114
1133
|
const createOverlay = async () => {
|
|
1115
1134
|
const overlaySize = 2 * 1024 * 1024 * 1024;
|
|
1116
1135
|
const fd = fs3.openSync(this.vmOverlayPath, "w");
|
|
1117
1136
|
fs3.ftruncateSync(fd, overlaySize);
|
|
1118
1137
|
fs3.closeSync(fd);
|
|
1119
1138
|
await execAsync3(`mkfs.ext4 -F -q "${this.vmOverlayPath}"`);
|
|
1120
|
-
|
|
1139
|
+
logger3.log(`[VM ${this.config.vmId}] Overlay created`);
|
|
1121
1140
|
};
|
|
1122
1141
|
const [, networkConfig] = await Promise.all([
|
|
1123
1142
|
createOverlay(),
|
|
1124
|
-
createTapDevice(this.config.vmId
|
|
1143
|
+
createTapDevice(this.config.vmId)
|
|
1125
1144
|
]);
|
|
1126
1145
|
this.networkConfig = networkConfig;
|
|
1127
|
-
|
|
1146
|
+
logger3.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
|
|
1128
1147
|
this.process = spawn(
|
|
1129
1148
|
this.config.firecrackerBinary,
|
|
1130
1149
|
["--api-sock", this.socketPath],
|
|
@@ -1135,11 +1154,11 @@ var FirecrackerVM = class {
|
|
|
1135
1154
|
}
|
|
1136
1155
|
);
|
|
1137
1156
|
this.process.on("error", (err) => {
|
|
1138
|
-
|
|
1157
|
+
logger3.log(`[VM ${this.config.vmId}] Firecracker error: ${err}`);
|
|
1139
1158
|
this.state = "error";
|
|
1140
1159
|
});
|
|
1141
1160
|
this.process.on("exit", (code, signal) => {
|
|
1142
|
-
|
|
1161
|
+
logger3.log(
|
|
1143
1162
|
`[VM ${this.config.vmId}] Firecracker exited: code=${code}, signal=${signal}`
|
|
1144
1163
|
);
|
|
1145
1164
|
if (this.state !== "stopped") {
|
|
@@ -1152,7 +1171,7 @@ var FirecrackerVM = class {
|
|
|
1152
1171
|
});
|
|
1153
1172
|
stdoutRL.on("line", (line) => {
|
|
1154
1173
|
if (line.trim()) {
|
|
1155
|
-
|
|
1174
|
+
logger3.log(`[VM ${this.config.vmId}] ${line}`);
|
|
1156
1175
|
}
|
|
1157
1176
|
});
|
|
1158
1177
|
}
|
|
@@ -1162,19 +1181,19 @@ var FirecrackerVM = class {
|
|
|
1162
1181
|
});
|
|
1163
1182
|
stderrRL.on("line", (line) => {
|
|
1164
1183
|
if (line.trim()) {
|
|
1165
|
-
|
|
1184
|
+
logger3.log(`[VM ${this.config.vmId}] stderr: ${line}`);
|
|
1166
1185
|
}
|
|
1167
1186
|
});
|
|
1168
1187
|
}
|
|
1169
1188
|
this.client = new FirecrackerClient(this.socketPath);
|
|
1170
|
-
|
|
1189
|
+
logger3.log(`[VM ${this.config.vmId}] Waiting for API...`);
|
|
1171
1190
|
await this.client.waitUntilReady(1e4, 100);
|
|
1172
1191
|
this.state = "configuring";
|
|
1173
1192
|
await this.configure();
|
|
1174
|
-
|
|
1193
|
+
logger3.log(`[VM ${this.config.vmId}] Booting...`);
|
|
1175
1194
|
await this.client.start();
|
|
1176
1195
|
this.state = "running";
|
|
1177
|
-
|
|
1196
|
+
logger3.log(
|
|
1178
1197
|
`[VM ${this.config.vmId}] Running at ${this.networkConfig.guestIp}`
|
|
1179
1198
|
);
|
|
1180
1199
|
} catch (error) {
|
|
@@ -1190,7 +1209,7 @@ var FirecrackerVM = class {
|
|
|
1190
1209
|
if (!this.client || !this.networkConfig) {
|
|
1191
1210
|
throw new Error("VM not properly initialized");
|
|
1192
1211
|
}
|
|
1193
|
-
|
|
1212
|
+
logger3.log(
|
|
1194
1213
|
`[VM ${this.config.vmId}] Configuring: ${this.config.vcpus} vCPUs, ${this.config.memoryMb}MB RAM`
|
|
1195
1214
|
);
|
|
1196
1215
|
await this.client.setMachineConfig({
|
|
@@ -1199,26 +1218,28 @@ var FirecrackerVM = class {
|
|
|
1199
1218
|
});
|
|
1200
1219
|
const networkBootArgs = generateNetworkBootArgs(this.networkConfig);
|
|
1201
1220
|
const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off nomodules random.trust_cpu=on quiet loglevel=0 nokaslr audit=0 numa=off mitigations=off noresume init=/sbin/vm-init ${networkBootArgs}`;
|
|
1202
|
-
|
|
1221
|
+
logger3.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
|
|
1203
1222
|
await this.client.setBootSource({
|
|
1204
1223
|
kernel_image_path: this.config.kernelPath,
|
|
1205
1224
|
boot_args: bootArgs
|
|
1206
1225
|
});
|
|
1207
|
-
|
|
1226
|
+
logger3.log(
|
|
1227
|
+
`[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
|
|
1228
|
+
);
|
|
1208
1229
|
await this.client.setDrive({
|
|
1209
1230
|
drive_id: "rootfs",
|
|
1210
1231
|
path_on_host: this.config.rootfsPath,
|
|
1211
1232
|
is_root_device: true,
|
|
1212
1233
|
is_read_only: true
|
|
1213
1234
|
});
|
|
1214
|
-
|
|
1235
|
+
logger3.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
|
|
1215
1236
|
await this.client.setDrive({
|
|
1216
1237
|
drive_id: "overlay",
|
|
1217
1238
|
path_on_host: this.vmOverlayPath,
|
|
1218
1239
|
is_root_device: false,
|
|
1219
1240
|
is_read_only: false
|
|
1220
1241
|
});
|
|
1221
|
-
|
|
1242
|
+
logger3.log(
|
|
1222
1243
|
`[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
|
|
1223
1244
|
);
|
|
1224
1245
|
await this.client.setNetworkInterface({
|
|
@@ -1226,7 +1247,7 @@ var FirecrackerVM = class {
|
|
|
1226
1247
|
guest_mac: this.networkConfig.guestMac,
|
|
1227
1248
|
host_dev_name: this.networkConfig.tapDevice
|
|
1228
1249
|
});
|
|
1229
|
-
|
|
1250
|
+
logger3.log(`[VM ${this.config.vmId}] Vsock: ${this.vsockPath}`);
|
|
1230
1251
|
await this.client.setVsock({
|
|
1231
1252
|
vsock_id: "vsock0",
|
|
1232
1253
|
guest_cid: 3,
|
|
@@ -1238,15 +1259,15 @@ var FirecrackerVM = class {
|
|
|
1238
1259
|
*/
|
|
1239
1260
|
async stop() {
|
|
1240
1261
|
if (this.state !== "running") {
|
|
1241
|
-
|
|
1262
|
+
logger3.log(`[VM ${this.config.vmId}] Not running, state: ${this.state}`);
|
|
1242
1263
|
return;
|
|
1243
1264
|
}
|
|
1244
1265
|
this.state = "stopping";
|
|
1245
|
-
|
|
1266
|
+
logger3.log(`[VM ${this.config.vmId}] Stopping...`);
|
|
1246
1267
|
try {
|
|
1247
1268
|
if (this.client) {
|
|
1248
1269
|
await this.client.sendCtrlAltDel().catch((error) => {
|
|
1249
|
-
|
|
1270
|
+
logger3.log(
|
|
1250
1271
|
`[VM ${this.config.vmId}] Graceful shutdown signal failed (VM may already be stopping): ${error instanceof Error ? error.message : error}`
|
|
1251
1272
|
);
|
|
1252
1273
|
});
|
|
@@ -1259,7 +1280,7 @@ var FirecrackerVM = class {
|
|
|
1259
1280
|
* Force kill the VM
|
|
1260
1281
|
*/
|
|
1261
1282
|
async kill() {
|
|
1262
|
-
|
|
1283
|
+
logger3.log(`[VM ${this.config.vmId}] Force killing...`);
|
|
1263
1284
|
await this.cleanup();
|
|
1264
1285
|
}
|
|
1265
1286
|
/**
|
|
@@ -1284,7 +1305,7 @@ var FirecrackerVM = class {
|
|
|
1284
1305
|
}
|
|
1285
1306
|
this.client = null;
|
|
1286
1307
|
this.state = "stopped";
|
|
1287
|
-
|
|
1308
|
+
logger3.log(`[VM ${this.config.vmId}] Stopped`);
|
|
1288
1309
|
}
|
|
1289
1310
|
/**
|
|
1290
1311
|
* Wait for the VM process to exit
|
|
@@ -1325,6 +1346,9 @@ var MSG_PING = 1;
|
|
|
1325
1346
|
var MSG_PONG = 2;
|
|
1326
1347
|
var MSG_EXEC = 3;
|
|
1327
1348
|
var MSG_WRITE_FILE = 5;
|
|
1349
|
+
var MSG_SPAWN_WATCH = 7;
|
|
1350
|
+
var MSG_SPAWN_WATCH_RESULT = 8;
|
|
1351
|
+
var MSG_PROCESS_EXIT = 9;
|
|
1328
1352
|
var MSG_ERROR = 255;
|
|
1329
1353
|
var FLAG_SUDO = 1;
|
|
1330
1354
|
function encode(type, seq, payload = Buffer.alloc(0)) {
|
|
@@ -1363,33 +1387,90 @@ function encodeWriteFilePayload(path6, content, sudo) {
|
|
|
1363
1387
|
return payload;
|
|
1364
1388
|
}
|
|
1365
1389
|
function decodeExecResult(payload) {
|
|
1366
|
-
if (payload.length <
|
|
1367
|
-
return {
|
|
1390
|
+
if (payload.length < 12) {
|
|
1391
|
+
return {
|
|
1392
|
+
exitCode: 1,
|
|
1393
|
+
stdout: "",
|
|
1394
|
+
stderr: "Invalid exec_result payload: too short"
|
|
1395
|
+
};
|
|
1368
1396
|
}
|
|
1369
1397
|
const exitCode = payload.readInt32BE(0);
|
|
1370
1398
|
const stdoutLen = payload.readUInt32BE(4);
|
|
1371
|
-
const stdout = payload.subarray(8, 8 + stdoutLen).toString("utf-8");
|
|
1372
1399
|
const stderrLenOffset = 8 + stdoutLen;
|
|
1400
|
+
if (payload.length < stderrLenOffset + 4) {
|
|
1401
|
+
return {
|
|
1402
|
+
exitCode: 1,
|
|
1403
|
+
stdout: "",
|
|
1404
|
+
stderr: "Invalid exec_result payload: stdout truncated"
|
|
1405
|
+
};
|
|
1406
|
+
}
|
|
1407
|
+
const stdout = payload.subarray(8, 8 + stdoutLen).toString("utf-8");
|
|
1373
1408
|
const stderrLen = payload.readUInt32BE(stderrLenOffset);
|
|
1409
|
+
const expectedLen = stderrLenOffset + 4 + stderrLen;
|
|
1410
|
+
if (payload.length < expectedLen) {
|
|
1411
|
+
return {
|
|
1412
|
+
exitCode: 1,
|
|
1413
|
+
stdout,
|
|
1414
|
+
stderr: "Invalid exec_result payload: stderr truncated"
|
|
1415
|
+
};
|
|
1416
|
+
}
|
|
1374
1417
|
const stderr = payload.subarray(stderrLenOffset + 4, stderrLenOffset + 4 + stderrLen).toString("utf-8");
|
|
1375
1418
|
return { exitCode, stdout, stderr };
|
|
1376
1419
|
}
|
|
1377
1420
|
function decodeWriteFileResult(payload) {
|
|
1378
1421
|
if (payload.length < 3) {
|
|
1379
|
-
return {
|
|
1422
|
+
return {
|
|
1423
|
+
success: false,
|
|
1424
|
+
error: "Invalid write_file_result payload: too short"
|
|
1425
|
+
};
|
|
1380
1426
|
}
|
|
1381
1427
|
const success = payload.readUInt8(0) === 1;
|
|
1382
1428
|
const errorLen = payload.readUInt16BE(1);
|
|
1429
|
+
if (payload.length < 3 + errorLen) {
|
|
1430
|
+
return {
|
|
1431
|
+
success: false,
|
|
1432
|
+
error: "Invalid write_file_result payload: error truncated"
|
|
1433
|
+
};
|
|
1434
|
+
}
|
|
1383
1435
|
const error = payload.subarray(3, 3 + errorLen).toString("utf-8");
|
|
1384
1436
|
return { success, error };
|
|
1385
1437
|
}
|
|
1386
1438
|
function decodeError(payload) {
|
|
1387
1439
|
if (payload.length < 2) {
|
|
1388
|
-
return "Invalid error payload";
|
|
1440
|
+
return "Invalid error payload: too short";
|
|
1389
1441
|
}
|
|
1390
1442
|
const errorLen = payload.readUInt16BE(0);
|
|
1443
|
+
if (payload.length < 2 + errorLen) {
|
|
1444
|
+
return "Invalid error payload: message truncated";
|
|
1445
|
+
}
|
|
1391
1446
|
return payload.subarray(2, 2 + errorLen).toString("utf-8");
|
|
1392
1447
|
}
|
|
1448
|
+
function decodeSpawnWatchResult(payload) {
|
|
1449
|
+
if (payload.length < 4) {
|
|
1450
|
+
throw new Error("Invalid spawn_watch_result payload");
|
|
1451
|
+
}
|
|
1452
|
+
return { pid: payload.readUInt32BE(0) };
|
|
1453
|
+
}
|
|
1454
|
+
function decodeProcessExit(payload) {
|
|
1455
|
+
if (payload.length < 16) {
|
|
1456
|
+
throw new Error("Invalid process_exit payload: too short");
|
|
1457
|
+
}
|
|
1458
|
+
const pid = payload.readUInt32BE(0);
|
|
1459
|
+
const exitCode = payload.readInt32BE(4);
|
|
1460
|
+
const stdoutLen = payload.readUInt32BE(8);
|
|
1461
|
+
const stderrLenOffset = 12 + stdoutLen;
|
|
1462
|
+
if (payload.length < stderrLenOffset + 4) {
|
|
1463
|
+
throw new Error("Invalid process_exit payload: stdout truncated");
|
|
1464
|
+
}
|
|
1465
|
+
const stdout = payload.subarray(12, 12 + stdoutLen).toString("utf-8");
|
|
1466
|
+
const stderrLen = payload.readUInt32BE(stderrLenOffset);
|
|
1467
|
+
const expectedLen = stderrLenOffset + 4 + stderrLen;
|
|
1468
|
+
if (payload.length < expectedLen) {
|
|
1469
|
+
throw new Error("Invalid process_exit payload: stderr truncated");
|
|
1470
|
+
}
|
|
1471
|
+
const stderr = payload.subarray(stderrLenOffset + 4, stderrLenOffset + 4 + stderrLen).toString("utf-8");
|
|
1472
|
+
return { pid, exitCode, stdout, stderr };
|
|
1473
|
+
}
|
|
1393
1474
|
var Decoder = class {
|
|
1394
1475
|
buf = Buffer.alloc(0);
|
|
1395
1476
|
decode(data) {
|
|
@@ -1421,6 +1502,9 @@ var VsockClient = class {
|
|
|
1421
1502
|
connected = false;
|
|
1422
1503
|
nextSeq = 1;
|
|
1423
1504
|
pendingRequests = /* @__PURE__ */ new Map();
|
|
1505
|
+
pendingExits = /* @__PURE__ */ new Map();
|
|
1506
|
+
// Cache for exit events that arrive before waitForExit is called
|
|
1507
|
+
cachedExits = /* @__PURE__ */ new Map();
|
|
1424
1508
|
constructor(vsockPath) {
|
|
1425
1509
|
this.vsockPath = vsockPath;
|
|
1426
1510
|
}
|
|
@@ -1437,6 +1521,21 @@ var VsockClient = class {
|
|
|
1437
1521
|
* Handle incoming message and route to pending request
|
|
1438
1522
|
*/
|
|
1439
1523
|
handleMessage(msg) {
|
|
1524
|
+
if (msg.type === MSG_PROCESS_EXIT && msg.seq === 0) {
|
|
1525
|
+
const event = decodeProcessExit(msg.payload);
|
|
1526
|
+
const pending2 = this.pendingExits.get(event.pid);
|
|
1527
|
+
if (pending2) {
|
|
1528
|
+
if (pending2.timeout) clearTimeout(pending2.timeout);
|
|
1529
|
+
this.pendingExits.delete(event.pid);
|
|
1530
|
+
pending2.resolve(event);
|
|
1531
|
+
} else if (!this.cachedExits.has(event.pid)) {
|
|
1532
|
+
this.cachedExits.set(event.pid, {
|
|
1533
|
+
event,
|
|
1534
|
+
timestamp: Date.now()
|
|
1535
|
+
});
|
|
1536
|
+
}
|
|
1537
|
+
return;
|
|
1538
|
+
}
|
|
1440
1539
|
const pending = this.pendingRequests.get(msg.seq);
|
|
1441
1540
|
if (pending) {
|
|
1442
1541
|
clearTimeout(pending.timeout);
|
|
@@ -1640,7 +1739,11 @@ var VsockClient = class {
|
|
|
1640
1739
|
this.connected = true;
|
|
1641
1740
|
resolve();
|
|
1642
1741
|
} else if (state === 2 /* Connected */) {
|
|
1643
|
-
|
|
1742
|
+
try {
|
|
1743
|
+
this.handleMessage(msg);
|
|
1744
|
+
} catch (msgErr) {
|
|
1745
|
+
console.error(`[vsock] Error handling message: ${msgErr}`);
|
|
1746
|
+
}
|
|
1644
1747
|
}
|
|
1645
1748
|
}
|
|
1646
1749
|
} catch (e) {
|
|
@@ -1656,12 +1759,19 @@ var VsockClient = class {
|
|
|
1656
1759
|
}
|
|
1657
1760
|
this.connected = false;
|
|
1658
1761
|
this.socket = null;
|
|
1659
|
-
const
|
|
1762
|
+
const pendingReqs = Array.from(this.pendingRequests.values());
|
|
1660
1763
|
this.pendingRequests.clear();
|
|
1661
|
-
for (const req of
|
|
1764
|
+
for (const req of pendingReqs) {
|
|
1662
1765
|
clearTimeout(req.timeout);
|
|
1663
1766
|
req.reject(new Error("Connection closed"));
|
|
1664
1767
|
}
|
|
1768
|
+
const pendingExits = Array.from(this.pendingExits.values());
|
|
1769
|
+
this.pendingExits.clear();
|
|
1770
|
+
for (const exit of pendingExits) {
|
|
1771
|
+
if (exit.timeout) clearTimeout(exit.timeout);
|
|
1772
|
+
exit.reject(new Error("Connection closed"));
|
|
1773
|
+
}
|
|
1774
|
+
this.cachedExits.clear();
|
|
1665
1775
|
});
|
|
1666
1776
|
});
|
|
1667
1777
|
server.listen(listenerPath, () => {
|
|
@@ -1681,6 +1791,59 @@ var VsockClient = class {
|
|
|
1681
1791
|
const result = await this.exec(`test -e '${remotePath}'`);
|
|
1682
1792
|
return result.exitCode === 0;
|
|
1683
1793
|
}
|
|
1794
|
+
/**
|
|
1795
|
+
* Spawn a process and monitor for exit (event-driven mode)
|
|
1796
|
+
*
|
|
1797
|
+
* Returns immediately with the PID. Use waitForExit() to wait for completion.
|
|
1798
|
+
* When the process exits, the agent sends an unsolicited notification.
|
|
1799
|
+
*/
|
|
1800
|
+
async spawnAndWatch(command, timeoutMs = 0) {
|
|
1801
|
+
const payload = encodeExecPayload(command, timeoutMs);
|
|
1802
|
+
const response = await this.request(
|
|
1803
|
+
MSG_SPAWN_WATCH,
|
|
1804
|
+
payload,
|
|
1805
|
+
3e4
|
|
1806
|
+
// 30s timeout for spawn acknowledgment
|
|
1807
|
+
);
|
|
1808
|
+
if (response.type === MSG_ERROR) {
|
|
1809
|
+
throw new Error(`spawnAndWatch failed: ${decodeError(response.payload)}`);
|
|
1810
|
+
}
|
|
1811
|
+
if (response.type !== MSG_SPAWN_WATCH_RESULT) {
|
|
1812
|
+
throw new Error(
|
|
1813
|
+
`Unexpected response type: 0x${response.type.toString(16)}`
|
|
1814
|
+
);
|
|
1815
|
+
}
|
|
1816
|
+
return decodeSpawnWatchResult(response.payload);
|
|
1817
|
+
}
|
|
1818
|
+
/**
|
|
1819
|
+
* Wait for a spawned process to exit
|
|
1820
|
+
*
|
|
1821
|
+
* Blocks until the process exits or timeout is reached.
|
|
1822
|
+
* The exit event is pushed by the guest agent (no polling).
|
|
1823
|
+
*/
|
|
1824
|
+
async waitForExit(pid, timeoutMs = 0) {
|
|
1825
|
+
if (!this.connected || !this.socket) {
|
|
1826
|
+
throw new Error("Not connected - cannot wait for process exit");
|
|
1827
|
+
}
|
|
1828
|
+
if (this.pendingExits.has(pid)) {
|
|
1829
|
+
throw new Error(`Already waiting for process ${pid} to exit`);
|
|
1830
|
+
}
|
|
1831
|
+
const cached = this.cachedExits.get(pid);
|
|
1832
|
+
if (cached) {
|
|
1833
|
+
this.cachedExits.delete(pid);
|
|
1834
|
+
return cached.event;
|
|
1835
|
+
}
|
|
1836
|
+
return new Promise((resolve, reject) => {
|
|
1837
|
+
const pending = { resolve, reject };
|
|
1838
|
+
if (timeoutMs > 0) {
|
|
1839
|
+
pending.timeout = setTimeout(() => {
|
|
1840
|
+
this.pendingExits.delete(pid);
|
|
1841
|
+
reject(new Error(`Timeout waiting for process ${pid} to exit`));
|
|
1842
|
+
}, timeoutMs);
|
|
1843
|
+
}
|
|
1844
|
+
this.pendingExits.set(pid, pending);
|
|
1845
|
+
});
|
|
1846
|
+
}
|
|
1684
1847
|
/**
|
|
1685
1848
|
* Get the vsock path (for logging/debugging)
|
|
1686
1849
|
*/
|
|
@@ -1696,12 +1859,19 @@ var VsockClient = class {
|
|
|
1696
1859
|
this.socket = null;
|
|
1697
1860
|
}
|
|
1698
1861
|
this.connected = false;
|
|
1699
|
-
const
|
|
1862
|
+
const pendingRequests = Array.from(this.pendingRequests.values());
|
|
1700
1863
|
this.pendingRequests.clear();
|
|
1701
|
-
for (const req of
|
|
1864
|
+
for (const req of pendingRequests) {
|
|
1702
1865
|
clearTimeout(req.timeout);
|
|
1703
1866
|
req.reject(new Error("Connection closed"));
|
|
1704
1867
|
}
|
|
1868
|
+
const pendingExits = Array.from(this.pendingExits.values());
|
|
1869
|
+
this.pendingExits.clear();
|
|
1870
|
+
for (const exit of pendingExits) {
|
|
1871
|
+
if (exit.timeout) clearTimeout(exit.timeout);
|
|
1872
|
+
exit.reject(new Error("Connection closed"));
|
|
1873
|
+
}
|
|
1874
|
+
this.cachedExits.clear();
|
|
1705
1875
|
}
|
|
1706
1876
|
};
|
|
1707
1877
|
|
|
@@ -7114,7 +7284,7 @@ var credentialsByNameContract = c10.router({
|
|
|
7114
7284
|
name: credentialNameSchema
|
|
7115
7285
|
}),
|
|
7116
7286
|
responses: {
|
|
7117
|
-
204:
|
|
7287
|
+
204: c10.noBody(),
|
|
7118
7288
|
401: apiErrorSchema,
|
|
7119
7289
|
404: apiErrorSchema,
|
|
7120
7290
|
500: apiErrorSchema
|
|
@@ -7210,7 +7380,7 @@ var modelProvidersByTypeContract = c11.router({
|
|
|
7210
7380
|
type: modelProviderTypeSchema
|
|
7211
7381
|
}),
|
|
7212
7382
|
responses: {
|
|
7213
|
-
204:
|
|
7383
|
+
204: c11.noBody(),
|
|
7214
7384
|
401: apiErrorSchema,
|
|
7215
7385
|
404: apiErrorSchema,
|
|
7216
7386
|
500: apiErrorSchema
|
|
@@ -7495,7 +7665,7 @@ var schedulesByNameContract = c13.router({
|
|
|
7495
7665
|
composeId: z19.string().uuid("Compose ID required")
|
|
7496
7666
|
}),
|
|
7497
7667
|
responses: {
|
|
7498
|
-
204:
|
|
7668
|
+
204: c13.noBody(),
|
|
7499
7669
|
401: apiErrorSchema,
|
|
7500
7670
|
404: apiErrorSchema
|
|
7501
7671
|
},
|
|
@@ -8340,6 +8510,7 @@ var ENV_LOADER_PATH = "/usr/local/bin/vm0-agent/env-loader.mjs";
|
|
|
8340
8510
|
|
|
8341
8511
|
// src/lib/proxy/vm-registry.ts
|
|
8342
8512
|
import fs5 from "fs";
|
|
8513
|
+
var logger4 = createLogger("VMRegistry");
|
|
8343
8514
|
var DEFAULT_REGISTRY_PATH = "/tmp/vm0-vm-registry.json";
|
|
8344
8515
|
var VMRegistry = class {
|
|
8345
8516
|
registryPath;
|
|
@@ -8386,8 +8557,8 @@ var VMRegistry = class {
|
|
|
8386
8557
|
this.save();
|
|
8387
8558
|
const firewallInfo = options?.firewallRules ? ` with ${options.firewallRules.length} firewall rules` : "";
|
|
8388
8559
|
const mitmInfo = options?.mitmEnabled ? ", MITM enabled" : "";
|
|
8389
|
-
|
|
8390
|
-
`
|
|
8560
|
+
logger4.log(
|
|
8561
|
+
`Registered VM ${vmIp} for run ${runId}${firewallInfo}${mitmInfo}`
|
|
8391
8562
|
);
|
|
8392
8563
|
}
|
|
8393
8564
|
/**
|
|
@@ -8398,9 +8569,7 @@ var VMRegistry = class {
|
|
|
8398
8569
|
const registration = this.data.vms[vmIp];
|
|
8399
8570
|
delete this.data.vms[vmIp];
|
|
8400
8571
|
this.save();
|
|
8401
|
-
|
|
8402
|
-
`[VMRegistry] Unregistered VM ${vmIp} (run ${registration.runId})`
|
|
8403
|
-
);
|
|
8572
|
+
logger4.log(`Unregistered VM ${vmIp} (run ${registration.runId})`);
|
|
8404
8573
|
}
|
|
8405
8574
|
}
|
|
8406
8575
|
/**
|
|
@@ -8421,7 +8590,7 @@ var VMRegistry = class {
|
|
|
8421
8590
|
clear() {
|
|
8422
8591
|
this.data.vms = {};
|
|
8423
8592
|
this.save();
|
|
8424
|
-
|
|
8593
|
+
logger4.log("Cleared all registrations");
|
|
8425
8594
|
}
|
|
8426
8595
|
/**
|
|
8427
8596
|
* Get the path to the registry file
|
|
@@ -8654,7 +8823,10 @@ def tls_clienthello(data: tls.ClientHelloData) -> None:
|
|
|
8654
8823
|
|
|
8655
8824
|
vm_info = get_vm_info(client_ip)
|
|
8656
8825
|
if not vm_info:
|
|
8657
|
-
|
|
8826
|
+
# Not a registered VM - pass through without MITM interception
|
|
8827
|
+
# This is critical for CIDR-based rules where all VM traffic is redirected
|
|
8828
|
+
data.ignore_connection = True
|
|
8829
|
+
return
|
|
8658
8830
|
|
|
8659
8831
|
# If MITM is enabled, let the normal flow handle it
|
|
8660
8832
|
if vm_info.get("mitmEnabled", False):
|
|
@@ -8928,6 +9100,7 @@ addons = [tls_clienthello, request, response]
|
|
|
8928
9100
|
`;
|
|
8929
9101
|
|
|
8930
9102
|
// src/lib/proxy/proxy-manager.ts
|
|
9103
|
+
var logger5 = createLogger("ProxyManager");
|
|
8931
9104
|
var DEFAULT_PROXY_OPTIONS = {
|
|
8932
9105
|
port: 8080,
|
|
8933
9106
|
registryPath: DEFAULT_REGISTRY_PATH,
|
|
@@ -8972,9 +9145,7 @@ var ProxyManager = class {
|
|
|
8972
9145
|
fs6.writeFileSync(this.config.addonPath, RUNNER_MITM_ADDON_SCRIPT, {
|
|
8973
9146
|
mode: 493
|
|
8974
9147
|
});
|
|
8975
|
-
|
|
8976
|
-
`[ProxyManager] Addon script written to ${this.config.addonPath}`
|
|
8977
|
-
);
|
|
9148
|
+
logger5.log(`Addon script written to ${this.config.addonPath}`);
|
|
8978
9149
|
}
|
|
8979
9150
|
/**
|
|
8980
9151
|
* Validate proxy configuration
|
|
@@ -8994,7 +9165,7 @@ var ProxyManager = class {
|
|
|
8994
9165
|
*/
|
|
8995
9166
|
async start() {
|
|
8996
9167
|
if (this.isRunning) {
|
|
8997
|
-
|
|
9168
|
+
logger5.log("Proxy already running");
|
|
8998
9169
|
return;
|
|
8999
9170
|
}
|
|
9000
9171
|
const mitmproxyInstalled = await this.checkMitmproxyInstalled();
|
|
@@ -9005,11 +9176,11 @@ var ProxyManager = class {
|
|
|
9005
9176
|
}
|
|
9006
9177
|
this.validateConfig();
|
|
9007
9178
|
getVMRegistry();
|
|
9008
|
-
|
|
9009
|
-
|
|
9010
|
-
|
|
9011
|
-
|
|
9012
|
-
|
|
9179
|
+
logger5.log("Starting mitmproxy...");
|
|
9180
|
+
logger5.log(` Port: ${this.config.port}`);
|
|
9181
|
+
logger5.log(` CA Dir: ${this.config.caDir}`);
|
|
9182
|
+
logger5.log(` Addon: ${this.config.addonPath}`);
|
|
9183
|
+
logger5.log(` Registry: ${this.config.registryPath}`);
|
|
9013
9184
|
const args = [
|
|
9014
9185
|
"--mode",
|
|
9015
9186
|
"transparent",
|
|
@@ -9031,25 +9202,26 @@ var ProxyManager = class {
|
|
|
9031
9202
|
stdio: ["ignore", "pipe", "pipe"],
|
|
9032
9203
|
detached: false
|
|
9033
9204
|
});
|
|
9205
|
+
const mitmLogger = createLogger("mitmproxy");
|
|
9034
9206
|
this.process.stdout?.on("data", (data) => {
|
|
9035
|
-
|
|
9207
|
+
mitmLogger.log(data.toString().trim());
|
|
9036
9208
|
});
|
|
9037
9209
|
this.process.stderr?.on("data", (data) => {
|
|
9038
|
-
|
|
9210
|
+
mitmLogger.log(data.toString().trim());
|
|
9039
9211
|
});
|
|
9040
9212
|
this.process.on("close", (code) => {
|
|
9041
|
-
|
|
9213
|
+
logger5.log(`mitmproxy exited with code ${code}`);
|
|
9042
9214
|
this.isRunning = false;
|
|
9043
9215
|
this.process = null;
|
|
9044
9216
|
});
|
|
9045
9217
|
this.process.on("error", (err) => {
|
|
9046
|
-
|
|
9218
|
+
logger5.error(`mitmproxy error: ${err.message}`);
|
|
9047
9219
|
this.isRunning = false;
|
|
9048
9220
|
this.process = null;
|
|
9049
9221
|
});
|
|
9050
9222
|
await this.waitForReady();
|
|
9051
9223
|
this.isRunning = true;
|
|
9052
|
-
|
|
9224
|
+
logger5.log("mitmproxy started successfully");
|
|
9053
9225
|
}
|
|
9054
9226
|
/**
|
|
9055
9227
|
* Wait for proxy to be ready
|
|
@@ -9075,24 +9247,24 @@ var ProxyManager = class {
|
|
|
9075
9247
|
*/
|
|
9076
9248
|
async stop() {
|
|
9077
9249
|
if (!this.process || !this.isRunning) {
|
|
9078
|
-
|
|
9250
|
+
logger5.log("Proxy not running");
|
|
9079
9251
|
return;
|
|
9080
9252
|
}
|
|
9081
|
-
|
|
9253
|
+
logger5.log("Stopping mitmproxy...");
|
|
9082
9254
|
return new Promise((resolve) => {
|
|
9083
9255
|
if (!this.process) {
|
|
9084
9256
|
resolve();
|
|
9085
9257
|
return;
|
|
9086
9258
|
}
|
|
9087
9259
|
const timeout = setTimeout(() => {
|
|
9088
|
-
|
|
9260
|
+
logger5.log("Force killing mitmproxy...");
|
|
9089
9261
|
this.process?.kill("SIGKILL");
|
|
9090
9262
|
}, 5e3);
|
|
9091
9263
|
this.process.on("close", () => {
|
|
9092
9264
|
clearTimeout(timeout);
|
|
9093
9265
|
this.isRunning = false;
|
|
9094
9266
|
this.process = null;
|
|
9095
|
-
|
|
9267
|
+
logger5.log("mitmproxy stopped");
|
|
9096
9268
|
resolve();
|
|
9097
9269
|
});
|
|
9098
9270
|
this.process.kill("SIGTERM");
|
|
@@ -9125,171 +9297,80 @@ function initProxyManager(config) {
|
|
|
9125
9297
|
return globalProxyManager;
|
|
9126
9298
|
}
|
|
9127
9299
|
|
|
9128
|
-
// src/lib/metrics/
|
|
9129
|
-
|
|
9130
|
-
|
|
9131
|
-
|
|
9132
|
-
|
|
9133
|
-
|
|
9134
|
-
|
|
9135
|
-
|
|
9136
|
-
|
|
9137
|
-
var meterProvider = null;
|
|
9138
|
-
var initialized = false;
|
|
9139
|
-
var enabled = false;
|
|
9140
|
-
var _runnerLabel = "";
|
|
9141
|
-
function initMetrics(config) {
|
|
9142
|
-
if (initialized) return;
|
|
9143
|
-
initialized = true;
|
|
9144
|
-
_runnerLabel = config.runnerLabel;
|
|
9145
|
-
if (!config.axiomToken) {
|
|
9146
|
-
console.log("[metrics] AXIOM_TOKEN not configured, metrics disabled");
|
|
9147
|
-
return;
|
|
9148
|
-
}
|
|
9149
|
-
const env = config.environment ?? "dev";
|
|
9150
|
-
const exporter = new OTLPMetricExporter({
|
|
9151
|
-
url: "https://api.axiom.co/v1/metrics",
|
|
9152
|
-
headers: {
|
|
9153
|
-
Authorization: `Bearer ${config.axiomToken}`,
|
|
9154
|
-
"X-Axiom-Dataset": `vm0-sandbox-op-log-${env}`
|
|
9155
|
-
}
|
|
9156
|
-
});
|
|
9157
|
-
meterProvider = new MeterProvider({
|
|
9158
|
-
resource: new Resource({
|
|
9159
|
-
[ATTR_SERVICE_NAME]: config.serviceName,
|
|
9160
|
-
"deployment.environment": env,
|
|
9161
|
-
"runner.label": config.runnerLabel
|
|
9162
|
-
}),
|
|
9163
|
-
readers: [
|
|
9164
|
-
new PeriodicExportingMetricReader({
|
|
9165
|
-
exporter,
|
|
9166
|
-
exportIntervalMillis: config.exportIntervalMs ?? 3e4
|
|
9167
|
-
})
|
|
9168
|
-
]
|
|
9169
|
-
});
|
|
9170
|
-
metrics.setGlobalMeterProvider(meterProvider);
|
|
9171
|
-
enabled = true;
|
|
9172
|
-
console.log(
|
|
9173
|
-
`[metrics] initialized for ${config.serviceName} (${env}), runner: ${config.runnerLabel}`
|
|
9174
|
-
);
|
|
9175
|
-
}
|
|
9176
|
-
function isMetricsEnabled() {
|
|
9177
|
-
return enabled;
|
|
9178
|
-
}
|
|
9179
|
-
function getRunnerLabel() {
|
|
9180
|
-
return _runnerLabel;
|
|
9181
|
-
}
|
|
9182
|
-
function getMeter(name) {
|
|
9183
|
-
return metrics.getMeter(name);
|
|
9300
|
+
// src/lib/metrics/instruments.ts
|
|
9301
|
+
var FLUSH_THRESHOLD_MS = 3e4;
|
|
9302
|
+
var sandboxContext = null;
|
|
9303
|
+
var pendingOps = [];
|
|
9304
|
+
var oldestPendingTime = null;
|
|
9305
|
+
function setSandboxContext(ctx) {
|
|
9306
|
+
sandboxContext = ctx;
|
|
9307
|
+
pendingOps = [];
|
|
9308
|
+
oldestPendingTime = null;
|
|
9184
9309
|
}
|
|
9185
|
-
async function
|
|
9186
|
-
|
|
9187
|
-
|
|
9310
|
+
async function clearSandboxContext() {
|
|
9311
|
+
const ctx = sandboxContext;
|
|
9312
|
+
const ops = pendingOps;
|
|
9313
|
+
sandboxContext = null;
|
|
9314
|
+
pendingOps = [];
|
|
9315
|
+
oldestPendingTime = null;
|
|
9316
|
+
if (ctx && ops.length > 0) {
|
|
9317
|
+
await flushOpsWithContext(ctx, ops);
|
|
9188
9318
|
}
|
|
9189
9319
|
}
|
|
9190
|
-
async function
|
|
9191
|
-
if (
|
|
9192
|
-
|
|
9193
|
-
|
|
9320
|
+
async function flushOps() {
|
|
9321
|
+
if (!sandboxContext || pendingOps.length === 0) return;
|
|
9322
|
+
const ctx = sandboxContext;
|
|
9323
|
+
const ops = pendingOps;
|
|
9324
|
+
pendingOps = [];
|
|
9325
|
+
oldestPendingTime = null;
|
|
9326
|
+
await flushOpsWithContext(ctx, ops);
|
|
9194
9327
|
}
|
|
9195
|
-
|
|
9196
|
-
|
|
9197
|
-
|
|
9198
|
-
|
|
9199
|
-
|
|
9200
|
-
var sandboxOperationTotal = null;
|
|
9201
|
-
var sandboxOperationErrorsTotal = null;
|
|
9202
|
-
var sandboxOperationDuration = null;
|
|
9203
|
-
function getRunnerInstruments() {
|
|
9204
|
-
if (!runnerOperationTotal) {
|
|
9205
|
-
const meter = getMeter("vm0-runner");
|
|
9206
|
-
runnerOperationTotal = meter.createCounter("runner_operation_total", {
|
|
9207
|
-
description: "Total number of runner operations"
|
|
9208
|
-
});
|
|
9209
|
-
runnerOperationErrorsTotal = meter.createCounter(
|
|
9210
|
-
"runner_operation_errors_total",
|
|
9211
|
-
{
|
|
9212
|
-
description: "Total number of runner operation errors"
|
|
9213
|
-
}
|
|
9214
|
-
);
|
|
9215
|
-
runnerOperationDuration = meter.createHistogram(
|
|
9216
|
-
"runner_operation_duration_ms",
|
|
9217
|
-
{
|
|
9218
|
-
description: "Runner operation duration in milliseconds",
|
|
9219
|
-
unit: "ms"
|
|
9220
|
-
}
|
|
9221
|
-
);
|
|
9222
|
-
}
|
|
9223
|
-
return {
|
|
9224
|
-
runnerOperationTotal,
|
|
9225
|
-
runnerOperationErrorsTotal,
|
|
9226
|
-
runnerOperationDuration
|
|
9328
|
+
async function flushOpsWithContext(ctx, ops) {
|
|
9329
|
+
const { apiUrl, runId, sandboxToken } = ctx;
|
|
9330
|
+
const headers = {
|
|
9331
|
+
Authorization: `Bearer ${sandboxToken}`,
|
|
9332
|
+
"Content-Type": "application/json"
|
|
9227
9333
|
};
|
|
9228
|
-
|
|
9229
|
-
|
|
9230
|
-
|
|
9231
|
-
|
|
9232
|
-
|
|
9233
|
-
|
|
9334
|
+
const bypassSecret = process.env.VERCEL_AUTOMATION_BYPASS_SECRET;
|
|
9335
|
+
if (bypassSecret) {
|
|
9336
|
+
headers["x-vercel-protection-bypass"] = bypassSecret;
|
|
9337
|
+
}
|
|
9338
|
+
try {
|
|
9339
|
+
const response = await fetch(`${apiUrl}/api/webhooks/agent/telemetry`, {
|
|
9340
|
+
method: "POST",
|
|
9341
|
+
headers,
|
|
9342
|
+
body: JSON.stringify({
|
|
9343
|
+
runId,
|
|
9344
|
+
sandboxOperations: ops
|
|
9345
|
+
})
|
|
9234
9346
|
});
|
|
9235
|
-
|
|
9236
|
-
|
|
9237
|
-
|
|
9238
|
-
|
|
9239
|
-
|
|
9240
|
-
|
|
9241
|
-
|
|
9242
|
-
|
|
9243
|
-
{
|
|
9244
|
-
description: "Sandbox operation duration in milliseconds",
|
|
9245
|
-
unit: "ms"
|
|
9246
|
-
}
|
|
9247
|
-
);
|
|
9347
|
+
await response.text();
|
|
9348
|
+
if (!response.ok) {
|
|
9349
|
+
console.warn(
|
|
9350
|
+
`[metrics] Failed to flush operations: HTTP ${response.status}`
|
|
9351
|
+
);
|
|
9352
|
+
}
|
|
9353
|
+
} catch (err) {
|
|
9354
|
+
console.warn(`[metrics] Failed to flush operations: ${err}`);
|
|
9248
9355
|
}
|
|
9249
|
-
return {
|
|
9250
|
-
sandboxOperationTotal,
|
|
9251
|
-
sandboxOperationErrorsTotal,
|
|
9252
|
-
sandboxOperationDuration
|
|
9253
|
-
};
|
|
9254
9356
|
}
|
|
9255
|
-
function
|
|
9256
|
-
if (!
|
|
9257
|
-
|
|
9258
|
-
runnerOperationTotal: runnerOperationTotal2,
|
|
9259
|
-
runnerOperationErrorsTotal: runnerOperationErrorsTotal2,
|
|
9260
|
-
runnerOperationDuration: runnerOperationDuration2
|
|
9261
|
-
} = getRunnerInstruments();
|
|
9262
|
-
const labels = {
|
|
9263
|
-
action_type: attrs.actionType,
|
|
9264
|
-
runner_label: getRunnerLabel()
|
|
9265
|
-
};
|
|
9266
|
-
runnerOperationTotal2.add(1, labels);
|
|
9267
|
-
if (!attrs.success) {
|
|
9268
|
-
runnerOperationErrorsTotal2.add(1, labels);
|
|
9357
|
+
function recordOperation(attrs) {
|
|
9358
|
+
if (!sandboxContext) {
|
|
9359
|
+
return;
|
|
9269
9360
|
}
|
|
9270
|
-
|
|
9271
|
-
|
|
9272
|
-
|
|
9273
|
-
|
|
9274
|
-
}
|
|
9275
|
-
|
|
9276
|
-
|
|
9277
|
-
const {
|
|
9278
|
-
sandboxOperationTotal: sandboxOperationTotal2,
|
|
9279
|
-
sandboxOperationErrorsTotal: sandboxOperationErrorsTotal2,
|
|
9280
|
-
sandboxOperationDuration: sandboxOperationDuration2
|
|
9281
|
-
} = getSandboxInstruments();
|
|
9282
|
-
const labels = {
|
|
9283
|
-
sandbox_type: "runner",
|
|
9284
|
-
action_type: attrs.actionType
|
|
9285
|
-
};
|
|
9286
|
-
sandboxOperationTotal2.add(1, labels);
|
|
9287
|
-
if (!attrs.success) {
|
|
9288
|
-
sandboxOperationErrorsTotal2.add(1, labels);
|
|
9361
|
+
const now = Date.now();
|
|
9362
|
+
if (oldestPendingTime && now - oldestPendingTime >= FLUSH_THRESHOLD_MS) {
|
|
9363
|
+
flushOps().catch(() => {
|
|
9364
|
+
});
|
|
9365
|
+
}
|
|
9366
|
+
if (oldestPendingTime === null) {
|
|
9367
|
+
oldestPendingTime = now;
|
|
9289
9368
|
}
|
|
9290
|
-
|
|
9291
|
-
|
|
9292
|
-
|
|
9369
|
+
pendingOps.push({
|
|
9370
|
+
ts: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9371
|
+
action_type: attrs.actionType,
|
|
9372
|
+
duration_ms: attrs.durationMs,
|
|
9373
|
+
success: attrs.success
|
|
9293
9374
|
});
|
|
9294
9375
|
}
|
|
9295
9376
|
|
|
@@ -9303,7 +9384,7 @@ async function withRunnerTiming(actionType, fn) {
|
|
|
9303
9384
|
success = false;
|
|
9304
9385
|
throw error;
|
|
9305
9386
|
} finally {
|
|
9306
|
-
|
|
9387
|
+
recordOperation({
|
|
9307
9388
|
actionType,
|
|
9308
9389
|
durationMs: Date.now() - startTime,
|
|
9309
9390
|
success
|
|
@@ -9319,7 +9400,7 @@ async function withSandboxTiming(actionType, fn) {
|
|
|
9319
9400
|
success = false;
|
|
9320
9401
|
throw error;
|
|
9321
9402
|
} finally {
|
|
9322
|
-
|
|
9403
|
+
recordOperation({
|
|
9323
9404
|
actionType,
|
|
9324
9405
|
durationMs: Date.now() - startTime,
|
|
9325
9406
|
success
|
|
@@ -9327,6 +9408,45 @@ async function withSandboxTiming(actionType, fn) {
|
|
|
9327
9408
|
}
|
|
9328
9409
|
}
|
|
9329
9410
|
|
|
9411
|
+
// src/lib/vm-setup/vm-setup.ts
|
|
9412
|
+
var logger6 = createLogger("VMSetup");
|
|
9413
|
+
var VM_PROXY_CA_PATH = "/usr/local/share/ca-certificates/vm0-proxy-ca.crt";
|
|
9414
|
+
async function downloadStorages(guest, manifest) {
|
|
9415
|
+
const totalArchives = manifest.storages.filter((s) => s.archiveUrl).length + (manifest.artifact?.archiveUrl ? 1 : 0);
|
|
9416
|
+
if (totalArchives === 0) {
|
|
9417
|
+
logger6.log(`No archives to download`);
|
|
9418
|
+
return;
|
|
9419
|
+
}
|
|
9420
|
+
logger6.log(`Downloading ${totalArchives} archive(s)...`);
|
|
9421
|
+
const manifestJson = JSON.stringify(manifest);
|
|
9422
|
+
await guest.writeFile("/tmp/storage-manifest.json", manifestJson);
|
|
9423
|
+
const result = await guest.exec(
|
|
9424
|
+
`node ${SCRIPT_PATHS.download} /tmp/storage-manifest.json`
|
|
9425
|
+
);
|
|
9426
|
+
if (result.exitCode !== 0) {
|
|
9427
|
+
throw new Error(`Storage download failed: ${result.stderr}`);
|
|
9428
|
+
}
|
|
9429
|
+
logger6.log(`Storage download completed`);
|
|
9430
|
+
}
|
|
9431
|
+
async function restoreSessionHistory(guest, resumeSession, workingDir, cliAgentType) {
|
|
9432
|
+
const { sessionId, sessionHistory } = resumeSession;
|
|
9433
|
+
let sessionPath;
|
|
9434
|
+
if (cliAgentType === "codex") {
|
|
9435
|
+
logger6.log(`Codex resume session will be handled by checkpoint.py`);
|
|
9436
|
+
return;
|
|
9437
|
+
} else {
|
|
9438
|
+
const projectName = workingDir.replace(/^\//, "").replace(/\//g, "-");
|
|
9439
|
+
sessionPath = `/home/user/.claude/projects/-${projectName}/${sessionId}.jsonl`;
|
|
9440
|
+
}
|
|
9441
|
+
logger6.log(`Restoring session history to ${sessionPath}`);
|
|
9442
|
+
const dirPath = sessionPath.substring(0, sessionPath.lastIndexOf("/"));
|
|
9443
|
+
await guest.execOrThrow(`mkdir -p "${dirPath}"`);
|
|
9444
|
+
await guest.writeFile(sessionPath, sessionHistory);
|
|
9445
|
+
logger6.log(
|
|
9446
|
+
`Session history restored (${sessionHistory.split("\n").length} lines)`
|
|
9447
|
+
);
|
|
9448
|
+
}
|
|
9449
|
+
|
|
9330
9450
|
// src/lib/executor-env.ts
|
|
9331
9451
|
var ENV_JSON_PATH = "/tmp/vm0-env.json";
|
|
9332
9452
|
function buildEnvironmentVariables(context, apiUrl) {
|
|
@@ -9369,13 +9489,14 @@ function buildEnvironmentVariables(context, apiUrl) {
|
|
|
9369
9489
|
}
|
|
9370
9490
|
}
|
|
9371
9491
|
if (context.experimentalFirewall?.experimental_mitm) {
|
|
9372
|
-
envVars.NODE_EXTRA_CA_CERTS =
|
|
9492
|
+
envVars.NODE_EXTRA_CA_CERTS = VM_PROXY_CA_PATH;
|
|
9373
9493
|
}
|
|
9374
9494
|
return envVars;
|
|
9375
9495
|
}
|
|
9376
9496
|
|
|
9377
9497
|
// src/lib/network-logs/network-logs.ts
|
|
9378
9498
|
import fs7 from "fs";
|
|
9499
|
+
var logger7 = createLogger("NetworkLogs");
|
|
9379
9500
|
function getNetworkLogPath(runId) {
|
|
9380
9501
|
return `/tmp/vm0-network-${runId}.jsonl`;
|
|
9381
9502
|
}
|
|
@@ -9389,8 +9510,8 @@ function readNetworkLogs(runId) {
|
|
|
9389
9510
|
const lines = content.split("\n").filter((line) => line.trim());
|
|
9390
9511
|
return lines.map((line) => JSON.parse(line));
|
|
9391
9512
|
} catch (err) {
|
|
9392
|
-
|
|
9393
|
-
`
|
|
9513
|
+
logger7.error(
|
|
9514
|
+
`Failed to read network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9394
9515
|
);
|
|
9395
9516
|
return [];
|
|
9396
9517
|
}
|
|
@@ -9402,19 +9523,19 @@ function cleanupNetworkLogs(runId) {
|
|
|
9402
9523
|
fs7.unlinkSync(logPath);
|
|
9403
9524
|
}
|
|
9404
9525
|
} catch (err) {
|
|
9405
|
-
|
|
9406
|
-
`
|
|
9526
|
+
logger7.error(
|
|
9527
|
+
`Failed to cleanup network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9407
9528
|
);
|
|
9408
9529
|
}
|
|
9409
9530
|
}
|
|
9410
9531
|
async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
|
|
9411
9532
|
const networkLogs = readNetworkLogs(runId);
|
|
9412
9533
|
if (networkLogs.length === 0) {
|
|
9413
|
-
|
|
9534
|
+
logger7.log(`No network logs to upload for ${runId}`);
|
|
9414
9535
|
return;
|
|
9415
9536
|
}
|
|
9416
|
-
|
|
9417
|
-
`
|
|
9537
|
+
logger7.log(
|
|
9538
|
+
`Uploading ${networkLogs.length} network log entries for ${runId}`
|
|
9418
9539
|
);
|
|
9419
9540
|
const headers = {
|
|
9420
9541
|
Authorization: `Bearer ${sandboxToken}`,
|
|
@@ -9434,71 +9555,15 @@ async function uploadNetworkLogs(apiUrl, sandboxToken, runId) {
|
|
|
9434
9555
|
});
|
|
9435
9556
|
if (!response.ok) {
|
|
9436
9557
|
const errorText = await response.text();
|
|
9437
|
-
|
|
9558
|
+
logger7.error(`Failed to upload network logs: ${errorText}`);
|
|
9438
9559
|
return;
|
|
9439
9560
|
}
|
|
9440
|
-
|
|
9561
|
+
logger7.log(`Network logs uploaded successfully for ${runId}`);
|
|
9441
9562
|
cleanupNetworkLogs(runId);
|
|
9442
9563
|
}
|
|
9443
9564
|
|
|
9444
|
-
// src/lib/vm-setup/vm-setup.ts
|
|
9445
|
-
import fs8 from "fs";
|
|
9446
|
-
async function downloadStorages(guest, manifest) {
|
|
9447
|
-
const totalArchives = manifest.storages.filter((s) => s.archiveUrl).length + (manifest.artifact?.archiveUrl ? 1 : 0);
|
|
9448
|
-
if (totalArchives === 0) {
|
|
9449
|
-
console.log(`[Executor] No archives to download`);
|
|
9450
|
-
return;
|
|
9451
|
-
}
|
|
9452
|
-
console.log(`[Executor] Downloading ${totalArchives} archive(s)...`);
|
|
9453
|
-
const manifestJson = JSON.stringify(manifest);
|
|
9454
|
-
await guest.writeFile("/tmp/storage-manifest.json", manifestJson);
|
|
9455
|
-
const result = await guest.exec(
|
|
9456
|
-
`node ${SCRIPT_PATHS.download} /tmp/storage-manifest.json`
|
|
9457
|
-
);
|
|
9458
|
-
if (result.exitCode !== 0) {
|
|
9459
|
-
throw new Error(`Storage download failed: ${result.stderr}`);
|
|
9460
|
-
}
|
|
9461
|
-
console.log(`[Executor] Storage download completed`);
|
|
9462
|
-
}
|
|
9463
|
-
async function restoreSessionHistory(guest, resumeSession, workingDir, cliAgentType) {
|
|
9464
|
-
const { sessionId, sessionHistory } = resumeSession;
|
|
9465
|
-
let sessionPath;
|
|
9466
|
-
if (cliAgentType === "codex") {
|
|
9467
|
-
console.log(
|
|
9468
|
-
`[Executor] Codex resume session will be handled by checkpoint.py`
|
|
9469
|
-
);
|
|
9470
|
-
return;
|
|
9471
|
-
} else {
|
|
9472
|
-
const projectName = workingDir.replace(/^\//, "").replace(/\//g, "-");
|
|
9473
|
-
sessionPath = `/home/user/.claude/projects/-${projectName}/${sessionId}.jsonl`;
|
|
9474
|
-
}
|
|
9475
|
-
console.log(`[Executor] Restoring session history to ${sessionPath}`);
|
|
9476
|
-
const dirPath = sessionPath.substring(0, sessionPath.lastIndexOf("/"));
|
|
9477
|
-
await guest.execOrThrow(`mkdir -p "${dirPath}"`);
|
|
9478
|
-
await guest.writeFile(sessionPath, sessionHistory);
|
|
9479
|
-
console.log(
|
|
9480
|
-
`[Executor] Session history restored (${sessionHistory.split("\n").length} lines)`
|
|
9481
|
-
);
|
|
9482
|
-
}
|
|
9483
|
-
async function installProxyCA(guest, caCertPath) {
|
|
9484
|
-
if (!fs8.existsSync(caCertPath)) {
|
|
9485
|
-
throw new Error(
|
|
9486
|
-
`Proxy CA certificate not found at ${caCertPath}. Run generate-proxy-ca.sh first.`
|
|
9487
|
-
);
|
|
9488
|
-
}
|
|
9489
|
-
const caCert = fs8.readFileSync(caCertPath, "utf-8");
|
|
9490
|
-
console.log(
|
|
9491
|
-
`[Executor] Installing proxy CA certificate (${caCert.length} bytes)`
|
|
9492
|
-
);
|
|
9493
|
-
await guest.writeFileWithSudo(
|
|
9494
|
-
"/usr/local/share/ca-certificates/vm0-proxy-ca.crt",
|
|
9495
|
-
caCert
|
|
9496
|
-
);
|
|
9497
|
-
await guest.execOrThrow("sudo update-ca-certificates");
|
|
9498
|
-
console.log(`[Executor] Proxy CA certificate installed successfully`);
|
|
9499
|
-
}
|
|
9500
|
-
|
|
9501
9565
|
// src/lib/executor.ts
|
|
9566
|
+
var logger8 = createLogger("Executor");
|
|
9502
9567
|
function getVmIdFromRunId(runId) {
|
|
9503
9568
|
return runId.split("-")[0] || runId.substring(0, 8);
|
|
9504
9569
|
}
|
|
@@ -9544,20 +9609,31 @@ async function reportPreflightFailure(apiUrl, runId, sandboxToken, error, bypass
|
|
|
9544
9609
|
})
|
|
9545
9610
|
});
|
|
9546
9611
|
if (!response.ok) {
|
|
9547
|
-
|
|
9548
|
-
`
|
|
9612
|
+
logger8.error(
|
|
9613
|
+
`Failed to report preflight failure: HTTP ${response.status}`
|
|
9549
9614
|
);
|
|
9550
9615
|
}
|
|
9551
9616
|
} catch (err) {
|
|
9552
|
-
|
|
9617
|
+
logger8.error(`Failed to report preflight failure: ${err}`);
|
|
9553
9618
|
}
|
|
9554
9619
|
}
|
|
9555
9620
|
async function executeJob(context, config, options = {}) {
|
|
9621
|
+
setSandboxContext({
|
|
9622
|
+
apiUrl: config.server.url,
|
|
9623
|
+
runId: context.runId,
|
|
9624
|
+
sandboxToken: context.sandboxToken
|
|
9625
|
+
});
|
|
9626
|
+
if (context.apiStartTime) {
|
|
9627
|
+
recordOperation({
|
|
9628
|
+
actionType: "api_to_vm_start",
|
|
9629
|
+
durationMs: Date.now() - context.apiStartTime,
|
|
9630
|
+
success: true
|
|
9631
|
+
});
|
|
9632
|
+
}
|
|
9556
9633
|
const vmId = getVmIdFromRunId(context.runId);
|
|
9557
9634
|
let vm = null;
|
|
9558
9635
|
let guestIp = null;
|
|
9559
|
-
|
|
9560
|
-
log(`[Executor] Starting job ${context.runId} in VM ${vmId}`);
|
|
9636
|
+
logger8.log(`Starting job ${context.runId} in VM ${vmId}`);
|
|
9561
9637
|
try {
|
|
9562
9638
|
const workspacesDir = path4.join(process.cwd(), "workspaces");
|
|
9563
9639
|
const vmConfig = {
|
|
@@ -9567,46 +9643,44 @@ async function executeJob(context, config, options = {}) {
|
|
|
9567
9643
|
kernelPath: config.firecracker.kernel,
|
|
9568
9644
|
rootfsPath: config.firecracker.rootfs,
|
|
9569
9645
|
firecrackerBinary: config.firecracker.binary,
|
|
9570
|
-
workDir: path4.join(workspacesDir, `vm0-${vmId}`)
|
|
9571
|
-
logger: log
|
|
9646
|
+
workDir: path4.join(workspacesDir, `vm0-${vmId}`)
|
|
9572
9647
|
};
|
|
9573
|
-
log(`
|
|
9648
|
+
logger8.log(`Creating VM ${vmId}...`);
|
|
9574
9649
|
vm = new FirecrackerVM(vmConfig);
|
|
9575
9650
|
await withSandboxTiming("vm_create", () => vm.start());
|
|
9576
9651
|
guestIp = vm.getGuestIp();
|
|
9577
9652
|
if (!guestIp) {
|
|
9578
9653
|
throw new Error("VM started but no IP address available");
|
|
9579
9654
|
}
|
|
9580
|
-
log(`
|
|
9655
|
+
logger8.log(`VM ${vmId} started, guest IP: ${guestIp}`);
|
|
9581
9656
|
const vsockPath = vm.getVsockPath();
|
|
9582
9657
|
const guest = new VsockClient(vsockPath);
|
|
9583
|
-
log(`
|
|
9584
|
-
log(`
|
|
9658
|
+
logger8.log(`Using vsock for guest communication: ${vsockPath}`);
|
|
9659
|
+
logger8.log(`Waiting for guest connection...`);
|
|
9585
9660
|
await withSandboxTiming(
|
|
9586
9661
|
"guest_wait",
|
|
9587
9662
|
() => guest.waitForGuestConnection(3e4)
|
|
9588
9663
|
);
|
|
9589
|
-
log(`
|
|
9664
|
+
logger8.log(`Guest client ready`);
|
|
9590
9665
|
const firewallConfig = context.experimentalFirewall;
|
|
9591
9666
|
if (firewallConfig?.enabled) {
|
|
9592
9667
|
const mitmEnabled = firewallConfig.experimental_mitm ?? false;
|
|
9593
9668
|
const sealSecretsEnabled = firewallConfig.experimental_seal_secrets ?? false;
|
|
9594
|
-
log(
|
|
9595
|
-
`
|
|
9669
|
+
logger8.log(
|
|
9670
|
+
`Setting up network security for VM ${guestIp} (mitm=${mitmEnabled}, sealSecrets=${sealSecretsEnabled})`
|
|
9596
9671
|
);
|
|
9597
|
-
await
|
|
9598
|
-
|
|
9599
|
-
|
|
9600
|
-
|
|
9601
|
-
|
|
9602
|
-
|
|
9603
|
-
|
|
9604
|
-
|
|
9605
|
-
|
|
9606
|
-
|
|
9672
|
+
await withSandboxTiming("network_setup", async () => {
|
|
9673
|
+
getVMRegistry().register(
|
|
9674
|
+
guestIp,
|
|
9675
|
+
context.runId,
|
|
9676
|
+
context.sandboxToken,
|
|
9677
|
+
{
|
|
9678
|
+
firewallRules: firewallConfig?.rules,
|
|
9679
|
+
mitmEnabled,
|
|
9680
|
+
sealSecretsEnabled
|
|
9681
|
+
}
|
|
9607
9682
|
);
|
|
9608
|
-
|
|
9609
|
-
}
|
|
9683
|
+
});
|
|
9610
9684
|
}
|
|
9611
9685
|
if (context.storageManifest) {
|
|
9612
9686
|
await withSandboxTiming(
|
|
@@ -9627,22 +9701,25 @@ async function executeJob(context, config, options = {}) {
|
|
|
9627
9701
|
}
|
|
9628
9702
|
const envVars = buildEnvironmentVariables(context, config.server.url);
|
|
9629
9703
|
const envJson = JSON.stringify(envVars);
|
|
9630
|
-
log(
|
|
9631
|
-
`
|
|
9704
|
+
logger8.log(
|
|
9705
|
+
`Writing env JSON (${envJson.length} bytes) to ${ENV_JSON_PATH}`
|
|
9632
9706
|
);
|
|
9633
9707
|
await guest.writeFile(ENV_JSON_PATH, envJson);
|
|
9634
9708
|
if (!options.benchmarkMode) {
|
|
9635
|
-
log(`
|
|
9709
|
+
logger8.log(`Running preflight connectivity check...`);
|
|
9636
9710
|
const bypassSecret = process.env.VERCEL_AUTOMATION_BYPASS_SECRET;
|
|
9637
|
-
const preflight = await
|
|
9638
|
-
|
|
9639
|
-
|
|
9640
|
-
|
|
9641
|
-
|
|
9642
|
-
|
|
9711
|
+
const preflight = await withSandboxTiming(
|
|
9712
|
+
"preflight_check",
|
|
9713
|
+
() => runPreflightCheck(
|
|
9714
|
+
guest,
|
|
9715
|
+
config.server.url,
|
|
9716
|
+
context.runId,
|
|
9717
|
+
context.sandboxToken,
|
|
9718
|
+
bypassSecret
|
|
9719
|
+
)
|
|
9643
9720
|
);
|
|
9644
9721
|
if (!preflight.success) {
|
|
9645
|
-
log(`
|
|
9722
|
+
logger8.log(`Preflight check failed: ${preflight.error}`);
|
|
9646
9723
|
await reportPreflightFailure(
|
|
9647
9724
|
config.server.url,
|
|
9648
9725
|
context.runId,
|
|
@@ -9655,123 +9732,84 @@ async function executeJob(context, config, options = {}) {
|
|
|
9655
9732
|
error: preflight.error
|
|
9656
9733
|
};
|
|
9657
9734
|
}
|
|
9658
|
-
log(`
|
|
9735
|
+
logger8.log(`Preflight check passed`);
|
|
9659
9736
|
}
|
|
9660
9737
|
const systemLogFile = `/tmp/vm0-main-${context.runId}.log`;
|
|
9661
|
-
const exitCodeFile = `/tmp/vm0-exit-${context.runId}`;
|
|
9662
9738
|
const startTime = Date.now();
|
|
9739
|
+
const maxWaitMs = 2 * 60 * 60 * 1e3;
|
|
9740
|
+
let command;
|
|
9663
9741
|
if (options.benchmarkMode) {
|
|
9664
|
-
log(`
|
|
9665
|
-
|
|
9666
|
-
`nohup sh -c '${context.prompt}; echo $? > ${exitCodeFile}' > ${systemLogFile} 2>&1 &`
|
|
9667
|
-
);
|
|
9668
|
-
log(`[Executor] Command started in background`);
|
|
9742
|
+
logger8.log(`Running command directly (benchmark mode)...`);
|
|
9743
|
+
command = `${context.prompt} > ${systemLogFile} 2>&1`;
|
|
9669
9744
|
} else {
|
|
9670
|
-
log(`
|
|
9671
|
-
|
|
9672
|
-
`nohup sh -c 'node ${ENV_LOADER_PATH}; echo $? > ${exitCodeFile}' > ${systemLogFile} 2>&1 &`
|
|
9673
|
-
);
|
|
9674
|
-
log(`[Executor] Agent started in background`);
|
|
9745
|
+
logger8.log(`Running agent via env-loader...`);
|
|
9746
|
+
command = `node ${ENV_LOADER_PATH} > ${systemLogFile} 2>&1`;
|
|
9675
9747
|
}
|
|
9676
|
-
const
|
|
9677
|
-
|
|
9748
|
+
const { pid } = await guest.spawnAndWatch(command, maxWaitMs);
|
|
9749
|
+
logger8.log(`Process started with pid=${pid}`);
|
|
9678
9750
|
let exitCode = 1;
|
|
9679
|
-
let
|
|
9680
|
-
|
|
9681
|
-
|
|
9682
|
-
|
|
9683
|
-
|
|
9684
|
-
|
|
9685
|
-
|
|
9686
|
-
|
|
9687
|
-
|
|
9688
|
-
}
|
|
9689
|
-
if (!options.benchmarkMode) {
|
|
9690
|
-
const processCheck = await guest.exec(
|
|
9691
|
-
`pgrep -f "env-loader.mjs" > /dev/null 2>&1 && echo "RUNNING" || echo "DEAD"`
|
|
9692
|
-
);
|
|
9693
|
-
if (processCheck.stdout.trim() === "DEAD") {
|
|
9694
|
-
log(
|
|
9695
|
-
`[Executor] Agent process died unexpectedly without writing exit code`
|
|
9696
|
-
);
|
|
9697
|
-
const logContent = await guest.exec(
|
|
9698
|
-
`tail -50 ${systemLogFile} 2>/dev/null`
|
|
9699
|
-
);
|
|
9700
|
-
const dmesgCheck = await guest.exec(
|
|
9701
|
-
`dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
|
|
9702
|
-
);
|
|
9703
|
-
let errorMsg = "Agent process terminated unexpectedly";
|
|
9704
|
-
if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
|
|
9705
|
-
errorMsg = "Agent process killed by OOM killer";
|
|
9706
|
-
log(`[Executor] OOM detected: ${dmesgCheck.stdout}`);
|
|
9707
|
-
}
|
|
9708
|
-
if (logContent.stdout) {
|
|
9709
|
-
log(
|
|
9710
|
-
`[Executor] Last log output: ${logContent.stdout.substring(0, 500)}`
|
|
9711
|
-
);
|
|
9712
|
-
}
|
|
9713
|
-
const durationMs2 = Date.now() - startTime;
|
|
9714
|
-
recordRunnerOperation({
|
|
9715
|
-
actionType: "agent_execute",
|
|
9716
|
-
durationMs: durationMs2,
|
|
9717
|
-
success: false
|
|
9718
|
-
});
|
|
9719
|
-
return {
|
|
9720
|
-
exitCode: 1,
|
|
9721
|
-
error: errorMsg
|
|
9722
|
-
};
|
|
9723
|
-
}
|
|
9724
|
-
}
|
|
9725
|
-
}
|
|
9726
|
-
const durationMs = Date.now() - startTime;
|
|
9727
|
-
const duration = Math.round(durationMs / 1e3);
|
|
9728
|
-
if (!completed) {
|
|
9729
|
-
log(`[Executor] Agent timed out after ${duration}s`);
|
|
9730
|
-
recordRunnerOperation({
|
|
9751
|
+
let exitEvent;
|
|
9752
|
+
try {
|
|
9753
|
+
exitEvent = await guest.waitForExit(pid, maxWaitMs + 5e3);
|
|
9754
|
+
exitCode = exitEvent.exitCode;
|
|
9755
|
+
} catch {
|
|
9756
|
+
const durationMs2 = Date.now() - startTime;
|
|
9757
|
+
const duration2 = Math.round(durationMs2 / 1e3);
|
|
9758
|
+
logger8.log(`Agent timed out after ${duration2}s`);
|
|
9759
|
+
recordOperation({
|
|
9731
9760
|
actionType: "agent_execute",
|
|
9732
|
-
durationMs,
|
|
9761
|
+
durationMs: durationMs2,
|
|
9733
9762
|
success: false
|
|
9734
9763
|
});
|
|
9735
9764
|
return {
|
|
9736
9765
|
exitCode: 1,
|
|
9737
|
-
error: `Agent execution timed out after ${
|
|
9766
|
+
error: `Agent execution timed out after ${duration2}s`
|
|
9738
9767
|
};
|
|
9739
9768
|
}
|
|
9740
|
-
|
|
9769
|
+
const durationMs = Date.now() - startTime;
|
|
9770
|
+
const duration = Math.round(durationMs / 1e3);
|
|
9771
|
+
if (exitCode === 137 || exitCode === 9) {
|
|
9772
|
+
const dmesgCheck = await guest.exec(
|
|
9773
|
+
`dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
|
|
9774
|
+
);
|
|
9775
|
+
if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
|
|
9776
|
+
logger8.log(`OOM detected: ${dmesgCheck.stdout}`);
|
|
9777
|
+
recordOperation({
|
|
9778
|
+
actionType: "agent_execute",
|
|
9779
|
+
durationMs,
|
|
9780
|
+
success: false
|
|
9781
|
+
});
|
|
9782
|
+
return {
|
|
9783
|
+
exitCode: 1,
|
|
9784
|
+
error: "Agent process killed by OOM killer"
|
|
9785
|
+
};
|
|
9786
|
+
}
|
|
9787
|
+
}
|
|
9788
|
+
recordOperation({
|
|
9741
9789
|
actionType: "agent_execute",
|
|
9742
9790
|
durationMs,
|
|
9743
9791
|
success: exitCode === 0
|
|
9744
9792
|
});
|
|
9745
|
-
log(`
|
|
9746
|
-
|
|
9747
|
-
|
|
9748
|
-
|
|
9749
|
-
if (logResult.stdout) {
|
|
9750
|
-
log(
|
|
9751
|
-
`[Executor] Log output (${logResult.stdout.length} chars): ${logResult.stdout.substring(0, 500)}`
|
|
9793
|
+
logger8.log(`Agent finished in ${duration}s with exit code ${exitCode}`);
|
|
9794
|
+
if (exitEvent.stderr) {
|
|
9795
|
+
logger8.log(
|
|
9796
|
+
`Stderr (${exitEvent.stderr.length} chars): ${exitEvent.stderr.substring(0, 500)}`
|
|
9752
9797
|
);
|
|
9753
9798
|
}
|
|
9754
9799
|
return {
|
|
9755
9800
|
exitCode,
|
|
9756
|
-
error: exitCode !== 0 ?
|
|
9801
|
+
error: exitCode !== 0 ? exitEvent.stderr || void 0 : void 0
|
|
9757
9802
|
};
|
|
9758
9803
|
} catch (error) {
|
|
9759
9804
|
const errorMsg = error instanceof Error ? error.message : "Unknown error";
|
|
9760
|
-
|
|
9805
|
+
logger8.error(`Job ${context.runId} failed: ${errorMsg}`);
|
|
9761
9806
|
return {
|
|
9762
9807
|
exitCode: 1,
|
|
9763
9808
|
error: errorMsg
|
|
9764
9809
|
};
|
|
9765
9810
|
} finally {
|
|
9766
9811
|
if (context.experimentalFirewall?.enabled && guestIp) {
|
|
9767
|
-
log(`
|
|
9768
|
-
try {
|
|
9769
|
-
await removeVMProxyRules(guestIp, config.proxy.port, config.name);
|
|
9770
|
-
} catch (err) {
|
|
9771
|
-
console.error(
|
|
9772
|
-
`[Executor] Failed to remove VM proxy rules: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9773
|
-
);
|
|
9774
|
-
}
|
|
9812
|
+
logger8.log(`Cleaning up network security for VM ${guestIp}`);
|
|
9775
9813
|
getVMRegistry().unregister(guestIp);
|
|
9776
9814
|
if (!options.benchmarkMode) {
|
|
9777
9815
|
try {
|
|
@@ -9781,21 +9819,23 @@ async function executeJob(context, config, options = {}) {
|
|
|
9781
9819
|
context.runId
|
|
9782
9820
|
);
|
|
9783
9821
|
} catch (err) {
|
|
9784
|
-
|
|
9785
|
-
`
|
|
9822
|
+
logger8.error(
|
|
9823
|
+
`Failed to upload network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9786
9824
|
);
|
|
9787
9825
|
}
|
|
9788
9826
|
}
|
|
9789
9827
|
}
|
|
9790
9828
|
if (vm) {
|
|
9791
|
-
log(`
|
|
9829
|
+
logger8.log(`Cleaning up VM ${vmId}...`);
|
|
9792
9830
|
await withSandboxTiming("cleanup", () => vm.kill());
|
|
9793
9831
|
}
|
|
9832
|
+
await clearSandboxContext();
|
|
9794
9833
|
}
|
|
9795
9834
|
}
|
|
9796
9835
|
|
|
9797
9836
|
// src/lib/runner/status.ts
|
|
9798
9837
|
import { writeFileSync as writeFileSync2 } from "fs";
|
|
9838
|
+
var logger9 = createLogger("Runner");
|
|
9799
9839
|
function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
|
|
9800
9840
|
const status = {
|
|
9801
9841
|
mode,
|
|
@@ -9807,7 +9847,7 @@ function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
|
|
|
9807
9847
|
try {
|
|
9808
9848
|
writeFileSync2(statusFilePath, JSON.stringify(status, null, 2));
|
|
9809
9849
|
} catch (err) {
|
|
9810
|
-
|
|
9850
|
+
logger9.error(
|
|
9811
9851
|
`Failed to write status file: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9812
9852
|
);
|
|
9813
9853
|
}
|
|
@@ -9824,37 +9864,26 @@ function createStatusUpdater(statusFilePath, state) {
|
|
|
9824
9864
|
}
|
|
9825
9865
|
|
|
9826
9866
|
// src/lib/runner/setup.ts
|
|
9867
|
+
var logger10 = createLogger("Runner");
|
|
9827
9868
|
async function setupEnvironment(options) {
|
|
9828
9869
|
const { config } = options;
|
|
9829
|
-
const datasetSuffix = process.env.AXIOM_DATASET_SUFFIX;
|
|
9830
|
-
if (!datasetSuffix) {
|
|
9831
|
-
throw new Error(
|
|
9832
|
-
"AXIOM_DATASET_SUFFIX is required. Set to 'dev' or 'prod'."
|
|
9833
|
-
);
|
|
9834
|
-
}
|
|
9835
|
-
initMetrics({
|
|
9836
|
-
serviceName: "vm0-runner",
|
|
9837
|
-
runnerLabel: config.name,
|
|
9838
|
-
axiomToken: process.env.AXIOM_TOKEN,
|
|
9839
|
-
environment: datasetSuffix
|
|
9840
|
-
});
|
|
9841
9870
|
const networkCheck = checkNetworkPrerequisites();
|
|
9842
9871
|
if (!networkCheck.ok) {
|
|
9843
|
-
|
|
9872
|
+
logger10.error("Network prerequisites not met:");
|
|
9844
9873
|
for (const error of networkCheck.errors) {
|
|
9845
|
-
|
|
9874
|
+
logger10.error(` - ${error}`);
|
|
9846
9875
|
}
|
|
9847
9876
|
process.exit(1);
|
|
9848
9877
|
}
|
|
9849
|
-
|
|
9878
|
+
logger10.log("Setting up network bridge...");
|
|
9850
9879
|
await setupBridge();
|
|
9851
|
-
|
|
9880
|
+
logger10.log("Flushing bridge ARP cache...");
|
|
9852
9881
|
await flushBridgeArpCache();
|
|
9853
|
-
|
|
9882
|
+
logger10.log("Cleaning up orphaned proxy rules...");
|
|
9854
9883
|
await cleanupOrphanedProxyRules(config.name);
|
|
9855
|
-
|
|
9884
|
+
logger10.log("Cleaning up orphaned IP allocations...");
|
|
9856
9885
|
await cleanupOrphanedAllocations();
|
|
9857
|
-
|
|
9886
|
+
logger10.log("Initializing network proxy...");
|
|
9858
9887
|
initVMRegistry();
|
|
9859
9888
|
const proxyManager = initProxyManager({
|
|
9860
9889
|
apiUrl: config.server.url,
|
|
@@ -9865,45 +9894,49 @@ async function setupEnvironment(options) {
|
|
|
9865
9894
|
try {
|
|
9866
9895
|
await proxyManager.start();
|
|
9867
9896
|
proxyEnabled = true;
|
|
9868
|
-
|
|
9897
|
+
logger10.log("Network proxy initialized successfully");
|
|
9898
|
+
logger10.log("Setting up CIDR proxy rules...");
|
|
9899
|
+
await setupCIDRProxyRules(config.proxy.port);
|
|
9869
9900
|
} catch (err) {
|
|
9870
|
-
|
|
9901
|
+
logger10.log(
|
|
9871
9902
|
`Network proxy not available: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
9872
9903
|
);
|
|
9873
|
-
|
|
9904
|
+
logger10.log(
|
|
9874
9905
|
"Jobs with experimentalFirewall enabled will run without network interception"
|
|
9875
9906
|
);
|
|
9876
9907
|
}
|
|
9877
|
-
return { proxyEnabled };
|
|
9908
|
+
return { proxyEnabled, proxyPort: config.proxy.port };
|
|
9878
9909
|
}
|
|
9879
9910
|
async function cleanupEnvironment(resources) {
|
|
9880
9911
|
if (resources.proxyEnabled) {
|
|
9881
|
-
|
|
9912
|
+
logger10.log("Cleaning up CIDR proxy rules...");
|
|
9913
|
+
await cleanupCIDRProxyRules(resources.proxyPort);
|
|
9914
|
+
}
|
|
9915
|
+
if (resources.proxyEnabled) {
|
|
9916
|
+
logger10.log("Stopping network proxy...");
|
|
9882
9917
|
await getProxyManager().stop();
|
|
9883
9918
|
}
|
|
9884
|
-
console.log("Flushing metrics...");
|
|
9885
|
-
await flushMetrics();
|
|
9886
|
-
await shutdownMetrics();
|
|
9887
9919
|
}
|
|
9888
9920
|
|
|
9889
9921
|
// src/lib/runner/signals.ts
|
|
9922
|
+
var logger11 = createLogger("Runner");
|
|
9890
9923
|
function setupSignalHandlers(state, handlers) {
|
|
9891
9924
|
process.on("SIGINT", () => {
|
|
9892
|
-
|
|
9925
|
+
logger11.log("\nShutting down...");
|
|
9893
9926
|
handlers.onShutdown();
|
|
9894
9927
|
state.mode = "stopped";
|
|
9895
9928
|
handlers.updateStatus();
|
|
9896
9929
|
});
|
|
9897
9930
|
process.on("SIGTERM", () => {
|
|
9898
|
-
|
|
9931
|
+
logger11.log("\nShutting down...");
|
|
9899
9932
|
handlers.onShutdown();
|
|
9900
9933
|
state.mode = "stopped";
|
|
9901
9934
|
handlers.updateStatus();
|
|
9902
9935
|
});
|
|
9903
9936
|
process.on("SIGUSR1", () => {
|
|
9904
9937
|
if (state.mode === "running") {
|
|
9905
|
-
|
|
9906
|
-
|
|
9938
|
+
logger11.log("\n[Maintenance] Entering drain mode...");
|
|
9939
|
+
logger11.log(
|
|
9907
9940
|
`[Maintenance] Active jobs: ${state.activeRuns.size} (will wait for completion)`
|
|
9908
9941
|
);
|
|
9909
9942
|
state.mode = "draining";
|
|
@@ -9914,6 +9947,7 @@ function setupSignalHandlers(state, handlers) {
|
|
|
9914
9947
|
}
|
|
9915
9948
|
|
|
9916
9949
|
// src/lib/runner/runner.ts
|
|
9950
|
+
var logger12 = createLogger("Runner");
|
|
9917
9951
|
var Runner = class _Runner {
|
|
9918
9952
|
config;
|
|
9919
9953
|
statusFilePath;
|
|
@@ -9952,41 +9986,41 @@ var Runner = class _Runner {
|
|
|
9952
9986
|
onDrain: () => {
|
|
9953
9987
|
this.pendingJobs.length = 0;
|
|
9954
9988
|
if (this.state.activeRuns.size === 0) {
|
|
9955
|
-
|
|
9989
|
+
logger12.log("[Maintenance] No active jobs, exiting immediately");
|
|
9956
9990
|
this.resolveShutdown?.();
|
|
9957
9991
|
}
|
|
9958
9992
|
},
|
|
9959
9993
|
updateStatus: this.updateStatus
|
|
9960
9994
|
});
|
|
9961
|
-
|
|
9995
|
+
logger12.log(
|
|
9962
9996
|
`Starting runner '${this.config.name}' for group '${this.config.group}'...`
|
|
9963
9997
|
);
|
|
9964
|
-
|
|
9965
|
-
|
|
9966
|
-
|
|
9967
|
-
|
|
9998
|
+
logger12.log(`Max concurrent jobs: ${this.config.sandbox.max_concurrent}`);
|
|
9999
|
+
logger12.log(`Status file: ${this.statusFilePath}`);
|
|
10000
|
+
logger12.log("Press Ctrl+C to stop");
|
|
10001
|
+
logger12.log("");
|
|
9968
10002
|
this.updateStatus();
|
|
9969
|
-
|
|
10003
|
+
logger12.log("Checking for pending jobs...");
|
|
9970
10004
|
await this.pollFallback();
|
|
9971
|
-
|
|
10005
|
+
logger12.log("Connecting to realtime job notifications...");
|
|
9972
10006
|
this.subscription = await subscribeToJobs(
|
|
9973
10007
|
this.config.server,
|
|
9974
10008
|
this.config.group,
|
|
9975
10009
|
(notification) => {
|
|
9976
|
-
|
|
10010
|
+
logger12.log(`Ably notification: ${notification.runId}`);
|
|
9977
10011
|
this.processJob(notification.runId).catch(console.error);
|
|
9978
10012
|
},
|
|
9979
10013
|
(connectionState, reason) => {
|
|
9980
|
-
|
|
10014
|
+
logger12.log(
|
|
9981
10015
|
`Ably connection: ${connectionState}${reason ? ` (${reason})` : ""}`
|
|
9982
10016
|
);
|
|
9983
10017
|
}
|
|
9984
10018
|
);
|
|
9985
|
-
|
|
10019
|
+
logger12.log("Connected to realtime job notifications");
|
|
9986
10020
|
this.pollInterval = setInterval(() => {
|
|
9987
10021
|
this.pollFallback().catch(console.error);
|
|
9988
10022
|
}, this.config.sandbox.poll_interval_ms);
|
|
9989
|
-
|
|
10023
|
+
logger12.log(
|
|
9990
10024
|
`Polling fallback enabled (every ${this.config.sandbox.poll_interval_ms / 1e3}s)`
|
|
9991
10025
|
);
|
|
9992
10026
|
await shutdownPromise;
|
|
@@ -9997,7 +10031,7 @@ var Runner = class _Runner {
|
|
|
9997
10031
|
this.subscription.cleanup();
|
|
9998
10032
|
}
|
|
9999
10033
|
if (this.state.jobPromises.size > 0) {
|
|
10000
|
-
|
|
10034
|
+
logger12.log(
|
|
10001
10035
|
`Waiting for ${this.state.jobPromises.size} active job(s) to complete...`
|
|
10002
10036
|
);
|
|
10003
10037
|
await Promise.all(this.state.jobPromises);
|
|
@@ -10005,7 +10039,7 @@ var Runner = class _Runner {
|
|
|
10005
10039
|
await cleanupEnvironment(this.resources);
|
|
10006
10040
|
this.state.mode = "stopped";
|
|
10007
10041
|
this.updateStatus();
|
|
10008
|
-
|
|
10042
|
+
logger12.log("Runner stopped");
|
|
10009
10043
|
process.exit(0);
|
|
10010
10044
|
}
|
|
10011
10045
|
/**
|
|
@@ -10022,13 +10056,12 @@ var Runner = class _Runner {
|
|
|
10022
10056
|
() => pollForJob(this.config.server, this.config.group)
|
|
10023
10057
|
);
|
|
10024
10058
|
if (job) {
|
|
10025
|
-
|
|
10059
|
+
logger12.log(`Poll fallback found job: ${job.runId}`);
|
|
10026
10060
|
await this.processJob(job.runId);
|
|
10027
10061
|
}
|
|
10028
10062
|
} catch (error) {
|
|
10029
|
-
|
|
10030
|
-
`Poll fallback error
|
|
10031
|
-
error instanceof Error ? error.message : "Unknown error"
|
|
10063
|
+
logger12.error(
|
|
10064
|
+
`Poll fallback error: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10032
10065
|
);
|
|
10033
10066
|
}
|
|
10034
10067
|
}
|
|
@@ -10037,7 +10070,7 @@ var Runner = class _Runner {
|
|
|
10037
10070
|
*/
|
|
10038
10071
|
async processJob(runId) {
|
|
10039
10072
|
if (this.state.mode !== "running") {
|
|
10040
|
-
|
|
10073
|
+
logger12.log(`Not running (${this.state.mode}), ignoring job ${runId}`);
|
|
10041
10074
|
return;
|
|
10042
10075
|
}
|
|
10043
10076
|
if (this.state.activeRuns.has(runId)) {
|
|
@@ -10045,10 +10078,10 @@ var Runner = class _Runner {
|
|
|
10045
10078
|
}
|
|
10046
10079
|
if (this.state.activeRuns.size >= this.config.sandbox.max_concurrent) {
|
|
10047
10080
|
if (!this.pendingJobs.includes(runId) && this.pendingJobs.length < _Runner.MAX_PENDING_QUEUE_SIZE) {
|
|
10048
|
-
|
|
10081
|
+
logger12.log(`At capacity, queueing job ${runId}`);
|
|
10049
10082
|
this.pendingJobs.push(runId);
|
|
10050
10083
|
} else if (this.pendingJobs.length >= _Runner.MAX_PENDING_QUEUE_SIZE) {
|
|
10051
|
-
|
|
10084
|
+
logger12.log(
|
|
10052
10085
|
`Pending queue full (${_Runner.MAX_PENDING_QUEUE_SIZE}), dropping job ${runId}`
|
|
10053
10086
|
);
|
|
10054
10087
|
}
|
|
@@ -10059,20 +10092,19 @@ var Runner = class _Runner {
|
|
|
10059
10092
|
"claim",
|
|
10060
10093
|
() => claimJob(this.config.server, runId)
|
|
10061
10094
|
);
|
|
10062
|
-
|
|
10095
|
+
logger12.log(`Claimed job: ${context.runId}`);
|
|
10063
10096
|
this.state.activeRuns.add(context.runId);
|
|
10064
10097
|
this.updateStatus();
|
|
10065
10098
|
const jobPromise = this.executeJob(context).catch((error) => {
|
|
10066
|
-
|
|
10067
|
-
`Job ${context.runId} failed
|
|
10068
|
-
error instanceof Error ? error.message : "Unknown error"
|
|
10099
|
+
logger12.error(
|
|
10100
|
+
`Job ${context.runId} failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10069
10101
|
);
|
|
10070
10102
|
}).finally(() => {
|
|
10071
10103
|
this.state.activeRuns.delete(context.runId);
|
|
10072
10104
|
this.state.jobPromises.delete(jobPromise);
|
|
10073
10105
|
this.updateStatus();
|
|
10074
10106
|
if (this.state.mode === "draining" && this.state.activeRuns.size === 0) {
|
|
10075
|
-
|
|
10107
|
+
logger12.log("[Maintenance] All jobs completed, exiting");
|
|
10076
10108
|
this.resolveShutdown?.();
|
|
10077
10109
|
return;
|
|
10078
10110
|
}
|
|
@@ -10085,34 +10117,33 @@ var Runner = class _Runner {
|
|
|
10085
10117
|
});
|
|
10086
10118
|
this.state.jobPromises.add(jobPromise);
|
|
10087
10119
|
} catch (error) {
|
|
10088
|
-
|
|
10089
|
-
`Could not claim job ${runId}
|
|
10090
|
-
error instanceof Error ? error.message : "Unknown error"
|
|
10120
|
+
logger12.log(
|
|
10121
|
+
`Could not claim job ${runId}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
10091
10122
|
);
|
|
10092
10123
|
}
|
|
10093
10124
|
}
|
|
10094
10125
|
async executeJob(context) {
|
|
10095
|
-
|
|
10096
|
-
|
|
10097
|
-
|
|
10126
|
+
logger12.log(` Executing job ${context.runId}...`);
|
|
10127
|
+
logger12.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
|
|
10128
|
+
logger12.log(` Compose version: ${context.agentComposeVersionId}`);
|
|
10098
10129
|
try {
|
|
10099
10130
|
const result = await executeJob(context, this.config);
|
|
10100
|
-
|
|
10131
|
+
logger12.log(
|
|
10101
10132
|
` Job ${context.runId} execution completed with exit code ${result.exitCode}`
|
|
10102
10133
|
);
|
|
10103
10134
|
if (result.exitCode !== 0 && result.error) {
|
|
10104
|
-
|
|
10135
|
+
logger12.error(` Job ${context.runId} failed: ${result.error}`);
|
|
10105
10136
|
}
|
|
10106
10137
|
} catch (err) {
|
|
10107
10138
|
const error = err instanceof Error ? err.message : "Unknown execution error";
|
|
10108
|
-
|
|
10139
|
+
logger12.error(` Job ${context.runId} execution failed: ${error}`);
|
|
10109
10140
|
const result = await completeJob(
|
|
10110
10141
|
this.config.server.url,
|
|
10111
10142
|
context,
|
|
10112
10143
|
1,
|
|
10113
10144
|
error
|
|
10114
10145
|
);
|
|
10115
|
-
|
|
10146
|
+
logger12.log(` Job ${context.runId} reported as ${result.status}`);
|
|
10116
10147
|
}
|
|
10117
10148
|
}
|
|
10118
10149
|
};
|
|
@@ -10677,13 +10708,19 @@ function createBenchmarkContext(prompt, options) {
|
|
|
10677
10708
|
environment: null,
|
|
10678
10709
|
resumeSession: null,
|
|
10679
10710
|
secretValues: null,
|
|
10680
|
-
cliAgentType: options.agentType
|
|
10711
|
+
cliAgentType: options.agentType,
|
|
10712
|
+
// Enable firewall and MITM by default for benchmark to test proxy flow
|
|
10713
|
+
experimentalFirewall: {
|
|
10714
|
+
enabled: true,
|
|
10715
|
+
experimental_mitm: true
|
|
10716
|
+
}
|
|
10681
10717
|
};
|
|
10682
10718
|
}
|
|
10683
10719
|
var benchmarkCommand = new Command4("benchmark").description(
|
|
10684
10720
|
"Run a VM performance benchmark (executes bash command directly)"
|
|
10685
10721
|
).argument("<prompt>", "The bash command to execute in the VM").option("--config <path>", "Config file path", "./runner.yaml").option("--working-dir <path>", "Working directory in VM", "/home/user").option("--agent-type <type>", "Agent type", "claude-code").action(async (prompt, options) => {
|
|
10686
10722
|
const timer = new Timer();
|
|
10723
|
+
setGlobalLogger(timer.log.bind(timer));
|
|
10687
10724
|
try {
|
|
10688
10725
|
timer.log("Loading configuration...");
|
|
10689
10726
|
const config = loadDebugConfig(options.config);
|
|
@@ -10702,8 +10739,7 @@ var benchmarkCommand = new Command4("benchmark").description(
|
|
|
10702
10739
|
timer.log(`Executing command: ${prompt}`);
|
|
10703
10740
|
const context = createBenchmarkContext(prompt, options);
|
|
10704
10741
|
const result = await executeJob(context, config, {
|
|
10705
|
-
benchmarkMode: true
|
|
10706
|
-
logger: timer.log.bind(timer)
|
|
10742
|
+
benchmarkMode: true
|
|
10707
10743
|
});
|
|
10708
10744
|
timer.log(`Exit code: ${result.exitCode}`);
|
|
10709
10745
|
if (result.error) {
|
|
@@ -10720,7 +10756,7 @@ var benchmarkCommand = new Command4("benchmark").description(
|
|
|
10720
10756
|
});
|
|
10721
10757
|
|
|
10722
10758
|
// src/index.ts
|
|
10723
|
-
var version = true ? "3.
|
|
10759
|
+
var version = true ? "3.6.1" : "0.1.0";
|
|
10724
10760
|
program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
|
|
10725
10761
|
program.addCommand(startCommand);
|
|
10726
10762
|
program.addCommand(doctorCommand);
|