@vm0/runner 2.6.1 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +452 -59
- package/package.json +6 -1
package/index.js
CHANGED
|
@@ -37,6 +37,42 @@ var runnerConfigSchema = z.object({
|
|
|
37
37
|
port: z.number().int().min(1024).max(65535).default(8080)
|
|
38
38
|
}).default({})
|
|
39
39
|
});
|
|
40
|
+
// Schema for the configuration file read by loadDebugConfig().
// Every section except `firecracker` falls back to defaults, so a file that
// only lists the three Firecracker paths passes validation.
var debugConfigSchema = z.object({
  name: z.string().default("debug-runner"),
  group: z.string().default("debug/local"),
  // API server connection settings.
  server: z
    .object({
      url: z.string().url().default("http://localhost:3000"),
      token: z.string().default("debug-token")
    })
    .default({}),
  // Sandbox sizing and polling settings.
  sandbox: z
    .object({
      max_concurrent: z.number().int().min(1).default(1),
      vcpu: z.number().int().min(1).default(2),
      memory_mb: z.number().int().min(128).default(2048),
      poll_interval_ms: z.number().int().min(1000).default(5000)
    })
    .default({}),
  // Required section: no defaults, each path must be a non-empty string.
  firecracker: z.object({
    binary: z.string().min(1, "Firecracker binary path is required"),
    kernel: z.string().min(1, "Kernel path is required"),
    rootfs: z.string().min(1, "Rootfs path is required")
  }),
  // Proxy port, restricted to the 1024-65535 range.
  proxy: z
    .object({
      port: z.number().int().min(1024).max(65535).default(8080)
    })
    .default({})
});
|
|
62
|
+
/**
 * Load a YAML configuration file and validate it against debugConfigSchema.
 *
 * @param {string} configPath - Path of the YAML file to read.
 * @returns {object} The parsed configuration with schema defaults applied.
 * @throws {Error} When the file does not exist, or when validation fails; the
 *   validation error lists one line per issue.
 */
function loadDebugConfig(configPath) {
  if (!fs.existsSync(configPath)) {
    throw new Error(`Config file not found: ${configPath}`);
  }
  const content = fs.readFileSync(configPath, "utf-8");
  const raw = yaml.parse(content);
  const result = debugConfigSchema.safeParse(raw);
  if (!result.success) {
    // An issue raised on the document itself (e.g. an empty file) has an
    // empty path; label it so the line does not render as " - : message".
    const errors = result.error.issues
      .map((issue) => ` - ${issue.path.join(".") || "(root)"}: ${issue.message}`)
      .join("\n");
    throw new Error(`Invalid configuration:\n${errors}`);
  }
  return result.data;
}
|
|
40
76
|
function loadConfig(configPath) {
|
|
41
77
|
if (!fs.existsSync(configPath)) {
|
|
42
78
|
throw new Error(`runner.yaml not found: ${configPath}`);
|
|
@@ -142,7 +178,7 @@ import path4 from "path";
|
|
|
142
178
|
import fs6 from "fs";
|
|
143
179
|
|
|
144
180
|
// src/lib/firecracker/vm.ts
|
|
145
|
-
import { spawn } from "child_process";
|
|
181
|
+
import { execSync as execSync2, spawn } from "child_process";
|
|
146
182
|
import fs2 from "fs";
|
|
147
183
|
import path from "path";
|
|
148
184
|
import readline from "readline";
|
|
@@ -545,13 +581,13 @@ var FirecrackerVM = class {
|
|
|
545
581
|
state = "created";
|
|
546
582
|
workDir;
|
|
547
583
|
socketPath;
|
|
548
|
-
|
|
549
|
-
// Per-VM
|
|
584
|
+
vmOverlayPath;
|
|
585
|
+
// Per-VM sparse overlay for writes
|
|
550
586
|
constructor(config) {
|
|
551
587
|
this.config = config;
|
|
552
588
|
this.workDir = config.workDir || `/tmp/vm0-vm-${config.vmId}`;
|
|
553
589
|
this.socketPath = path.join(this.workDir, "firecracker.sock");
|
|
554
|
-
this.
|
|
590
|
+
this.vmOverlayPath = path.join(this.workDir, "overlay.ext4");
|
|
555
591
|
}
|
|
556
592
|
/**
|
|
557
593
|
* Get current VM state
|
|
@@ -590,8 +626,12 @@ var FirecrackerVM = class {
|
|
|
590
626
|
if (fs2.existsSync(this.socketPath)) {
|
|
591
627
|
fs2.unlinkSync(this.socketPath);
|
|
592
628
|
}
|
|
593
|
-
console.log(`[VM ${this.config.vmId}]
|
|
594
|
-
|
|
629
|
+
console.log(`[VM ${this.config.vmId}] Creating sparse overlay file...`);
|
|
630
|
+
const overlaySize = 2 * 1024 * 1024 * 1024;
|
|
631
|
+
const fd = fs2.openSync(this.vmOverlayPath, "w");
|
|
632
|
+
fs2.ftruncateSync(fd, overlaySize);
|
|
633
|
+
fs2.closeSync(fd);
|
|
634
|
+
execSync2(`mkfs.ext4 -F -q "${this.vmOverlayPath}"`, { stdio: "ignore" });
|
|
595
635
|
console.log(`[VM ${this.config.vmId}] Setting up network...`);
|
|
596
636
|
this.networkConfig = await createTapDevice(this.config.vmId);
|
|
597
637
|
console.log(`[VM ${this.config.vmId}] Starting Firecracker...`);
|
|
@@ -668,19 +708,27 @@ var FirecrackerVM = class {
|
|
|
668
708
|
mem_size_mib: this.config.memoryMb
|
|
669
709
|
});
|
|
670
710
|
const networkBootArgs = generateNetworkBootArgs(this.networkConfig);
|
|
671
|
-
const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off ${networkBootArgs}`;
|
|
711
|
+
const bootArgs = `console=ttyS0 reboot=k panic=1 pci=off init=/sbin/overlay-init ${networkBootArgs}`;
|
|
672
712
|
console.log(`[VM ${this.config.vmId}] Boot args: ${bootArgs}`);
|
|
673
713
|
await this.client.setBootSource({
|
|
674
714
|
kernel_image_path: this.config.kernelPath,
|
|
675
715
|
boot_args: bootArgs
|
|
676
716
|
});
|
|
677
|
-
console.log(
|
|
717
|
+
console.log(
|
|
718
|
+
`[VM ${this.config.vmId}] Base rootfs: ${this.config.rootfsPath}`
|
|
719
|
+
);
|
|
678
720
|
await this.client.setDrive({
|
|
679
721
|
drive_id: "rootfs",
|
|
680
|
-
path_on_host: this.
|
|
722
|
+
path_on_host: this.config.rootfsPath,
|
|
681
723
|
is_root_device: true,
|
|
724
|
+
is_read_only: true
|
|
725
|
+
});
|
|
726
|
+
console.log(`[VM ${this.config.vmId}] Overlay: ${this.vmOverlayPath}`);
|
|
727
|
+
await this.client.setDrive({
|
|
728
|
+
drive_id: "overlay",
|
|
729
|
+
path_on_host: this.vmOverlayPath,
|
|
730
|
+
is_root_device: false,
|
|
682
731
|
is_read_only: false
|
|
683
|
-
// Need write access for agent execution
|
|
684
732
|
});
|
|
685
733
|
console.log(
|
|
686
734
|
`[VM ${this.config.vmId}] Network: ${this.networkConfig.tapDevice}`
|
|
@@ -770,7 +818,7 @@ var FirecrackerVM = class {
|
|
|
770
818
|
};
|
|
771
819
|
|
|
772
820
|
// src/lib/firecracker/guest.ts
|
|
773
|
-
import { exec as exec2, execSync as
|
|
821
|
+
import { exec as exec2, execSync as execSync3 } from "child_process";
|
|
774
822
|
import { promisify as promisify2 } from "util";
|
|
775
823
|
import fs3 from "fs";
|
|
776
824
|
import path2 from "path";
|
|
@@ -9955,6 +10003,208 @@ function initProxyManager(config) {
|
|
|
9955
10003
|
return globalProxyManager;
|
|
9956
10004
|
}
|
|
9957
10005
|
|
|
10006
|
+
// src/lib/metrics/provider.ts
|
|
10007
|
+
import {
|
|
10008
|
+
MeterProvider,
|
|
10009
|
+
PeriodicExportingMetricReader
|
|
10010
|
+
} from "@opentelemetry/sdk-metrics";
|
|
10011
|
+
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
|
|
10012
|
+
import { Resource } from "@opentelemetry/resources";
|
|
10013
|
+
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
10014
|
+
import { metrics } from "@opentelemetry/api";
|
|
10015
|
+
// Module-level metrics state.
var meterProvider = null; // MeterProvider instance once metrics are enabled
var initialized = false; // true after the first initMetrics() call
var enabled = false; // true only when an exporter has been configured
var _runnerLabel = ""; // label recorded by initMetrics()

/**
 * Configure metrics export once per process.
 *
 * Later calls are ignored. When `config.axiomToken` is falsy the runner label
 * is still recorded, but no provider is created and metrics stay disabled.
 *
 * @param {object} config
 * @param {string} config.serviceName - Value for the service-name resource attribute.
 * @param {string} config.runnerLabel - Label stored for getRunnerLabel().
 * @param {string} [config.axiomToken] - Bearer token for the Axiom endpoint.
 * @param {string} [config.environment] - Dataset suffix / environment; "dev" when nullish.
 * @param {number} [config.exportIntervalMs] - Export interval; 30000 ms when nullish.
 */
function initMetrics(config) {
  if (initialized) {
    return;
  }
  initialized = true;
  _runnerLabel = config.runnerLabel;

  if (!config.axiomToken) {
    console.log("[metrics] AXIOM_TOKEN not configured, metrics disabled");
    return;
  }

  const env = config.environment ?? "dev";
  const requestHeaders = {
    Authorization: `Bearer ${config.axiomToken}`,
    "X-Axiom-Dataset": `runner-metrics-${env}`
  };
  const exporter = new OTLPMetricExporter({
    url: "https://api.axiom.co/v1/metrics",
    headers: requestHeaders
  });
  const resource = new Resource({
    [ATTR_SERVICE_NAME]: config.serviceName,
    "deployment.environment": env,
    "runner.label": config.runnerLabel
  });
  const periodicReader = new PeriodicExportingMetricReader({
    exporter,
    exportIntervalMillis: config.exportIntervalMs ?? 30000
  });

  meterProvider = new MeterProvider({ resource, readers: [periodicReader] });
  metrics.setGlobalMeterProvider(meterProvider);
  enabled = true;

  console.log(
    `[metrics] initialized for ${config.serviceName} (${env}), runner: ${config.runnerLabel}`
  );
}
|
|
10054
|
+
/**
 * Report whether initMetrics() configured an exporter.
 *
 * @returns {boolean} The current value of the module-level `enabled` flag.
 */
function isMetricsEnabled() {
  const metricsActive = enabled;
  return metricsActive;
}
|
|
10057
|
+
/**
 * Return the runner label stored by initMetrics().
 *
 * @returns {string} The stored label; an empty string until initMetrics() runs.
 */
function getRunnerLabel() {
  const label = _runnerLabel;
  return label;
}
|
|
10060
|
+
/**
 * Look up a meter by name through the OpenTelemetry metrics API.
 *
 * @param {string} name - Meter name passed through to `metrics.getMeter`.
 * @returns {object} Whatever `metrics.getMeter(name)` returns.
 */
function getMeter(name) {
  const meter = metrics.getMeter(name);
  return meter;
}
|
|
10063
|
+
/**
 * Force-flush the meter provider, if one was created.
 *
 * Does nothing when metrics were never enabled.
 *
 * @returns {Promise<void>}
 */
async function flushMetrics() {
  if (!meterProvider) {
    return;
  }
  await meterProvider.forceFlush();
}
|
|
10068
|
+
/**
 * Shut down the meter provider, if one was created.
 *
 * Does nothing when metrics were never enabled.
 *
 * @returns {Promise<void>}
 */
async function shutdownMetrics() {
  if (!meterProvider) {
    return;
  }
  await meterProvider.shutdown();
}
|
|
10073
|
+
|
|
10074
|
+
// src/lib/metrics/instruments.ts
|
|
10075
|
+
// Lazily created instruments; each stays null until its getter first runs.
var runnerOperationTotal = null;
var runnerOperationErrorsTotal = null;
var runnerOperationDuration = null;
var sandboxOperationTotal = null;
var sandboxOperationErrorsTotal = null;
var sandboxOperationDuration = null;

/**
 * Return the runner-operation instruments, creating them on first use.
 *
 * Creation is keyed on `runnerOperationTotal`: all three instruments are
 * created together from the "vm0-runner" meter the first time it is unset.
 *
 * @returns {{runnerOperationTotal: object, runnerOperationErrorsTotal: object, runnerOperationDuration: object}}
 */
function getRunnerInstruments() {
  if (!runnerOperationTotal) {
    const runnerMeter = getMeter("vm0-runner");
    const totalOptions = {
      description: "Total number of runner operations"
    };
    const errorOptions = {
      description: "Total number of runner operation errors"
    };
    const durationOptions = {
      description: "Runner operation duration in milliseconds",
      unit: "ms"
    };
    runnerOperationTotal = runnerMeter.createCounter(
      "runner_operation_total",
      totalOptions
    );
    runnerOperationErrorsTotal = runnerMeter.createCounter(
      "runner_operation_errors_total",
      errorOptions
    );
    runnerOperationDuration = runnerMeter.createHistogram(
      "runner_operation_duration_ms",
      durationOptions
    );
  }
  return {
    runnerOperationTotal,
    runnerOperationErrorsTotal,
    runnerOperationDuration
  };
}
|
|
10107
|
+
/**
 * Return the sandbox-operation instruments, creating them on first use.
 *
 * Creation is keyed on `sandboxOperationTotal`: all three instruments are
 * created together from the "vm0-runner" meter the first time it is unset.
 *
 * @returns {{sandboxOperationTotal: object, sandboxOperationErrorsTotal: object, sandboxOperationDuration: object}}
 */
function getSandboxInstruments() {
  if (!sandboxOperationTotal) {
    const sandboxMeter = getMeter("vm0-runner");
    const totalOptions = {
      description: "Total number of sandbox operations"
    };
    const errorOptions = {
      description: "Total number of sandbox operation errors"
    };
    const durationOptions = {
      description: "Sandbox operation duration in milliseconds",
      unit: "ms"
    };
    sandboxOperationTotal = sandboxMeter.createCounter(
      "sandbox_operation_total",
      totalOptions
    );
    sandboxOperationErrorsTotal = sandboxMeter.createCounter(
      "sandbox_operation_errors_total",
      errorOptions
    );
    sandboxOperationDuration = sandboxMeter.createHistogram(
      "sandbox_operation_duration_ms",
      durationOptions
    );
  }
  return {
    sandboxOperationTotal,
    sandboxOperationErrorsTotal,
    sandboxOperationDuration
  };
}
|
|
10133
|
+
/**
 * Record one runner operation: a total count, an error count on failure, and
 * the duration. Does nothing while metrics are disabled.
 *
 * @param {object} attrs
 * @param {string} attrs.actionType - Recorded as the `action_type` attribute.
 * @param {number} attrs.durationMs - Value recorded on the duration histogram.
 * @param {boolean} attrs.success - Falsy values also increment the error counter.
 */
function recordRunnerOperation(attrs) {
  if (!isMetricsEnabled()) {
    return;
  }
  const instruments = getRunnerInstruments();
  const baseAttributes = {
    action_type: attrs.actionType,
    runner_label: getRunnerLabel()
  };

  instruments.runnerOperationTotal.add(1, baseAttributes);
  if (!attrs.success) {
    instruments.runnerOperationErrorsTotal.add(1, baseAttributes);
  }

  // The histogram additionally carries the outcome as a string attribute.
  const durationAttributes = {
    ...baseAttributes,
    success: String(attrs.success)
  };
  instruments.runnerOperationDuration.record(attrs.durationMs, durationAttributes);
}
|
|
10153
|
+
/**
 * Record one sandbox operation: a total count, an error count on failure, and
 * the duration. Does nothing while metrics are disabled.
 *
 * @param {object} attrs
 * @param {string} attrs.actionType - Recorded as the `action_type` attribute.
 * @param {number} attrs.durationMs - Value recorded on the duration histogram.
 * @param {boolean} attrs.success - Falsy values also increment the error counter.
 */
function recordSandboxOperation(attrs) {
  if (!isMetricsEnabled()) {
    return;
  }
  const instruments = getSandboxInstruments();
  const baseAttributes = {
    sandbox_type: "runner",
    action_type: attrs.actionType
  };

  instruments.sandboxOperationTotal.add(1, baseAttributes);
  if (!attrs.success) {
    instruments.sandboxOperationErrorsTotal.add(1, baseAttributes);
  }

  // The histogram additionally carries the outcome as a string attribute.
  const durationAttributes = {
    ...baseAttributes,
    success: String(attrs.success)
  };
  instruments.sandboxOperationDuration.record(attrs.durationMs, durationAttributes);
}
|
|
10173
|
+
|
|
10174
|
+
// src/lib/metrics/timing.ts
|
|
10175
|
+
/**
 * Await `fn()` and report its duration and outcome via recordRunnerOperation.
 *
 * @param {string} actionType - Action name attached to the recorded operation.
 * @param {Function} fn - Callback to run; its result is awaited.
 * @returns {Promise<*>} The resolved value of `fn()`.
 * @throws Rethrows whatever `fn()` throws, after recording the failure.
 */
async function withRunnerTiming(actionType, fn) {
  const begunAt = Date.now();
  const report = (success) =>
    recordRunnerOperation({
      actionType,
      durationMs: Date.now() - begunAt,
      success
    });

  let outcome;
  try {
    outcome = await fn();
  } catch (error) {
    report(false);
    throw error;
  }
  report(true);
  return outcome;
}
|
|
10191
|
+
/**
 * Await `fn()` and report its duration and outcome via recordSandboxOperation.
 *
 * @param {string} actionType - Action name attached to the recorded operation.
 * @param {Function} fn - Callback to run; its result is awaited.
 * @returns {Promise<*>} The resolved value of `fn()`.
 * @throws Rethrows whatever `fn()` throws, after recording the failure.
 */
async function withSandboxTiming(actionType, fn) {
  const begunAt = Date.now();
  const report = (success) =>
    recordSandboxOperation({
      actionType,
      durationMs: Date.now() - begunAt,
      success
    });

  let outcome;
  try {
    outcome = await fn();
  } catch (error) {
    report(false);
    throw error;
  }
  report(true);
  return outcome;
}
|
|
10207
|
+
|
|
9958
10208
|
// src/lib/executor.ts
|
|
9959
10209
|
function getVmIdFromRunId(runId) {
|
|
9960
10210
|
return runId.split("-")[0] || runId.substring(0, 8);
|
|
@@ -10142,11 +10392,12 @@ nameserver 1.1.1.1`;
|
|
|
10142
10392
|
`sudo sh -c 'rm -f /etc/resolv.conf && echo "${dnsConfig}" > /etc/resolv.conf'`
|
|
10143
10393
|
);
|
|
10144
10394
|
}
|
|
10145
|
-
async function executeJob(context, config) {
|
|
10395
|
+
async function executeJob(context, config, options = {}) {
|
|
10146
10396
|
const vmId = getVmIdFromRunId(context.runId);
|
|
10147
10397
|
let vm = null;
|
|
10148
10398
|
let guestIp = null;
|
|
10149
|
-
|
|
10399
|
+
const log = options.logger ?? ((msg) => console.log(msg));
|
|
10400
|
+
log(`[Executor] Starting job ${context.runId} in VM ${vmId}`);
|
|
10150
10401
|
try {
|
|
10151
10402
|
const workspacesDir = path4.join(process.cwd(), "workspaces");
|
|
10152
10403
|
const vmConfig = {
|
|
@@ -10158,24 +10409,27 @@ async function executeJob(context, config) {
|
|
|
10158
10409
|
firecrackerBinary: config.firecracker.binary,
|
|
10159
10410
|
workDir: path4.join(workspacesDir, `vm0-${vmId}`)
|
|
10160
10411
|
};
|
|
10161
|
-
|
|
10412
|
+
log(`[Executor] Creating VM ${vmId}...`);
|
|
10162
10413
|
vm = new FirecrackerVM(vmConfig);
|
|
10163
|
-
await vm.start();
|
|
10414
|
+
await withSandboxTiming("vm_create", () => vm.start());
|
|
10164
10415
|
guestIp = vm.getGuestIp();
|
|
10165
10416
|
if (!guestIp) {
|
|
10166
10417
|
throw new Error("VM started but no IP address available");
|
|
10167
10418
|
}
|
|
10168
|
-
|
|
10419
|
+
log(`[Executor] VM ${vmId} started, guest IP: ${guestIp}`);
|
|
10169
10420
|
const privateKeyPath = getRunnerSSHKeyPath();
|
|
10170
10421
|
const ssh = createVMSSHClient(guestIp, "user", privateKeyPath || void 0);
|
|
10171
|
-
|
|
10172
|
-
await
|
|
10173
|
-
|
|
10422
|
+
log(`[Executor] Waiting for SSH on ${guestIp}...`);
|
|
10423
|
+
await withSandboxTiming(
|
|
10424
|
+
"ssh_wait",
|
|
10425
|
+
() => ssh.waitUntilReachable(12e4, 2e3)
|
|
10426
|
+
);
|
|
10427
|
+
log(`[Executor] SSH ready on ${guestIp}`);
|
|
10174
10428
|
const firewallConfig = context.experimentalFirewall;
|
|
10175
10429
|
if (firewallConfig?.enabled) {
|
|
10176
10430
|
const mitmEnabled = firewallConfig.experimental_mitm ?? false;
|
|
10177
10431
|
const sealSecretsEnabled = firewallConfig.experimental_seal_secrets ?? false;
|
|
10178
|
-
|
|
10432
|
+
log(
|
|
10179
10433
|
`[Executor] Setting up network security for VM ${guestIp} (mitm=${mitmEnabled}, sealSecrets=${sealSecretsEnabled})`
|
|
10180
10434
|
);
|
|
10181
10435
|
await setupVMProxyRules(guestIp, config.proxy.port);
|
|
@@ -10188,36 +10442,50 @@ async function executeJob(context, config) {
|
|
|
10188
10442
|
await installProxyCA(ssh);
|
|
10189
10443
|
}
|
|
10190
10444
|
}
|
|
10191
|
-
|
|
10445
|
+
log(`[Executor] Configuring DNS...`);
|
|
10192
10446
|
await configureDNS(ssh);
|
|
10193
|
-
|
|
10194
|
-
await uploadScripts(ssh);
|
|
10195
|
-
|
|
10447
|
+
log(`[Executor] Uploading scripts...`);
|
|
10448
|
+
await withSandboxTiming("script_upload", () => uploadScripts(ssh));
|
|
10449
|
+
log(`[Executor] Scripts uploaded to ${SCRIPT_PATHS.baseDir}`);
|
|
10196
10450
|
if (context.storageManifest) {
|
|
10197
|
-
await
|
|
10451
|
+
await withSandboxTiming(
|
|
10452
|
+
"storage_download",
|
|
10453
|
+
() => downloadStorages(ssh, context.storageManifest)
|
|
10454
|
+
);
|
|
10198
10455
|
}
|
|
10199
10456
|
if (context.resumeSession) {
|
|
10200
|
-
await
|
|
10201
|
-
|
|
10202
|
-
|
|
10203
|
-
|
|
10204
|
-
|
|
10457
|
+
await withSandboxTiming(
|
|
10458
|
+
"session_restore",
|
|
10459
|
+
() => restoreSessionHistory(
|
|
10460
|
+
ssh,
|
|
10461
|
+
context.resumeSession,
|
|
10462
|
+
context.workingDir,
|
|
10463
|
+
context.cliAgentType || "claude-code"
|
|
10464
|
+
)
|
|
10205
10465
|
);
|
|
10206
10466
|
}
|
|
10207
10467
|
const envVars = buildEnvironmentVariables(context, config.server.url);
|
|
10208
10468
|
const envJson = JSON.stringify(envVars);
|
|
10209
|
-
|
|
10469
|
+
log(
|
|
10210
10470
|
`[Executor] Writing env JSON (${envJson.length} bytes) to ${ENV_JSON_PATH}`
|
|
10211
10471
|
);
|
|
10212
10472
|
await ssh.writeFile(ENV_JSON_PATH, envJson);
|
|
10213
10473
|
const systemLogFile = `/tmp/vm0-main-${context.runId}.log`;
|
|
10214
10474
|
const exitCodeFile = `/tmp/vm0-exit-${context.runId}`;
|
|
10215
|
-
console.log(`[Executor] Running agent via env-loader (background)...`);
|
|
10216
10475
|
const startTime = Date.now();
|
|
10217
|
-
|
|
10218
|
-
`
|
|
10219
|
-
|
|
10220
|
-
|
|
10476
|
+
if (options.benchmarkMode) {
|
|
10477
|
+
log(`[Executor] Running command directly (benchmark mode)...`);
|
|
10478
|
+
await ssh.exec(
|
|
10479
|
+
`nohup sh -c '${context.prompt}; echo $? > ${exitCodeFile}' > ${systemLogFile} 2>&1 &`
|
|
10480
|
+
);
|
|
10481
|
+
log(`[Executor] Command started in background`);
|
|
10482
|
+
} else {
|
|
10483
|
+
log(`[Executor] Running agent via env-loader (background)...`);
|
|
10484
|
+
await ssh.exec(
|
|
10485
|
+
`nohup sh -c 'python3 -u ${ENV_LOADER_PATH}; echo $? > ${exitCodeFile}' > ${systemLogFile} 2>&1 &`
|
|
10486
|
+
);
|
|
10487
|
+
log(`[Executor] Agent started in background`);
|
|
10488
|
+
}
|
|
10221
10489
|
const pollIntervalMs = 2e3;
|
|
10222
10490
|
const maxWaitMs = 24 * 60 * 60 * 1e3;
|
|
10223
10491
|
let exitCode = 1;
|
|
@@ -10226,25 +10494,35 @@ async function executeJob(context, config) {
|
|
|
10226
10494
|
await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
|
|
10227
10495
|
const checkResult = await ssh.exec(`cat ${exitCodeFile} 2>/dev/null`);
|
|
10228
10496
|
if (checkResult.exitCode === 0 && checkResult.stdout.trim()) {
|
|
10229
|
-
|
|
10497
|
+
const parsed = parseInt(checkResult.stdout.trim(), 10);
|
|
10498
|
+
exitCode = Number.isNaN(parsed) ? 1 : parsed;
|
|
10230
10499
|
completed = true;
|
|
10231
10500
|
break;
|
|
10232
10501
|
}
|
|
10233
10502
|
}
|
|
10234
|
-
const
|
|
10503
|
+
const durationMs = Date.now() - startTime;
|
|
10504
|
+
const duration = Math.round(durationMs / 1e3);
|
|
10235
10505
|
if (!completed) {
|
|
10236
|
-
|
|
10506
|
+
log(`[Executor] Agent timed out after ${duration}s`);
|
|
10507
|
+
recordRunnerOperation({
|
|
10508
|
+
actionType: "agent_execute",
|
|
10509
|
+
durationMs,
|
|
10510
|
+
success: false
|
|
10511
|
+
});
|
|
10237
10512
|
return {
|
|
10238
10513
|
exitCode: 1,
|
|
10239
10514
|
error: `Agent execution timed out after ${duration}s`
|
|
10240
10515
|
};
|
|
10241
10516
|
}
|
|
10242
|
-
|
|
10243
|
-
|
|
10244
|
-
|
|
10517
|
+
recordRunnerOperation({
|
|
10518
|
+
actionType: "agent_execute",
|
|
10519
|
+
durationMs,
|
|
10520
|
+
success: exitCode === 0
|
|
10521
|
+
});
|
|
10522
|
+
log(`[Executor] Agent finished in ${duration}s with exit code ${exitCode}`);
|
|
10245
10523
|
const logResult = await ssh.exec(`tail -100 ${systemLogFile} 2>/dev/null`);
|
|
10246
10524
|
if (logResult.stdout) {
|
|
10247
|
-
|
|
10525
|
+
log(
|
|
10248
10526
|
`[Executor] Log output (${logResult.stdout.length} chars): ${logResult.stdout.substring(0, 500)}`
|
|
10249
10527
|
);
|
|
10250
10528
|
}
|
|
@@ -10261,7 +10539,7 @@ async function executeJob(context, config) {
|
|
|
10261
10539
|
};
|
|
10262
10540
|
} finally {
|
|
10263
10541
|
if (context.experimentalFirewall?.enabled && guestIp) {
|
|
10264
|
-
|
|
10542
|
+
log(`[Executor] Cleaning up network security for VM ${guestIp}`);
|
|
10265
10543
|
try {
|
|
10266
10544
|
await removeVMProxyRules(guestIp, config.proxy.port);
|
|
10267
10545
|
} catch (err) {
|
|
@@ -10270,21 +10548,23 @@ async function executeJob(context, config) {
|
|
|
10270
10548
|
);
|
|
10271
10549
|
}
|
|
10272
10550
|
getVMRegistry().unregister(guestIp);
|
|
10273
|
-
|
|
10274
|
-
|
|
10275
|
-
|
|
10276
|
-
|
|
10277
|
-
|
|
10278
|
-
|
|
10279
|
-
|
|
10280
|
-
|
|
10281
|
-
|
|
10282
|
-
|
|
10551
|
+
if (!options.benchmarkMode) {
|
|
10552
|
+
try {
|
|
10553
|
+
await uploadNetworkLogs(
|
|
10554
|
+
config.server.url,
|
|
10555
|
+
context.sandboxToken,
|
|
10556
|
+
context.runId
|
|
10557
|
+
);
|
|
10558
|
+
} catch (err) {
|
|
10559
|
+
console.error(
|
|
10560
|
+
`[Executor] Failed to upload network logs: ${err instanceof Error ? err.message : "Unknown error"}`
|
|
10561
|
+
);
|
|
10562
|
+
}
|
|
10283
10563
|
}
|
|
10284
10564
|
}
|
|
10285
10565
|
if (vm) {
|
|
10286
|
-
|
|
10287
|
-
await vm.kill();
|
|
10566
|
+
log(`[Executor] Cleaning up VM ${vmId}...`);
|
|
10567
|
+
await withSandboxTiming("cleanup", () => vm.kill());
|
|
10288
10568
|
}
|
|
10289
10569
|
}
|
|
10290
10570
|
}
|
|
@@ -10331,6 +10611,18 @@ var startCommand = new Command("start").description("Start the runner").option("
|
|
|
10331
10611
|
const config = loadConfig(options.config);
|
|
10332
10612
|
validateFirecrackerPaths(config.firecracker);
|
|
10333
10613
|
console.log("Config valid");
|
|
10614
|
+
const datasetSuffix = process.env.AXIOM_DATASET_SUFFIX;
|
|
10615
|
+
if (!datasetSuffix) {
|
|
10616
|
+
throw new Error(
|
|
10617
|
+
"AXIOM_DATASET_SUFFIX is required. Set to 'dev' or 'prod'."
|
|
10618
|
+
);
|
|
10619
|
+
}
|
|
10620
|
+
initMetrics({
|
|
10621
|
+
serviceName: "vm0-runner",
|
|
10622
|
+
runnerLabel: config.name,
|
|
10623
|
+
axiomToken: process.env.AXIOM_TOKEN,
|
|
10624
|
+
environment: datasetSuffix
|
|
10625
|
+
});
|
|
10334
10626
|
const networkCheck = checkNetworkPrerequisites();
|
|
10335
10627
|
if (!networkCheck.ok) {
|
|
10336
10628
|
console.error("Network prerequisites not met:");
|
|
@@ -10419,7 +10711,10 @@ var startCommand = new Command("start").description("Start the runner").option("
|
|
|
10419
10711
|
continue;
|
|
10420
10712
|
}
|
|
10421
10713
|
try {
|
|
10422
|
-
const job = await
|
|
10714
|
+
const job = await withRunnerTiming(
|
|
10715
|
+
"poll",
|
|
10716
|
+
() => pollForJob(config.server, config.group)
|
|
10717
|
+
);
|
|
10423
10718
|
if (!job) {
|
|
10424
10719
|
await new Promise(
|
|
10425
10720
|
(resolve) => setTimeout(resolve, config.sandbox.poll_interval_ms)
|
|
@@ -10428,7 +10723,10 @@ var startCommand = new Command("start").description("Start the runner").option("
|
|
|
10428
10723
|
}
|
|
10429
10724
|
console.log(`Found job: ${job.runId}`);
|
|
10430
10725
|
try {
|
|
10431
|
-
const context = await
|
|
10726
|
+
const context = await withRunnerTiming(
|
|
10727
|
+
"claim",
|
|
10728
|
+
() => claimJob(config.server, job.runId)
|
|
10729
|
+
);
|
|
10432
10730
|
console.log(`Claimed job: ${context.runId}`);
|
|
10433
10731
|
activeJobs.add(context.runId);
|
|
10434
10732
|
updateStatus();
|
|
@@ -10467,6 +10765,9 @@ var startCommand = new Command("start").description("Start the runner").option("
|
|
|
10467
10765
|
console.log("Stopping network proxy...");
|
|
10468
10766
|
await getProxyManager().stop();
|
|
10469
10767
|
}
|
|
10768
|
+
console.log("Flushing metrics...");
|
|
10769
|
+
await flushMetrics();
|
|
10770
|
+
await shutdownMetrics();
|
|
10470
10771
|
state.mode = "stopped";
|
|
10471
10772
|
updateStatus();
|
|
10472
10773
|
console.log("Runner stopped");
|
|
@@ -10505,10 +10806,102 @@ var statusCommand = new Command2("status").description("Check runner connectivit
|
|
|
10505
10806
|
}
|
|
10506
10807
|
});
|
|
10507
10808
|
|
|
10809
|
+
// src/commands/benchmark.ts
|
|
10810
|
+
import { Command as Command3 } from "commander";
|
|
10811
|
+
import crypto from "crypto";
|
|
10812
|
+
|
|
10813
|
+
// src/lib/timing.ts
|
|
10814
|
+
/**
 * Wall-clock stopwatch that starts when it is constructed and prefixes log
 * lines with the elapsed time.
 */
var Timer = class {
  /** Epoch milliseconds captured at construction. */
  startTime;
  constructor() {
    this.startTime = Date.now();
  }
  /**
   * Get elapsed time formatted as [MM:SS.s]
   *
   * The elapsed time is rounded to tenths of a second before it is split into
   * minutes and seconds, so a value such as 59.96s carries into the minutes
   * ("[01:00.0]") instead of rendering as "[00:60.0]".
   *
   * @returns {string} Elapsed time, e.g. "[01:15.3]".
   */
  elapsed() {
    const tenths = Math.round((Date.now() - this.startTime) / 100);
    const minutes = Math.floor(tenths / 600);
    const seconds = ((tenths % 600) / 10).toFixed(1);
    return `[${String(minutes).padStart(2, "0")}:${seconds.padStart(4, "0")}]`;
  }
  /**
   * Log message with timestamp
   *
   * @param {string} message - Text printed after the elapsed-time prefix.
   */
  log(message) {
    console.log(`${this.elapsed()} ${message}`);
  }
  /**
   * Get total elapsed time in seconds
   *
   * @returns {number} Seconds since construction, including fractions.
   */
  totalSeconds() {
    return (Date.now() - this.startTime) / 1e3;
  }
};
|
|
10842
|
+
|
|
10843
|
+
// src/commands/benchmark.ts
|
|
10844
|
+
/**
 * Build the job context object passed to executeJob() for a benchmark run.
 *
 * Only the run id, prompt, working directory and agent type vary; every other
 * field is a fixed placeholder or null.
 *
 * @param {string} prompt - Stored as the context's `prompt`.
 * @param {object} options
 * @param {string} options.workingDir - Stored as `workingDir`.
 * @param {string} options.agentType - Stored as `cliAgentType`.
 * @returns {object} Context with a freshly generated UUID as `runId`.
 */
function createBenchmarkContext(prompt, options) {
  const runId = crypto.randomUUID();
  const context = {
    runId,
    prompt,
    agentComposeVersionId: "benchmark-local",
    vars: null,
    secretNames: null,
    checkpointId: null,
    sandboxToken: "benchmark-token-not-used",
    workingDir: options.workingDir,
    storageManifest: null,
    environment: null,
    resumeSession: null,
    secretValues: null,
    cliAgentType: options.agentType
  };
  return context;
}
|
|
10861
|
+
// `benchmark <prompt>`: loads the debug config, checks network prerequisites,
// sets up the bridge, runs the prompt through executeJob() in benchmark mode
// and exits with the job's exit code. Any thrown error is logged and exits 1.
var benchmarkCommand = new Command3("benchmark")
  .description("Run a VM performance benchmark (executes bash command directly)")
  .argument("<prompt>", "The bash command to execute in the VM")
  .option("--config <path>", "Config file path", "./runner.yaml")
  .option("--working-dir <path>", "Working directory in VM", "/home/user")
  .option("--agent-type <type>", "Agent type", "claude-code")
  .action(async (prompt, options) => {
    // Every log line below is prefixed with the time since the command started.
    const clock = new Timer();
    try {
      clock.log("Loading configuration...");
      const config = loadDebugConfig(options.config);
      validateFirecrackerPaths(config.firecracker);

      clock.log("Checking network prerequisites...");
      const prerequisites = checkNetworkPrerequisites();
      if (!prerequisites.ok) {
        console.error("Network prerequisites not met:");
        for (const problem of prerequisites.errors) {
          console.error(` - ${problem}`);
        }
        process.exit(1);
      }

      clock.log("Setting up network bridge...");
      await setupBridge();

      clock.log(`Executing command: ${prompt}`);
      const context = createBenchmarkContext(prompt, options);
      const jobOptions = {
        benchmarkMode: true,
        logger: clock.log.bind(clock)
      };
      const result = await executeJob(context, config, jobOptions);

      clock.log(`Exit code: ${result.exitCode}`);
      if (result.error) {
        clock.log(`Error: ${result.error}`);
      }
      clock.log(`Total time: ${clock.totalSeconds().toFixed(1)}s`);
      process.exit(result.exitCode);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      clock.log(`Error: ${reason}`);
      process.exit(1);
    }
  });
|
|
10899
|
+
|
|
10508
10900
|
// src/index.ts
|
|
10509
|
-
var version = true ? "2.
|
|
10901
|
+
var version = true ? "2.7.0" : "0.1.0";
|
|
10510
10902
|
program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
|
|
10511
10903
|
program.addCommand(startCommand);
|
|
10512
10904
|
program.addCommand(statusCommand);
|
|
10905
|
+
program.addCommand(benchmarkCommand);
|
|
10513
10906
|
program.parse();
|
|
10514
10907
|
//# sourceMappingURL=index.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vm0/runner",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.7.0",
|
|
4
4
|
"description": "Self-hosted runner for VM0 agents",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -15,6 +15,11 @@
|
|
|
15
15
|
"."
|
|
16
16
|
],
|
|
17
17
|
"dependencies": {
|
|
18
|
+
"@opentelemetry/api": "^1.9.0",
|
|
19
|
+
"@opentelemetry/exporter-metrics-otlp-proto": "^0.52.0",
|
|
20
|
+
"@opentelemetry/resources": "^1.25.0",
|
|
21
|
+
"@opentelemetry/sdk-metrics": "^1.25.0",
|
|
22
|
+
"@opentelemetry/semantic-conventions": "^1.25.0",
|
|
18
23
|
"commander": "^14.0.0",
|
|
19
24
|
"yaml": "^2.3.4",
|
|
20
25
|
"zod": "^3.25.64"
|