@vm0/runner 3.12.0 → 3.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +169 -573
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -48,7 +48,9 @@ var runnerPaths = {
48
48
  /** Check if a directory name is a VM workspace */
49
49
  isVmWorkspace: (dirname) => dirname.startsWith(VM_WORKSPACE_PREFIX),
50
50
  /** Extract vmId from workspace directory name */
51
- extractVmId: (dirname) => createVmId(dirname.replace(VM_WORKSPACE_PREFIX, ""))
51
+ extractVmId: (dirname) => createVmId(dirname.replace(VM_WORKSPACE_PREFIX, "")),
52
+ /** VM registry file for proxy IP → run mapping */
53
+ vmRegistry: (baseDir) => path.join(baseDir, "vm-registry.json")
52
54
  };
53
55
  var vmPaths = {
54
56
  /** Firecracker config file (used with --config-file) */
@@ -73,8 +75,6 @@ var snapshotOutputPaths = {
73
75
  var tempPaths = {
74
76
  /** Default proxy CA directory */
75
77
  proxyDir: `${VM0_TMP_PREFIX}-proxy`,
76
- /** VM registry for proxy */
77
- vmRegistry: `${VM0_TMP_PREFIX}-vm-registry.json`,
78
78
  /** Network log file for a run */
79
79
  networkLog: (runId) => `${VM0_TMP_PREFIX}-network-${runId}.jsonl`
80
80
  };
@@ -416,6 +416,37 @@ async function withFileLock(path9, fn, options) {
416
416
  }
417
417
  }
418
418
 
419
+ // src/lib/utils/process.ts
420
+ import { execSync } from "child_process";
421
+ function isProcessRunning(pid) {
422
+ try {
423
+ process.kill(pid, 0);
424
+ return true;
425
+ } catch (err) {
426
+ if (err instanceof Error && "code" in err && err.code === "EPERM") {
427
+ return true;
428
+ }
429
+ return false;
430
+ }
431
+ }
432
+ function killProcessTree(pid) {
433
+ try {
434
+ const childPidsStr = execSync(`pgrep -P ${pid} 2>/dev/null || true`, {
435
+ encoding: "utf-8"
436
+ }).trim();
437
+ if (childPidsStr) {
438
+ const childPids = childPidsStr.split("\n").map((p) => parseInt(p, 10));
439
+ for (const childPid of childPids) {
440
+ if (!isNaN(childPid)) {
441
+ killProcessTree(childPid);
442
+ }
443
+ }
444
+ }
445
+ process.kill(pid, "SIGKILL");
446
+ } catch {
447
+ }
448
+ }
449
+
419
450
  // src/lib/utils/exec.ts
420
451
  import { exec } from "child_process";
421
452
  import { promisify } from "util";
@@ -549,14 +580,6 @@ function makeNsName(runnerIdx, nsIdx) {
549
580
  function makeVethName(runnerIdx, nsIdx) {
550
581
  return `${VETH_PREFIX}${runnerIdx}-${nsIdx}`;
551
582
  }
552
- function isPidAlive(pid) {
553
- try {
554
- process.kill(pid, 0);
555
- return true;
556
- } catch {
557
- return false;
558
- }
559
- }
560
583
  async function deleteIptablesRulesByComment(comment) {
561
584
  const deleteFromTable = async (table) => {
562
585
  try {
@@ -625,7 +648,7 @@ var NetnsPool = class _NetnsPool {
625
648
  const data = read();
626
649
  const orphaned = [];
627
650
  for (const [runnerIdx, runner] of Object.entries(data.runners)) {
628
- if (!isPidAlive(runner.pid)) {
651
+ if (!isProcessRunning(runner.pid)) {
629
652
  orphaned.push({
630
653
  runnerIdx,
631
654
  namespaces: Object.entries(runner.namespaces).map(
@@ -662,7 +685,7 @@ var NetnsPool = class _NetnsPool {
662
685
  const data = read();
663
686
  for (const { runnerIdx } of orphanedData) {
664
687
  const runner = data.runners[runnerIdx];
665
- if (runner && !isPidAlive(runner.pid)) {
688
+ if (runner && !isProcessRunning(runner.pid)) {
666
689
  delete data.runners[runnerIdx];
667
690
  }
668
691
  }
@@ -1671,8 +1694,8 @@ var FirecrackerVM = class {
1671
1694
  * since we want to clean up as much as possible even if some parts fail.
1672
1695
  */
1673
1696
  async cleanup() {
1674
- if (this.process && !this.process.killed) {
1675
- this.process.kill("SIGKILL");
1697
+ if (this.process && !this.process.killed && this.process.pid) {
1698
+ killProcessTree(this.process.pid);
1676
1699
  this.process = null;
1677
1700
  }
1678
1701
  if (this.netns) {
@@ -7884,6 +7907,7 @@ var modelProviderTypeSchema = z19.enum([
7884
7907
  "minimax-api-key",
7885
7908
  "deepseek-api-key",
7886
7909
  "zai-api-key",
7910
+ "azure-foundry",
7887
7911
  "aws-bedrock"
7888
7912
  ]);
7889
7913
  var modelProviderFrameworkSchema = z19.enum(["claude-code", "codex"]);
@@ -9171,11 +9195,10 @@ var ENV_LOADER_PATH = "/usr/local/bin/vm0-agent/env-loader.mjs";
9171
9195
  // src/lib/proxy/vm-registry.ts
9172
9196
  import fs6 from "fs";
9173
9197
  var logger5 = createLogger("VMRegistry");
9174
- var DEFAULT_REGISTRY_PATH = tempPaths.vmRegistry;
9175
9198
  var VMRegistry = class {
9176
9199
  registryPath;
9177
9200
  data;
9178
- constructor(registryPath = DEFAULT_REGISTRY_PATH) {
9201
+ constructor(registryPath) {
9179
9202
  this.registryPath = registryPath;
9180
9203
  this.data = this.load();
9181
9204
  }
@@ -9262,7 +9285,9 @@ var VMRegistry = class {
9262
9285
  var globalRegistry = null;
9263
9286
  function getVMRegistry() {
9264
9287
  if (!globalRegistry) {
9265
- globalRegistry = new VMRegistry();
9288
+ throw new Error(
9289
+ "VMRegistry not initialized. Call initVMRegistry(registryPath) first."
9290
+ );
9266
9291
  }
9267
9292
  return globalRegistry;
9268
9293
  }
@@ -9275,502 +9300,16 @@ function initVMRegistry(registryPath) {
9275
9300
  import { spawn as spawn2 } from "child_process";
9276
9301
  import fs7 from "fs";
9277
9302
  import path5 from "path";
9278
-
9279
- // src/lib/proxy/mitm-addon-script.ts
9280
- var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
9281
- """
9282
- mitmproxy addon for VM0 runner-level network security mode.
9283
-
9284
- This addon runs on the runner HOST (not inside VMs) and:
9285
- 1. Intercepts all HTTPS requests from VMs
9286
- 2. Looks up the source VM's runId and firewall rules from the VM registry
9287
- 3. Evaluates firewall rules (first-match-wins) to ALLOW or DENY
9288
- 4. For MITM mode: Rewrites requests to go through VM0 Proxy endpoint
9289
- 5. For SNI-only mode: Passes through or blocks without decryption
9290
- 6. Logs network activity per-run to JSONL files
9291
- """
9292
- import os
9293
- import json
9294
- import time
9295
- import urllib.parse
9296
- import ipaddress
9297
- import socket
9298
- from mitmproxy import http, ctx, tls
9299
-
9300
-
9301
- # VM0 Proxy configuration from environment
9302
- API_URL = os.environ.get("VM0_API_URL", "https://www.vm0.ai")
9303
- REGISTRY_PATH = os.environ.get("VM0_REGISTRY_PATH", "/tmp/vm0-vm-registry.json")
9304
- VERCEL_BYPASS = os.environ.get("VERCEL_AUTOMATION_BYPASS_SECRET", "")
9305
-
9306
- # Construct proxy URL
9307
- PROXY_URL = f"{API_URL}/api/webhooks/agent/proxy"
9308
-
9309
- # Cache for VM registry (reloaded periodically)
9310
- _registry_cache = {}
9311
- _registry_cache_time = 0
9312
- REGISTRY_CACHE_TTL = 2 # seconds
9313
-
9314
- # Track request start times for latency calculation
9315
- request_start_times = {}
9316
-
9317
-
9318
- def load_registry() -> dict:
9319
- """Load the VM registry from file, with caching."""
9320
- global _registry_cache, _registry_cache_time
9321
-
9322
- now = time.time()
9323
- if now - _registry_cache_time < REGISTRY_CACHE_TTL:
9324
- return _registry_cache
9325
-
9326
- try:
9327
- if os.path.exists(REGISTRY_PATH):
9328
- with open(REGISTRY_PATH, "r") as f:
9329
- data = json.load(f)
9330
- _registry_cache = data.get("vms", {})
9331
- _registry_cache_time = now
9332
- return _registry_cache
9333
- except Exception as e:
9334
- ctx.log.warn(f"Failed to load VM registry: {e}")
9335
-
9336
- return _registry_cache
9337
-
9338
-
9339
- def get_vm_info(client_ip: str) -> dict | None:
9340
- """Look up VM info by client IP address."""
9341
- registry = load_registry()
9342
- return registry.get(client_ip)
9343
-
9344
-
9345
- def get_network_log_path(run_id: str) -> str:
9346
- """Get the network log file path for a run."""
9347
- return f"/tmp/vm0-network-{run_id}.jsonl"
9348
-
9349
-
9350
- def log_network_entry(run_id: str, entry: dict) -> None:
9351
- """Write a network log entry to the per-run JSONL file."""
9352
- if not run_id:
9353
- return
9354
-
9355
- log_path = get_network_log_path(run_id)
9356
- try:
9357
- fd = os.open(log_path, os.O_CREAT | os.O_APPEND | os.O_WRONLY, 0o644)
9358
- try:
9359
- os.write(fd, (json.dumps(entry) + "\\n").encode())
9360
- finally:
9361
- os.close(fd)
9362
- except Exception as e:
9363
- ctx.log.warn(f"Failed to write network log: {e}")
9364
-
9365
-
9366
- def get_original_url(flow: http.HTTPFlow) -> str:
9367
- """Reconstruct the original target URL from the request."""
9368
- scheme = "https" if flow.request.port == 443 else "http"
9369
- host = flow.request.pretty_host
9370
- port = flow.request.port
9371
-
9372
- if (scheme == "https" and port != 443) or (scheme == "http" and port != 80):
9373
- host_with_port = f"{host}:{port}"
9374
- else:
9375
- host_with_port = host
9376
-
9377
- path = flow.request.path
9378
- return f"{scheme}://{host_with_port}{path}"
9379
-
9380
-
9381
- # ============================================================================
9382
- # Firewall Rule Matching
9383
- # ============================================================================
9384
-
9385
- def match_domain(pattern: str, hostname: str) -> bool:
9386
- """
9387
- Match hostname against domain pattern.
9388
- Supports exact match and wildcard prefix (*.example.com).
9389
- """
9390
- if not pattern or not hostname:
9391
- return False
9392
-
9393
- pattern = pattern.lower()
9394
- hostname = hostname.lower()
9395
-
9396
- if pattern.startswith("*."):
9397
- # Wildcard: *.example.com matches sub.example.com, www.example.com
9398
- # Also matches example.com itself (without subdomain)
9399
- suffix = pattern[1:] # .example.com
9400
- base = pattern[2:] # example.com
9401
- return hostname.endswith(suffix) or hostname == base
9402
-
9403
- return hostname == pattern
9404
-
9405
-
9406
- def match_ip(cidr: str, ip_str: str) -> bool:
9407
- """
9408
- Match IP address against CIDR range.
9409
- Supports single IPs (1.2.3.4) and ranges (10.0.0.0/8).
9410
- """
9411
- if not cidr or not ip_str:
9412
- return False
9413
-
9414
- try:
9415
- # Parse CIDR (automatically handles single IPs as /32)
9416
- if "/" not in cidr:
9417
- cidr = f"{cidr}/32"
9418
- network = ipaddress.ip_network(cidr, strict=False)
9419
- ip = ipaddress.ip_address(ip_str)
9420
- return ip in network
9421
- except ValueError:
9422
- return False
9423
-
9424
-
9425
- def resolve_hostname_to_ip(hostname: str) -> str | None:
9426
- """Resolve hostname to IP address for IP-based rule matching."""
9427
- try:
9428
- return socket.gethostbyname(hostname)
9429
- except socket.gaierror:
9430
- return None
9431
-
9432
-
9433
- def evaluate_rules(rules: list, hostname: str, ip_str: str = None) -> tuple[str, str | None]:
9434
- """
9435
- Evaluate firewall rules against hostname/IP.
9436
- Returns (action, matched_rule_description).
9437
-
9438
- Rule evaluation is first-match-wins (top to bottom).
9439
-
9440
- Rule formats:
9441
- - Domain/IP rule: { domain: "*.example.com", action: "ALLOW" }
9442
- - Terminal rule: { final: "DENY" }
9443
- """
9444
- if not rules:
9445
- return ("ALLOW", None) # No rules = allow all
9446
-
9447
- for rule in rules:
9448
- # Final/terminal rule - value is the action
9449
- final_action = rule.get("final")
9450
- if final_action:
9451
- return (final_action, "final")
9452
-
9453
- # Domain rule
9454
- domain = rule.get("domain")
9455
- if domain and match_domain(domain, hostname):
9456
- return (rule.get("action", "DENY"), f"domain:{domain}")
9457
-
9458
- # IP rule
9459
- ip_pattern = rule.get("ip")
9460
- if ip_pattern:
9461
- target_ip = ip_str
9462
- if not target_ip:
9463
- target_ip = resolve_hostname_to_ip(hostname)
9464
- if target_ip and match_ip(ip_pattern, target_ip):
9465
- return (rule.get("action", "DENY"), f"ip:{ip_pattern}")
9466
-
9467
- # No rule matched - default deny (zero-trust)
9468
- return ("DENY", "default")
9469
-
9470
-
9471
- # ============================================================================
9472
- # TLS ClientHello Handler (SNI-only mode)
9473
- # ============================================================================
9474
-
9475
- def tls_clienthello(data: tls.ClientHelloData) -> None:
9476
- """
9477
- Handle TLS ClientHello for SNI-based filtering.
9478
- This is called BEFORE TLS decryption, allowing SNI-only filtering.
9479
- """
9480
- client_ip = data.context.client.peername[0] if data.context.client.peername else None
9481
- if not client_ip:
9482
- return
9483
-
9484
- vm_info = get_vm_info(client_ip)
9485
- if not vm_info:
9486
- # Not a registered VM - pass through without MITM interception
9487
- # This is critical for CIDR-based rules where all VM traffic is redirected
9488
- data.ignore_connection = True
9489
- return
9490
-
9491
- # If MITM is enabled, let the normal flow handle it
9492
- if vm_info.get("mitmEnabled", False):
9493
- return
9494
-
9495
- # SNI-only mode: check rules based on SNI
9496
- sni = data.context.client.sni
9497
- run_id = vm_info.get("runId", "")
9498
- rules = vm_info.get("firewallRules", [])
9499
-
9500
- # Auto-allow VM0 API requests - the agent MUST be able to communicate with VM0
9501
- if API_URL and sni:
9502
- parsed_api = urllib.parse.urlparse(API_URL)
9503
- api_hostname = parsed_api.hostname.lower() if parsed_api.hostname else ""
9504
- sni_lower = sni.lower()
9505
- if api_hostname and (sni_lower == api_hostname or sni_lower.endswith(f".{api_hostname}")):
9506
- ctx.log.info(f"[{run_id}] SNI-only auto-allow VM0 API: {sni}")
9507
- log_network_entry(run_id, {
9508
- "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
9509
- "mode": "sni",
9510
- "action": "ALLOW",
9511
- "host": sni,
9512
- "port": 443,
9513
- "rule_matched": "vm0-api",
9514
- })
9515
- data.ignore_connection = True # Pass through without MITM
9516
- return
9517
-
9518
- if not sni:
9519
- # No SNI, can't determine target - block for security
9520
- ctx.log.warn(f"[{run_id}] SNI-only: No SNI in ClientHello, blocking")
9521
- log_network_entry(run_id, {
9522
- "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
9523
- "mode": "sni",
9524
- "action": "DENY",
9525
- "host": "",
9526
- "port": 443,
9527
- "rule_matched": "no-sni",
9528
- })
9529
- # Don't set ignore_connection - mitmproxy will attempt MITM handshake
9530
- # Since VM doesn't have CA cert (SNI-only mode), TLS will fail immediately
9531
- return
9532
-
9533
- # Evaluate rules
9534
- action, matched_rule = evaluate_rules(rules, sni)
9535
-
9536
- # Log the connection
9537
- log_network_entry(run_id, {
9538
- "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
9539
- "mode": "sni",
9540
- "action": action,
9541
- "host": sni,
9542
- "port": 443,
9543
- "rule_matched": matched_rule,
9544
- })
9545
-
9546
- if action == "ALLOW":
9547
- # Pass through without MITM - mitmproxy will relay without decryption
9548
- ctx.log.info(f"[{run_id}] SNI-only ALLOW: {sni} (rule: {matched_rule})")
9549
- data.ignore_connection = True
9550
- else:
9551
- # Block the connection by NOT setting ignore_connection
9552
- # mitmproxy will attempt MITM handshake, but since VM doesn't have
9553
- # our CA certificate installed (SNI-only mode), the TLS handshake
9554
- # will fail immediately with a certificate error.
9555
- ctx.log.warn(f"[{run_id}] SNI-only DENY: {sni} (rule: {matched_rule})")
9556
- # Client will see: SSL certificate problem / certificate verify failed
9557
-
9558
-
9559
- # ============================================================================
9560
- # HTTP Request Handler (MITM mode)
9561
- # ============================================================================
9562
-
9563
- def request(flow: http.HTTPFlow) -> None:
9564
- """
9565
- Intercept request and apply firewall rules.
9566
- For MITM mode, rewrites allowed requests to VM0 Proxy.
9567
- """
9568
- # Track request start time
9569
- request_start_times[flow.id] = time.time()
9570
-
9571
- # Get client IP (source VM)
9572
- client_ip = flow.client_conn.peername[0] if flow.client_conn.peername else None
9573
-
9574
- if not client_ip:
9575
- ctx.log.warn("No client IP available, passing through")
9576
- return
9577
-
9578
- # Look up VM info from registry
9579
- vm_info = get_vm_info(client_ip)
9580
-
9581
- if not vm_info:
9582
- # Not a registered VM, pass through without proxying
9583
- ctx.log.info(f"No VM registration for {client_ip}, passing through")
9584
- return
9585
-
9586
- run_id = vm_info.get("runId", "")
9587
- sandbox_token = vm_info.get("sandboxToken", "")
9588
- mitm_enabled = vm_info.get("mitmEnabled", False)
9589
- rules = vm_info.get("firewallRules", [])
9590
-
9591
- # Store info for response handler
9592
- flow.metadata["vm_run_id"] = run_id
9593
- flow.metadata["vm_client_ip"] = client_ip
9594
- flow.metadata["vm_mitm_enabled"] = mitm_enabled
9595
-
9596
- # Get target hostname
9597
- hostname = flow.request.pretty_host.lower()
9598
-
9599
- # Auto-allow VM0 API requests - the agent MUST be able to communicate with VM0
9600
- # This is checked before user firewall rules to ensure agent functionality
9601
- if API_URL:
9602
- parsed_api = urllib.parse.urlparse(API_URL)
9603
- api_hostname = parsed_api.hostname.lower() if parsed_api.hostname else ""
9604
- if api_hostname and (hostname == api_hostname or hostname.endswith(f".{api_hostname}")):
9605
- ctx.log.info(f"[{run_id}] Auto-allow VM0 API: {hostname}")
9606
- flow.metadata["firewall_action"] = "ALLOW"
9607
- flow.metadata["firewall_rule"] = "vm0-api"
9608
- # Continue to skip rewrite check below
9609
- flow.metadata["original_url"] = get_original_url(flow)
9610
- flow.metadata["skip_rewrite"] = True
9611
- return
9612
-
9613
- # Evaluate firewall rules
9614
- action, matched_rule = evaluate_rules(rules, hostname)
9615
- flow.metadata["firewall_action"] = action
9616
- flow.metadata["firewall_rule"] = matched_rule
9617
-
9618
- if action == "DENY":
9619
- ctx.log.warn(f"[{run_id}] Firewall DENY: {hostname} (rule: {matched_rule})")
9620
- # Kill the flow and return error response
9621
- flow.response = http.Response.make(
9622
- 403,
9623
- b"Blocked by firewall",
9624
- {"Content-Type": "text/plain"}
9625
- )
9626
- return
9627
-
9628
- # Request is ALLOWED - proceed with processing
9629
-
9630
- # Skip if no API URL configured
9631
- if not API_URL:
9632
- ctx.log.warn("VM0_API_URL not set, passing through")
9633
- return
9634
-
9635
- # Skip rewriting requests already going to VM0 (avoid loops)
9636
- if API_URL in flow.request.pretty_url:
9637
- flow.metadata["original_url"] = flow.request.pretty_url
9638
- flow.metadata["skip_rewrite"] = True
9639
- return
9640
-
9641
- # Skip rewriting requests to trusted storage domains (S3, etc.)
9642
- # S3 presigned URLs have signatures that break when proxied
9643
- TRUSTED_DOMAINS = [
9644
- ".s3.amazonaws.com",
9645
- ".s3-", # Regional S3 endpoints like s3-us-west-2.amazonaws.com
9646
- "s3.amazonaws.com",
9647
- ".r2.cloudflarestorage.com",
9648
- ".storage.googleapis.com",
9649
- ]
9650
- for domain in TRUSTED_DOMAINS:
9651
- if domain in hostname or hostname.endswith(domain.lstrip(".")):
9652
- ctx.log.info(f"[{run_id}] Skipping trusted storage domain: {hostname}")
9653
- flow.metadata["original_url"] = get_original_url(flow)
9654
- flow.metadata["skip_rewrite"] = True
9655
- return
9656
-
9657
- # Get original target URL
9658
- original_url = get_original_url(flow)
9659
- flow.metadata["original_url"] = original_url
9660
-
9661
- # If MITM is not enabled, just allow the request through without rewriting
9662
- if not mitm_enabled:
9663
- ctx.log.info(f"[{run_id}] Firewall ALLOW (no MITM): {hostname}")
9664
- return
9665
-
9666
- # MITM mode: rewrite to VM0 Proxy
9667
- ctx.log.info(f"[{run_id}] Proxying via MITM: {original_url}")
9668
-
9669
- # Parse proxy URL
9670
- parsed = urllib.parse.urlparse(PROXY_URL)
9671
-
9672
- # Build query params
9673
- query_params = {"url": original_url}
9674
- if run_id:
9675
- query_params["runId"] = run_id
9676
- query_string = urllib.parse.urlencode(query_params)
9677
-
9678
- # Rewrite request to proxy
9679
- flow.request.host = parsed.hostname
9680
- flow.request.port = 443 if parsed.scheme == "https" else 80
9681
- flow.request.scheme = parsed.scheme
9682
- flow.request.path = f"{parsed.path}?{query_string}"
9683
-
9684
- # Save original Authorization header before overwriting
9685
- if "Authorization" in flow.request.headers:
9686
- flow.request.headers["x-vm0-original-authorization"] = flow.request.headers["Authorization"]
9687
-
9688
- # Add sandbox authentication token
9689
- if sandbox_token:
9690
- flow.request.headers["Authorization"] = f"Bearer {sandbox_token}"
9691
-
9692
- # Add Vercel bypass header if configured
9693
- if VERCEL_BYPASS:
9694
- flow.request.headers["x-vercel-protection-bypass"] = VERCEL_BYPASS
9695
-
9696
-
9697
- def response(flow: http.HTTPFlow) -> None:
9698
- """
9699
- Handle response and log network activity.
9700
- """
9701
- # Calculate latency
9702
- start_time = request_start_times.pop(flow.id, None)
9703
- latency_ms = int((time.time() - start_time) * 1000) if start_time else 0
9704
-
9705
- # Get stored info
9706
- run_id = flow.metadata.get("vm_run_id", "")
9707
- original_url = flow.metadata.get("original_url", flow.request.pretty_url)
9708
- mitm_enabled = flow.metadata.get("vm_mitm_enabled", False)
9709
- firewall_action = flow.metadata.get("firewall_action", "ALLOW")
9710
- firewall_rule = flow.metadata.get("firewall_rule")
9711
-
9712
- # Calculate sizes
9713
- request_size = len(flow.request.content) if flow.request.content else 0
9714
- response_size = len(flow.response.content) if flow.response and flow.response.content else 0
9715
- status_code = flow.response.status_code if flow.response else 0
9716
-
9717
- # Parse URL for host
9718
- try:
9719
- parsed_url = urllib.parse.urlparse(original_url)
9720
- host = parsed_url.hostname or flow.request.pretty_host
9721
- port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)
9722
- except:
9723
- host = flow.request.pretty_host
9724
- port = flow.request.port
9725
-
9726
- # Log network entry for this run
9727
- if run_id:
9728
- log_entry = {
9729
- "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
9730
- "mode": "mitm" if mitm_enabled else "sni",
9731
- "action": firewall_action,
9732
- "host": host,
9733
- "port": port,
9734
- "rule_matched": firewall_rule,
9735
- }
9736
-
9737
- # Add HTTP details only in MITM mode
9738
- if mitm_enabled:
9739
- log_entry.update({
9740
- "method": flow.request.method,
9741
- "path": flow.request.path.split("?")[0], # Path without query
9742
- "url": original_url,
9743
- "status": status_code,
9744
- "latency_ms": latency_ms,
9745
- "request_size": request_size,
9746
- "response_size": response_size,
9747
- })
9748
-
9749
- log_network_entry(run_id, log_entry)
9750
-
9751
- # Log errors to mitmproxy console
9752
- if flow.response and flow.response.status_code >= 400:
9753
- ctx.log.warn(
9754
- f"[{run_id}] Response {flow.response.status_code}: {original_url}"
9755
- )
9756
-
9757
-
9758
- # mitmproxy addon registration
9759
- addons = [tls_clienthello, request, response]
9760
- `;
9761
-
9762
- // src/lib/proxy/proxy-manager.ts
9763
9303
  var logger6 = createLogger("ProxyManager");
9764
9304
  var DEFAULT_PROXY_OPTIONS = {
9765
- port: 8080,
9766
- registryPath: DEFAULT_REGISTRY_PATH
9305
+ port: 8080
9767
9306
  };
9768
9307
  var ProxyManager = class {
9769
9308
  config;
9770
9309
  process = null;
9771
9310
  isRunning = false;
9772
9311
  constructor(config) {
9773
- const addonPath = path5.join(config.caDir, "mitm_addon.py");
9312
+ const addonPath = path5.join(config.caDir, "mitm-addon.py");
9774
9313
  this.config = {
9775
9314
  ...DEFAULT_PROXY_OPTIONS,
9776
9315
  ...config,
@@ -9793,19 +9332,6 @@ var ProxyManager = class {
9793
9332
  });
9794
9333
  });
9795
9334
  }
9796
- /**
9797
- * Ensure the addon script exists at the configured path
9798
- */
9799
- ensureAddonScript() {
9800
- const addonDir = path5.dirname(this.config.addonPath);
9801
- if (!fs7.existsSync(addonDir)) {
9802
- fs7.mkdirSync(addonDir, { recursive: true });
9803
- }
9804
- fs7.writeFileSync(this.config.addonPath, RUNNER_MITM_ADDON_SCRIPT, {
9805
- mode: 493
9806
- });
9807
- logger6.log(`Addon script written to ${this.config.addonPath}`);
9808
- }
9809
9335
  /**
9810
9336
  * Validate proxy configuration
9811
9337
  */
@@ -9817,7 +9343,9 @@ var ProxyManager = class {
9817
9343
  if (!fs7.existsSync(caCertPath)) {
9818
9344
  throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
9819
9345
  }
9820
- this.ensureAddonScript();
9346
+ if (!fs7.existsSync(this.config.addonPath)) {
9347
+ throw new Error(`Addon script not found: ${this.config.addonPath}`);
9348
+ }
9821
9349
  }
9822
9350
  /**
9823
9351
  * Start mitmproxy
@@ -10472,12 +10000,12 @@ function createStatusUpdater(statusFilePath, state) {
10472
10000
  }
10473
10001
 
10474
10002
  // src/lib/firecracker/network.ts
10475
- import { execSync, exec as exec3 } from "child_process";
10003
+ import { execSync as execSync2, exec as exec3 } from "child_process";
10476
10004
  import { promisify as promisify3 } from "util";
10477
10005
  var execAsync3 = promisify3(exec3);
10478
10006
  function commandExists(cmd) {
10479
10007
  try {
10480
- execSync(`which ${cmd}`, { stdio: "ignore" });
10008
+ execSync2(`which ${cmd}`, { stdio: "ignore" });
10481
10009
  return true;
10482
10010
  } catch {
10483
10011
  return false;
@@ -10492,7 +10020,7 @@ function checkNetworkPrerequisites() {
10492
10020
  }
10493
10021
  }
10494
10022
  try {
10495
- execSync("sudo -n true 2>/dev/null", { stdio: "ignore" });
10023
+ execSync2("sudo -n true 2>/dev/null", { stdio: "ignore" });
10496
10024
  } catch {
10497
10025
  errors.push(
10498
10026
  "Root/sudo access required for network configuration. Please run with sudo or configure sudoers."
@@ -10518,17 +10046,6 @@ import path7 from "path";
10518
10046
  var logger11 = createLogger("RunnerLock");
10519
10047
  var DEFAULT_PID_FILE = runtimePaths.runnerPid;
10520
10048
  var currentPidFile = null;
10521
- function isProcessRunning(pid) {
10522
- try {
10523
- process.kill(pid, 0);
10524
- return true;
10525
- } catch (err) {
10526
- if (err instanceof Error && "code" in err && err.code === "EPERM") {
10527
- return true;
10528
- }
10529
- return false;
10530
- }
10531
- }
10532
10049
  function acquireRunnerLock(options = {}) {
10533
10050
  const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
10534
10051
  const runDir = path7.dirname(pidFile);
@@ -10575,11 +10092,13 @@ async function setupEnvironment(options) {
10575
10092
  process.exit(1);
10576
10093
  }
10577
10094
  logger12.log("Initializing network proxy...");
10578
- initVMRegistry();
10095
+ const registryPath = runnerPaths.vmRegistry(config.base_dir);
10096
+ initVMRegistry(registryPath);
10579
10097
  const proxyManager = initProxyManager({
10580
10098
  apiUrl: config.server.url,
10581
10099
  port: config.proxy.port,
10582
- caDir: config.proxy.ca_dir
10100
+ caDir: config.proxy.ca_dir,
10101
+ registryPath
10583
10102
  });
10584
10103
  let proxyEnabled = false;
10585
10104
  try {
@@ -10909,6 +10428,7 @@ var startCommand = new Command("start").description("Start the runner").option("
10909
10428
  // src/commands/doctor.ts
10910
10429
  import { Command as Command2 } from "commander";
10911
10430
  import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
10431
+ import { execSync as execSync3 } from "child_process";
10912
10432
 
10913
10433
  // src/lib/firecracker/process.ts
10914
10434
  import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync4 } from "fs";
@@ -10916,12 +10436,21 @@ import path8 from "path";
10916
10436
  function parseFirecrackerCmdline(cmdline) {
10917
10437
  const args = cmdline.split("\0");
10918
10438
  if (!args[0]?.includes("firecracker")) return null;
10439
+ let filePath;
10919
10440
  const sockIdx = args.indexOf("--api-sock");
10920
- const socketPath = args[sockIdx + 1];
10921
- if (sockIdx === -1 || !socketPath) return null;
10922
- const match = socketPath.match(/vm0-([a-f0-9]+)\/firecracker\.sock$/);
10441
+ if (sockIdx !== -1) {
10442
+ filePath = args[sockIdx + 1];
10443
+ }
10444
+ if (!filePath) {
10445
+ const configIdx = args.indexOf("--config-file");
10446
+ if (configIdx !== -1) {
10447
+ filePath = args[configIdx + 1];
10448
+ }
10449
+ }
10450
+ if (!filePath) return null;
10451
+ const match = filePath.match(/vm0-([a-f0-9]+)\//);
10923
10452
  if (!match?.[1]) return null;
10924
- return { vmId: createVmId(match[1]), socketPath };
10453
+ return createVmId(match[1]);
10925
10454
  }
10926
10455
  function parseMitmproxyCmdline(cmdline) {
10927
10456
  if (!cmdline.includes("mitmproxy") && !cmdline.includes("mitmdump")) {
@@ -10949,9 +10478,9 @@ function findFirecrackerProcesses() {
10949
10478
  if (!existsSync4(cmdlinePath)) continue;
10950
10479
  try {
10951
10480
  const cmdline = readFileSync2(cmdlinePath, "utf-8");
10952
- const parsed = parseFirecrackerCmdline(cmdline);
10953
- if (parsed) {
10954
- processes.push({ pid, ...parsed });
10481
+ const vmId = parseFirecrackerCmdline(cmdline);
10482
+ if (vmId) {
10483
+ processes.push({ pid, vmId });
10955
10484
  }
10956
10485
  } catch {
10957
10486
  continue;
@@ -10964,33 +10493,25 @@ function findProcessByVmId(vmId) {
10964
10493
  const vmIdStr = vmIdValue(vmId);
10965
10494
  return processes.find((p) => vmIdValue(p.vmId) === vmIdStr) || null;
10966
10495
  }
10967
- function isProcessRunning2(pid) {
10968
- try {
10969
- process.kill(pid, 0);
10970
- return true;
10971
- } catch {
10972
- return false;
10973
- }
10974
- }
10975
10496
  async function killProcess(pid, timeoutMs = 5e3) {
10976
- if (!isProcessRunning2(pid)) return true;
10497
+ if (!isProcessRunning(pid)) return true;
10977
10498
  try {
10978
10499
  process.kill(pid, "SIGTERM");
10979
10500
  } catch {
10980
- return !isProcessRunning2(pid);
10501
+ return !isProcessRunning(pid);
10981
10502
  }
10982
10503
  const startTime = Date.now();
10983
10504
  while (Date.now() - startTime < timeoutMs) {
10984
- if (!isProcessRunning2(pid)) return true;
10505
+ if (!isProcessRunning(pid)) return true;
10985
10506
  await new Promise((resolve) => setTimeout(resolve, 100));
10986
10507
  }
10987
- if (isProcessRunning2(pid)) {
10508
+ if (isProcessRunning(pid)) {
10988
10509
  try {
10989
10510
  process.kill(pid, "SIGKILL");
10990
10511
  } catch {
10991
10512
  }
10992
10513
  }
10993
- return !isProcessRunning2(pid);
10514
+ return !isProcessRunning(pid);
10994
10515
  }
10995
10516
  function findMitmproxyProcess() {
10996
10517
  const procDir = "/proc";
@@ -11018,15 +10539,26 @@ function findMitmproxyProcess() {
11018
10539
  return null;
11019
10540
  }
11020
10541
 
10542
+ // src/lib/runner/types.ts
10543
+ import { z as z30 } from "zod";
10544
+ var RunnerModeSchema = z30.enum(["running", "draining", "stopping", "stopped"]);
10545
+ var RunnerStatusSchema = z30.object({
10546
+ mode: RunnerModeSchema,
10547
+ active_runs: z30.number(),
10548
+ active_run_ids: z30.array(z30.string()),
10549
+ started_at: z30.string(),
10550
+ updated_at: z30.string()
10551
+ });
10552
+
11021
10553
  // src/commands/doctor.ts
11022
- function displayRunnerStatus(statusFilePath) {
10554
+ function displayRunnerStatus(statusFilePath, warnings) {
11023
10555
  if (!existsSync5(statusFilePath)) {
11024
10556
  console.log("Mode: unknown (no status.json)");
11025
10557
  return null;
11026
10558
  }
11027
10559
  try {
11028
- const status = JSON.parse(
11029
- readFileSync3(statusFilePath, "utf-8")
10560
+ const status = RunnerStatusSchema.parse(
10561
+ JSON.parse(readFileSync3(statusFilePath, "utf-8"))
11030
10562
  );
11031
10563
  console.log(`Mode: ${status.mode}`);
11032
10564
  if (status.started_at) {
@@ -11037,10 +10569,11 @@ function displayRunnerStatus(statusFilePath) {
11037
10569
  return status;
11038
10570
  } catch {
11039
10571
  console.log("Mode: unknown (status.json unreadable)");
10572
+ warnings.push({ message: "status.json exists but cannot be parsed" });
11040
10573
  return null;
11041
10574
  }
11042
10575
  }
11043
- async function checkApiConnectivity(config) {
10576
+ async function checkApiConnectivity(config, warnings) {
11044
10577
  console.log("API Connectivity:");
11045
10578
  try {
11046
10579
  await pollForJob(config.server, config.group);
@@ -11051,6 +10584,9 @@ async function checkApiConnectivity(config) {
11051
10584
  console.log(
11052
10585
  ` Error: ${error instanceof Error ? error.message : "Unknown error"}`
11053
10586
  );
10587
+ warnings.push({
10588
+ message: `Cannot connect to API: ${error instanceof Error ? error.message : "Unknown error"}`
10589
+ });
11054
10590
  }
11055
10591
  }
11056
10592
  async function checkNetwork(config, warnings) {
@@ -11086,8 +10622,7 @@ function buildJobInfo(status, processes) {
11086
10622
  jobs.push({
11087
10623
  runId,
11088
10624
  vmId,
11089
- hasProcess: !!proc,
11090
- pid: proc?.pid
10625
+ firecrackerPid: proc?.pid
11091
10626
  });
11092
10627
  }
11093
10628
  }
@@ -11101,13 +10636,61 @@ function displayRuns(jobs, maxConcurrent) {
11101
10636
  }
11102
10637
  console.log(" Run ID VM ID Status");
11103
10638
  for (const job of jobs) {
11104
- const statusText = job.hasProcess ? `\u2713 Running (PID ${job.pid})` : "\u26A0\uFE0F No process";
10639
+ const statusText = job.firecrackerPid ? `\u2713 Running (PID ${job.firecrackerPid})` : "\u26A0\uFE0F No process";
11105
10640
  console.log(` ${job.runId} ${job.vmId} ${statusText}`);
11106
10641
  }
11107
10642
  }
11108
- function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings) {
10643
+ async function findOrphanNetworkNamespaces(warnings) {
10644
+ let allNamespaces = [];
10645
+ try {
10646
+ const output = execSync3("ip netns list 2>/dev/null || true", {
10647
+ encoding: "utf-8"
10648
+ });
10649
+ allNamespaces = output.split("\n").map((line) => line.split(" ")[0] ?? "").filter((ns) => ns.startsWith(NS_PREFIX));
10650
+ } catch (err) {
10651
+ warnings.push({
10652
+ message: `Failed to list network namespaces: ${err instanceof Error ? err.message : "Unknown error"}`
10653
+ });
10654
+ return [];
10655
+ }
10656
+ if (allNamespaces.length === 0) {
10657
+ return [];
10658
+ }
10659
+ const registryPath = runtimePaths.netnsRegistry;
10660
+ if (!existsSync5(registryPath)) {
10661
+ return allNamespaces;
10662
+ }
10663
+ try {
10664
+ return await withFileLock(registryPath, async () => {
10665
+ const registry = RegistrySchema.parse(
10666
+ JSON.parse(readFileSync3(registryPath, "utf-8"))
10667
+ );
10668
+ const aliveNamespaces = /* @__PURE__ */ new Set();
10669
+ for (const [runnerIdx, runner] of Object.entries(registry.runners)) {
10670
+ if (isProcessRunning(runner.pid)) {
10671
+ for (const nsIdx of Object.keys(runner.namespaces)) {
10672
+ aliveNamespaces.add(`${NS_PREFIX}${runnerIdx}-${nsIdx}`);
10673
+ }
10674
+ }
10675
+ }
10676
+ const orphans = [];
10677
+ for (const ns of allNamespaces) {
10678
+ if (!aliveNamespaces.has(ns)) {
10679
+ orphans.push(ns);
10680
+ }
10681
+ }
10682
+ return orphans;
10683
+ });
10684
+ } catch (err) {
10685
+ warnings.push({
10686
+ message: `Failed to read netns registry: ${err instanceof Error ? err.message : "Unknown error"}`
10687
+ });
10688
+ return [];
10689
+ }
10690
+ }
10691
+ async function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings) {
11109
10692
  for (const job of jobs) {
11110
- if (!job.hasProcess) {
10693
+ if (!job.firecrackerPid) {
11111
10694
  warnings.push({
11112
10695
  message: `Run ${job.vmId} in status.json but no Firecracker process running`
11113
10696
  });
@@ -11121,6 +10704,12 @@ function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warning
11121
10704
  });
11122
10705
  }
11123
10706
  }
10707
+ const orphanNetns = await findOrphanNetworkNamespaces(warnings);
10708
+ for (const ns of orphanNetns) {
10709
+ warnings.push({
10710
+ message: `Orphan network namespace: ${ns} (runner process not running)`
10711
+ });
10712
+ }
11124
10713
  for (const ws of workspaces) {
11125
10714
  const vmId = runnerPaths.extractVmId(ws);
11126
10715
  if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
@@ -11157,9 +10746,9 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
11157
10746
  const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
11158
10747
  const warnings = [];
11159
10748
  console.log(`Runner: ${config.name}`);
11160
- const status = displayRunnerStatus(statusFilePath);
10749
+ const status = displayRunnerStatus(statusFilePath, warnings);
11161
10750
  console.log("");
11162
- await checkApiConnectivity(config);
10751
+ await checkApiConnectivity(config, warnings);
11163
10752
  console.log("");
11164
10753
  await checkNetwork(config, warnings);
11165
10754
  console.log("");
@@ -11168,7 +10757,13 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
11168
10757
  const { jobs, statusVmIds } = buildJobInfo(status, processes);
11169
10758
  displayRuns(jobs, config.sandbox.max_concurrent);
11170
10759
  console.log("");
11171
- detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings);
10760
+ await detectOrphanResources(
10761
+ jobs,
10762
+ processes,
10763
+ workspaces,
10764
+ statusVmIds,
10765
+ warnings
10766
+ );
11172
10767
  displayWarnings(warnings);
11173
10768
  process.exit(warnings.length > 0 ? 1 : 0);
11174
10769
  } catch (error) {
@@ -11251,8 +10846,8 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
11251
10846
  }
11252
10847
  if (runId && existsSync6(statusFilePath)) {
11253
10848
  try {
11254
- const status = JSON.parse(
11255
- readFileSync4(statusFilePath, "utf-8")
10849
+ const status = RunnerStatusSchema.parse(
10850
+ JSON.parse(readFileSync4(statusFilePath, "utf-8"))
11256
10851
  );
11257
10852
  const oldCount = status.active_runs;
11258
10853
  status.active_run_ids = status.active_run_ids.filter(
@@ -11309,8 +10904,8 @@ function resolveRunId(input, statusFilePath) {
11309
10904
  }
11310
10905
  if (existsSync6(statusFilePath)) {
11311
10906
  try {
11312
- const status = JSON.parse(
11313
- readFileSync4(statusFilePath, "utf-8")
10907
+ const status = RunnerStatusSchema.parse(
10908
+ JSON.parse(readFileSync4(statusFilePath, "utf-8"))
11314
10909
  );
11315
10910
  const match = status.active_run_ids.find(
11316
10911
  (id) => id.startsWith(input)
@@ -11413,6 +11008,7 @@ var benchmarkCommand = new Command4("benchmark").description(
11413
11008
  }
11414
11009
  process.exit(1);
11415
11010
  }
11011
+ initVMRegistry(runnerPaths.vmRegistry(config.base_dir));
11416
11012
  timer.log("Initializing pools...");
11417
11013
  const snapshotConfig = config.firecracker.snapshot;
11418
11014
  await initOverlayPool({
@@ -11658,7 +11254,7 @@ var snapshotCommand = new Command5("snapshot").description("Generate a Firecrack
11658
11254
  );
11659
11255
 
11660
11256
  // src/index.ts
11661
- var version = true ? "3.12.0" : "0.1.0";
11257
+ var version = true ? "3.12.2" : "0.1.0";
11662
11258
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11663
11259
  program.addCommand(startCommand);
11664
11260
  program.addCommand(doctorCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "3.12.0",
3
+ "version": "3.12.2",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",