npm - @vm0/runner - Versions diffs - 3.12.0 → 3.12.2 - Mend

@vm0/runner 3.12.0 → 3.12.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (2)

package/index.js +169 -573
package/package.json +1 -1

package/index.js (changed)

@@ -48,7 +48,9 @@ var runnerPaths = {
   /** Check if a directory name is a VM workspace */
   isVmWorkspace: (dirname) => dirname.startsWith(VM_WORKSPACE_PREFIX),
   /** Extract vmId from workspace directory name */
-  extractVmId: (dirname) => createVmId(dirname.replace(VM_WORKSPACE_PREFIX, ""))
+  extractVmId: (dirname) => createVmId(dirname.replace(VM_WORKSPACE_PREFIX, "")),
+  /** VM registry file for proxy IP → run mapping */
+  vmRegistry: (baseDir) => path.join(baseDir, "vm-registry.json")
 };
 var vmPaths = {
   /** Firecracker config file (used with --config-file) */
@@ -73,8 +75,6 @@ var snapshotOutputPaths = {
 var tempPaths = {
   /** Default proxy CA directory */
   proxyDir: `${VM0_TMP_PREFIX}-proxy`,
-  /** VM registry for proxy */
-  vmRegistry: `${VM0_TMP_PREFIX}-vm-registry.json`,
   /** Network log file for a run */
   networkLog: (runId) => `${VM0_TMP_PREFIX}-network-${runId}.jsonl`
 };
@@ -416,6 +416,37 @@ async function withFileLock(path9, fn, options) {
   }
 }
+// src/lib/utils/process.ts
+import { execSync } from "child_process";
+function isProcessRunning(pid) {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    if (err instanceof Error && "code" in err && err.code === "EPERM") {
+      return true;
+    }
+    return false;
+  }
+}
+function killProcessTree(pid) {
+  try {
+    const childPidsStr = execSync(`pgrep -P ${pid} 2>/dev/null || true`, {
+      encoding: "utf-8"
+    }).trim();
+    if (childPidsStr) {
+      const childPids = childPidsStr.split("\n").map((p) => parseInt(p, 10));
+      for (const childPid of childPids) {
+        if (!isNaN(childPid)) {
+          killProcessTree(childPid);
+        }
+      }
+    }
+    process.kill(pid, "SIGKILL");
+  } catch {
+  }
+}
 // src/lib/utils/exec.ts
 import { exec } from "child_process";
 import { promisify } from "util";
@@ -549,14 +580,6 @@ function makeNsName(runnerIdx, nsIdx) {
 function makeVethName(runnerIdx, nsIdx) {
   return `${VETH_PREFIX}${runnerIdx}-${nsIdx}`;
 }
-function isPidAlive(pid) {
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch {
-    return false;
-  }
-}
 async function deleteIptablesRulesByComment(comment) {
   const deleteFromTable = async (table) => {
     try {
@@ -625,7 +648,7 @@ var NetnsPool = class _NetnsPool {
       const data = read();
       const orphaned = [];
       for (const [runnerIdx, runner] of Object.entries(data.runners)) {
-        if (!isPidAlive(runner.pid)) {
+        if (!isProcessRunning(runner.pid)) {
           orphaned.push({
             runnerIdx,
             namespaces: Object.entries(runner.namespaces).map(
@@ -662,7 +685,7 @@ var NetnsPool = class _NetnsPool {
       const data = read();
       for (const { runnerIdx } of orphanedData) {
         const runner = data.runners[runnerIdx];
-        if (runner && !isPidAlive(runner.pid)) {
+        if (runner && !isProcessRunning(runner.pid)) {
           delete data.runners[runnerIdx];
         }
       }
@@ -1671,8 +1694,8 @@ var FirecrackerVM = class {
    * since we want to clean up as much as possible even if some parts fail.
    */
   async cleanup() {
-    if (this.process && !this.process.killed) {
-      this.process.kill("SIGKILL");
+    if (this.process && !this.process.killed && this.process.pid) {
+      killProcessTree(this.process.pid);
       this.process = null;
     }
     if (this.netns) {
@@ -7884,6 +7907,7 @@ var modelProviderTypeSchema = z19.enum([
   "minimax-api-key",
   "deepseek-api-key",
   "zai-api-key",
+  "azure-foundry",
   "aws-bedrock"
 ]);
 var modelProviderFrameworkSchema = z19.enum(["claude-code", "codex"]);
@@ -9171,11 +9195,10 @@ var ENV_LOADER_PATH = "/usr/local/bin/vm0-agent/env-loader.mjs";
 // src/lib/proxy/vm-registry.ts
 import fs6 from "fs";
 var logger5 = createLogger("VMRegistry");
-var DEFAULT_REGISTRY_PATH = tempPaths.vmRegistry;
 var VMRegistry = class {
   registryPath;
   data;
-  constructor(registryPath = DEFAULT_REGISTRY_PATH) {
+  constructor(registryPath) {
     this.registryPath = registryPath;
     this.data = this.load();
   }
@@ -9262,7 +9285,9 @@ var VMRegistry = class {
 var globalRegistry = null;
 function getVMRegistry() {
   if (!globalRegistry) {
-    globalRegistry = new VMRegistry();
+    throw new Error(
+      "VMRegistry not initialized. Call initVMRegistry(registryPath) first."
+    );
   }
   return globalRegistry;
 }
@@ -9275,502 +9300,16 @@ function initVMRegistry(registryPath) {
 import { spawn as spawn2 } from "child_process";
 import fs7 from "fs";
 import path5 from "path";
-// src/lib/proxy/mitm-addon-script.ts
-var RUNNER_MITM_ADDON_SCRIPT = `#!/usr/bin/env python3
-"""
-mitmproxy addon for VM0 runner-level network security mode.
-This addon runs on the runner HOST (not inside VMs) and:
-1. Intercepts all HTTPS requests from VMs
-2. Looks up the source VM's runId and firewall rules from the VM registry
-3. Evaluates firewall rules (first-match-wins) to ALLOW or DENY
-4. For MITM mode: Rewrites requests to go through VM0 Proxy endpoint
-5. For SNI-only mode: Passes through or blocks without decryption
-6. Logs network activity per-run to JSONL files
-"""
-import os
-import json
-import time
-import urllib.parse
-import ipaddress
-import socket
-from mitmproxy import http, ctx, tls
-# VM0 Proxy configuration from environment
-API_URL = os.environ.get("VM0_API_URL", "https://www.vm0.ai")
-REGISTRY_PATH = os.environ.get("VM0_REGISTRY_PATH", "/tmp/vm0-vm-registry.json")
-VERCEL_BYPASS = os.environ.get("VERCEL_AUTOMATION_BYPASS_SECRET", "")
-# Construct proxy URL
-PROXY_URL = f"{API_URL}/api/webhooks/agent/proxy"
-# Cache for VM registry (reloaded periodically)
-_registry_cache = {}
-_registry_cache_time = 0
-REGISTRY_CACHE_TTL = 2  # seconds
-# Track request start times for latency calculation
-request_start_times = {}
-def load_registry() -> dict:
-    """Load the VM registry from file, with caching."""
-    global _registry_cache, _registry_cache_time
-    now = time.time()
-    if now - _registry_cache_time < REGISTRY_CACHE_TTL:
-        return _registry_cache
-    try:
-        if os.path.exists(REGISTRY_PATH):
-            with open(REGISTRY_PATH, "r") as f:
-                data = json.load(f)
-                _registry_cache = data.get("vms", {})
-                _registry_cache_time = now
-                return _registry_cache
-    except Exception as e:
-        ctx.log.warn(f"Failed to load VM registry: {e}")
-    return _registry_cache
-def get_vm_info(client_ip: str) -> dict | None:
-    """Look up VM info by client IP address."""
-    registry = load_registry()
-    return registry.get(client_ip)
-def get_network_log_path(run_id: str) -> str:
-    """Get the network log file path for a run."""
-    return f"/tmp/vm0-network-{run_id}.jsonl"
-def log_network_entry(run_id: str, entry: dict) -> None:
-    """Write a network log entry to the per-run JSONL file."""
-    if not run_id:
-        return
-    log_path = get_network_log_path(run_id)
-    try:
-        fd = os.open(log_path, os.O_CREAT | os.O_APPEND | os.O_WRONLY, 0o644)
-        try:
-            os.write(fd, (json.dumps(entry) + "\\n").encode())
-        finally:
-            os.close(fd)
-    except Exception as e:
-        ctx.log.warn(f"Failed to write network log: {e}")
-def get_original_url(flow: http.HTTPFlow) -> str:
-    """Reconstruct the original target URL from the request."""
-    scheme = "https" if flow.request.port == 443 else "http"
-    host = flow.request.pretty_host
-    port = flow.request.port
-    if (scheme == "https" and port != 443) or (scheme == "http" and port != 80):
-        host_with_port = f"{host}:{port}"
-    else:
-        host_with_port = host
-    path = flow.request.path
-    return f"{scheme}://{host_with_port}{path}"
-# ============================================================================
-# Firewall Rule Matching
-# ============================================================================
-def match_domain(pattern: str, hostname: str) -> bool:
-    """
-    Match hostname against domain pattern.
-    Supports exact match and wildcard prefix (*.example.com).
-    """
-    if not pattern or not hostname:
-        return False
-    pattern = pattern.lower()
-    hostname = hostname.lower()
-    if pattern.startswith("*."):
-        # Wildcard: *.example.com matches sub.example.com, www.example.com
-        # Also matches example.com itself (without subdomain)
-        suffix = pattern[1:]  # .example.com
-        base = pattern[2:]    # example.com
-        return hostname.endswith(suffix) or hostname == base
-    return hostname == pattern
-def match_ip(cidr: str, ip_str: str) -> bool:
-    """
-    Match IP address against CIDR range.
-    Supports single IPs (1.2.3.4) and ranges (10.0.0.0/8).
-    """
-    if not cidr or not ip_str:
-        return False
-    try:
-        # Parse CIDR (automatically handles single IPs as /32)
-        if "/" not in cidr:
-            cidr = f"{cidr}/32"
-        network = ipaddress.ip_network(cidr, strict=False)
-        ip = ipaddress.ip_address(ip_str)
-        return ip in network
-    except ValueError:
-        return False
-def resolve_hostname_to_ip(hostname: str) -> str | None:
-    """Resolve hostname to IP address for IP-based rule matching."""
-    try:
-        return socket.gethostbyname(hostname)
-    except socket.gaierror:
-        return None
-def evaluate_rules(rules: list, hostname: str, ip_str: str = None) -> tuple[str, str | None]:
-    """
-    Evaluate firewall rules against hostname/IP.
-    Returns (action, matched_rule_description).
-    Rule evaluation is first-match-wins (top to bottom).
-    Rule formats:
-    - Domain/IP rule: { domain: "*.example.com", action: "ALLOW" }
-    - Terminal rule: { final: "DENY" }
-    """
-    if not rules:
-        return ("ALLOW", None)  # No rules = allow all
-    for rule in rules:
-        # Final/terminal rule - value is the action
-        final_action = rule.get("final")
-        if final_action:
-            return (final_action, "final")
-        # Domain rule
-        domain = rule.get("domain")
-        if domain and match_domain(domain, hostname):
-            return (rule.get("action", "DENY"), f"domain:{domain}")
-        # IP rule
-        ip_pattern = rule.get("ip")
-        if ip_pattern:
-            target_ip = ip_str
-            if not target_ip:
-                target_ip = resolve_hostname_to_ip(hostname)
-            if target_ip and match_ip(ip_pattern, target_ip):
-                return (rule.get("action", "DENY"), f"ip:{ip_pattern}")
-    # No rule matched - default deny (zero-trust)
-    return ("DENY", "default")
-# ============================================================================
-# TLS ClientHello Handler (SNI-only mode)
-# ============================================================================
-def tls_clienthello(data: tls.ClientHelloData) -> None:
-    """
-    Handle TLS ClientHello for SNI-based filtering.
-    This is called BEFORE TLS decryption, allowing SNI-only filtering.
-    """
-    client_ip = data.context.client.peername[0] if data.context.client.peername else None
-    if not client_ip:
-        return
-    vm_info = get_vm_info(client_ip)
-    if not vm_info:
-        # Not a registered VM - pass through without MITM interception
-        # This is critical for CIDR-based rules where all VM traffic is redirected
-        data.ignore_connection = True
-        return
-    # If MITM is enabled, let the normal flow handle it
-    if vm_info.get("mitmEnabled", False):
-        return
-    # SNI-only mode: check rules based on SNI
-    sni = data.context.client.sni
-    run_id = vm_info.get("runId", "")
-    rules = vm_info.get("firewallRules", [])
-    # Auto-allow VM0 API requests - the agent MUST be able to communicate with VM0
-    if API_URL and sni:
-        parsed_api = urllib.parse.urlparse(API_URL)
-        api_hostname = parsed_api.hostname.lower() if parsed_api.hostname else ""
-        sni_lower = sni.lower()
-        if api_hostname and (sni_lower == api_hostname or sni_lower.endswith(f".{api_hostname}")):
-            ctx.log.info(f"[{run_id}] SNI-only auto-allow VM0 API: {sni}")
-            log_network_entry(run_id, {
-                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
-                "mode": "sni",
-                "action": "ALLOW",
-                "host": sni,
-                "port": 443,
-                "rule_matched": "vm0-api",
-            })
-            data.ignore_connection = True  # Pass through without MITM
-            return
-    if not sni:
-        # No SNI, can't determine target - block for security
-        ctx.log.warn(f"[{run_id}] SNI-only: No SNI in ClientHello, blocking")
-        log_network_entry(run_id, {
-            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
-            "mode": "sni",
-            "action": "DENY",
-            "host": "",
-            "port": 443,
-            "rule_matched": "no-sni",
-        })
-        # Don't set ignore_connection - mitmproxy will attempt MITM handshake
-        # Since VM doesn't have CA cert (SNI-only mode), TLS will fail immediately
-        return
-    # Evaluate rules
-    action, matched_rule = evaluate_rules(rules, sni)
-    # Log the connection
-    log_network_entry(run_id, {
-        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
-        "mode": "sni",
-        "action": action,
-        "host": sni,
-        "port": 443,
-        "rule_matched": matched_rule,
-    })
-    if action == "ALLOW":
-        # Pass through without MITM - mitmproxy will relay without decryption
-        ctx.log.info(f"[{run_id}] SNI-only ALLOW: {sni} (rule: {matched_rule})")
-        data.ignore_connection = True
-    else:
-        # Block the connection by NOT setting ignore_connection
-        # mitmproxy will attempt MITM handshake, but since VM doesn't have
-        # our CA certificate installed (SNI-only mode), the TLS handshake
-        # will fail immediately with a certificate error.
-        ctx.log.warn(f"[{run_id}] SNI-only DENY: {sni} (rule: {matched_rule})")
-        # Client will see: SSL certificate problem / certificate verify failed
-# ============================================================================
-# HTTP Request Handler (MITM mode)
-# ============================================================================
-def request(flow: http.HTTPFlow) -> None:
-    """
-    Intercept request and apply firewall rules.
-    For MITM mode, rewrites allowed requests to VM0 Proxy.
-    """
-    # Track request start time
-    request_start_times[flow.id] = time.time()
-    # Get client IP (source VM)
-    client_ip = flow.client_conn.peername[0] if flow.client_conn.peername else None
-    if not client_ip:
-        ctx.log.warn("No client IP available, passing through")
-        return
-    # Look up VM info from registry
-    vm_info = get_vm_info(client_ip)
-    if not vm_info:
-        # Not a registered VM, pass through without proxying
-        ctx.log.info(f"No VM registration for {client_ip}, passing through")
-        return
-    run_id = vm_info.get("runId", "")
-    sandbox_token = vm_info.get("sandboxToken", "")
-    mitm_enabled = vm_info.get("mitmEnabled", False)
-    rules = vm_info.get("firewallRules", [])
-    # Store info for response handler
-    flow.metadata["vm_run_id"] = run_id
-    flow.metadata["vm_client_ip"] = client_ip
-    flow.metadata["vm_mitm_enabled"] = mitm_enabled
-    # Get target hostname
-    hostname = flow.request.pretty_host.lower()
-    # Auto-allow VM0 API requests - the agent MUST be able to communicate with VM0
-    # This is checked before user firewall rules to ensure agent functionality
-    if API_URL:
-        parsed_api = urllib.parse.urlparse(API_URL)
-        api_hostname = parsed_api.hostname.lower() if parsed_api.hostname else ""
-        if api_hostname and (hostname == api_hostname or hostname.endswith(f".{api_hostname}")):
-            ctx.log.info(f"[{run_id}] Auto-allow VM0 API: {hostname}")
-            flow.metadata["firewall_action"] = "ALLOW"
-            flow.metadata["firewall_rule"] = "vm0-api"
-            # Continue to skip rewrite check below
-            flow.metadata["original_url"] = get_original_url(flow)
-            flow.metadata["skip_rewrite"] = True
-            return
-    # Evaluate firewall rules
-    action, matched_rule = evaluate_rules(rules, hostname)
-    flow.metadata["firewall_action"] = action
-    flow.metadata["firewall_rule"] = matched_rule
-    if action == "DENY":
-        ctx.log.warn(f"[{run_id}] Firewall DENY: {hostname} (rule: {matched_rule})")
-        # Kill the flow and return error response
-        flow.response = http.Response.make(
-            403,
-            b"Blocked by firewall",
-            {"Content-Type": "text/plain"}
-        )
-        return
-    # Request is ALLOWED - proceed with processing
-    # Skip if no API URL configured
-    if not API_URL:
-        ctx.log.warn("VM0_API_URL not set, passing through")
-        return
-    # Skip rewriting requests already going to VM0 (avoid loops)
-    if API_URL in flow.request.pretty_url:
-        flow.metadata["original_url"] = flow.request.pretty_url
-        flow.metadata["skip_rewrite"] = True
-        return
-    # Skip rewriting requests to trusted storage domains (S3, etc.)
-    # S3 presigned URLs have signatures that break when proxied
-    TRUSTED_DOMAINS = [
-        ".s3.amazonaws.com",
-        ".s3-",  # Regional S3 endpoints like s3-us-west-2.amazonaws.com
-        "s3.amazonaws.com",
-        ".r2.cloudflarestorage.com",
-        ".storage.googleapis.com",
-    ]
-    for domain in TRUSTED_DOMAINS:
-        if domain in hostname or hostname.endswith(domain.lstrip(".")):
-            ctx.log.info(f"[{run_id}] Skipping trusted storage domain: {hostname}")
-            flow.metadata["original_url"] = get_original_url(flow)
-            flow.metadata["skip_rewrite"] = True
-            return
-    # Get original target URL
-    original_url = get_original_url(flow)
-    flow.metadata["original_url"] = original_url
-    # If MITM is not enabled, just allow the request through without rewriting
-    if not mitm_enabled:
-        ctx.log.info(f"[{run_id}] Firewall ALLOW (no MITM): {hostname}")
-        return
-    # MITM mode: rewrite to VM0 Proxy
-    ctx.log.info(f"[{run_id}] Proxying via MITM: {original_url}")
-    # Parse proxy URL
-    parsed = urllib.parse.urlparse(PROXY_URL)
-    # Build query params
-    query_params = {"url": original_url}
-    if run_id:
-        query_params["runId"] = run_id
-    query_string = urllib.parse.urlencode(query_params)
-    # Rewrite request to proxy
-    flow.request.host = parsed.hostname
-    flow.request.port = 443 if parsed.scheme == "https" else 80
-    flow.request.scheme = parsed.scheme
-    flow.request.path = f"{parsed.path}?{query_string}"
-    # Save original Authorization header before overwriting
-    if "Authorization" in flow.request.headers:
-        flow.request.headers["x-vm0-original-authorization"] = flow.request.headers["Authorization"]
-    # Add sandbox authentication token
-    if sandbox_token:
-        flow.request.headers["Authorization"] = f"Bearer {sandbox_token}"
-    # Add Vercel bypass header if configured
-    if VERCEL_BYPASS:
-        flow.request.headers["x-vercel-protection-bypass"] = VERCEL_BYPASS
-def response(flow: http.HTTPFlow) -> None:
-    """
-    Handle response and log network activity.
-    """
-    # Calculate latency
-    start_time = request_start_times.pop(flow.id, None)
-    latency_ms = int((time.time() - start_time) * 1000) if start_time else 0
-    # Get stored info
-    run_id = flow.metadata.get("vm_run_id", "")
-    original_url = flow.metadata.get("original_url", flow.request.pretty_url)
-    mitm_enabled = flow.metadata.get("vm_mitm_enabled", False)
-    firewall_action = flow.metadata.get("firewall_action", "ALLOW")
-    firewall_rule = flow.metadata.get("firewall_rule")
-    # Calculate sizes
-    request_size = len(flow.request.content) if flow.request.content else 0
-    response_size = len(flow.response.content) if flow.response and flow.response.content else 0
-    status_code = flow.response.status_code if flow.response else 0
-    # Parse URL for host
-    try:
-        parsed_url = urllib.parse.urlparse(original_url)
-        host = parsed_url.hostname or flow.request.pretty_host
-        port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)
-    except:
-        host = flow.request.pretty_host
-        port = flow.request.port
-    # Log network entry for this run
-    if run_id:
-        log_entry = {
-            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
-            "mode": "mitm" if mitm_enabled else "sni",
-            "action": firewall_action,
-            "host": host,
-            "port": port,
-            "rule_matched": firewall_rule,
-        }
-        # Add HTTP details only in MITM mode
-        if mitm_enabled:
-            log_entry.update({
-                "method": flow.request.method,
-                "path": flow.request.path.split("?")[0],  # Path without query
-                "url": original_url,
-                "status": status_code,
-                "latency_ms": latency_ms,
-                "request_size": request_size,
-                "response_size": response_size,
-            })
-        log_network_entry(run_id, log_entry)
-    # Log errors to mitmproxy console
-    if flow.response and flow.response.status_code >= 400:
-        ctx.log.warn(
-            f"[{run_id}] Response {flow.response.status_code}: {original_url}"
-        )
-# mitmproxy addon registration
-addons = [tls_clienthello, request, response]
-`;
-// src/lib/proxy/proxy-manager.ts
 var logger6 = createLogger("ProxyManager");
 var DEFAULT_PROXY_OPTIONS = {
-  port: 8080,
-  registryPath: DEFAULT_REGISTRY_PATH
+  port: 8080
 };
 var ProxyManager = class {
   config;
   process = null;
   isRunning = false;
   constructor(config) {
-    const addonPath = path5.join(config.caDir, "mitm_addon.py");
+    const addonPath = path5.join(config.caDir, "mitm-addon.py");
     this.config = {
       ...DEFAULT_PROXY_OPTIONS,
       ...config,
@@ -9793,19 +9332,6 @@ var ProxyManager = class {
       });
     });
   }
-  /**
-   * Ensure the addon script exists at the configured path
-   */
-  ensureAddonScript() {
-    const addonDir = path5.dirname(this.config.addonPath);
-    if (!fs7.existsSync(addonDir)) {
-      fs7.mkdirSync(addonDir, { recursive: true });
-    }
-    fs7.writeFileSync(this.config.addonPath, RUNNER_MITM_ADDON_SCRIPT, {
-      mode: 493
-    });
-    logger6.log(`Addon script written to ${this.config.addonPath}`);
-  }
   /**
    * Validate proxy configuration
    */
@@ -9817,7 +9343,9 @@ var ProxyManager = class {
     if (!fs7.existsSync(caCertPath)) {
       throw new Error(`Proxy CA certificate not found: ${caCertPath}`);
     }
-    this.ensureAddonScript();
+    if (!fs7.existsSync(this.config.addonPath)) {
+      throw new Error(`Addon script not found: ${this.config.addonPath}`);
+    }
   }
   /**
    * Start mitmproxy
@@ -10472,12 +10000,12 @@ function createStatusUpdater(statusFilePath, state) {
 }
 // src/lib/firecracker/network.ts
-import { execSync, exec as exec3 } from "child_process";
+import { execSync as execSync2, exec as exec3 } from "child_process";
 import { promisify as promisify3 } from "util";
 var execAsync3 = promisify3(exec3);
 function commandExists(cmd) {
   try {
-    execSync(`which ${cmd}`, { stdio: "ignore" });
+    execSync2(`which ${cmd}`, { stdio: "ignore" });
     return true;
   } catch {
     return false;
@@ -10492,7 +10020,7 @@ function checkNetworkPrerequisites() {
     }
   }
   try {
-    execSync("sudo -n true 2>/dev/null", { stdio: "ignore" });
+    execSync2("sudo -n true 2>/dev/null", { stdio: "ignore" });
   } catch {
     errors.push(
       "Root/sudo access required for network configuration. Please run with sudo or configure sudoers."
@@ -10518,17 +10046,6 @@ import path7 from "path";
 var logger11 = createLogger("RunnerLock");
 var DEFAULT_PID_FILE = runtimePaths.runnerPid;
 var currentPidFile = null;
-function isProcessRunning(pid) {
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch (err) {
-    if (err instanceof Error && "code" in err && err.code === "EPERM") {
-      return true;
-    }
-    return false;
-  }
-}
 function acquireRunnerLock(options = {}) {
   const pidFile = options.pidFile ?? DEFAULT_PID_FILE;
   const runDir = path7.dirname(pidFile);
@@ -10575,11 +10092,13 @@ async function setupEnvironment(options) {
     process.exit(1);
   }
   logger12.log("Initializing network proxy...");
-  initVMRegistry();
+  const registryPath = runnerPaths.vmRegistry(config.base_dir);
+  initVMRegistry(registryPath);
   const proxyManager = initProxyManager({
     apiUrl: config.server.url,
     port: config.proxy.port,
-    caDir: config.proxy.ca_dir
+    caDir: config.proxy.ca_dir,
+    registryPath
   });
   let proxyEnabled = false;
   try {
@@ -10909,6 +10428,7 @@ var startCommand = new Command("start").description("Start the runner").option("
 // src/commands/doctor.ts
 import { Command as Command2 } from "commander";
 import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
+import { execSync as execSync3 } from "child_process";
 // src/lib/firecracker/process.ts
 import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync4 } from "fs";
@@ -10916,12 +10436,21 @@ import path8 from "path";
 function parseFirecrackerCmdline(cmdline) {
   const args = cmdline.split("\0");
   if (!args[0]?.includes("firecracker")) return null;
+  let filePath;
   const sockIdx = args.indexOf("--api-sock");
-  const socketPath = args[sockIdx + 1];
-  if (sockIdx === -1 || !socketPath) return null;
-  const match = socketPath.match(/vm0-([a-f0-9]+)\/firecracker\.sock$/);
+  if (sockIdx !== -1) {
+    filePath = args[sockIdx + 1];
+  }
+  if (!filePath) {
+    const configIdx = args.indexOf("--config-file");
+    if (configIdx !== -1) {
+      filePath = args[configIdx + 1];
+    }
+  }
+  if (!filePath) return null;
+  const match = filePath.match(/vm0-([a-f0-9]+)\//);
   if (!match?.[1]) return null;
-  return { vmId: createVmId(match[1]), socketPath };
+  return createVmId(match[1]);
 }
 function parseMitmproxyCmdline(cmdline) {
   if (!cmdline.includes("mitmproxy") && !cmdline.includes("mitmdump")) {
@@ -10949,9 +10478,9 @@ function findFirecrackerProcesses() {
     if (!existsSync4(cmdlinePath)) continue;
     try {
       const cmdline = readFileSync2(cmdlinePath, "utf-8");
-      const parsed = parseFirecrackerCmdline(cmdline);
-      if (parsed) {
-        processes.push({ pid, ...parsed });
+      const vmId = parseFirecrackerCmdline(cmdline);
+      if (vmId) {
+        processes.push({ pid, vmId });
       }
     } catch {
       continue;
@@ -10964,33 +10493,25 @@ function findProcessByVmId(vmId) {
   const vmIdStr = vmIdValue(vmId);
   return processes.find((p) => vmIdValue(p.vmId) === vmIdStr) || null;
 }
-function isProcessRunning2(pid) {
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch {
-    return false;
-  }
-}
 async function killProcess(pid, timeoutMs = 5e3) {
-  if (!isProcessRunning2(pid)) return true;
+  if (!isProcessRunning(pid)) return true;
   try {
     process.kill(pid, "SIGTERM");
   } catch {
-    return !isProcessRunning2(pid);
+    return !isProcessRunning(pid);
   }
   const startTime = Date.now();
   while (Date.now() - startTime < timeoutMs) {
-    if (!isProcessRunning2(pid)) return true;
+    if (!isProcessRunning(pid)) return true;
     await new Promise((resolve) => setTimeout(resolve, 100));
   }
-  if (isProcessRunning2(pid)) {
+  if (isProcessRunning(pid)) {
     try {
       process.kill(pid, "SIGKILL");
     } catch {
     }
   }
-  return !isProcessRunning2(pid);
+  return !isProcessRunning(pid);
 }
 function findMitmproxyProcess() {
   const procDir = "/proc";
@@ -11018,15 +10539,26 @@ function findMitmproxyProcess() {
   return null;
 }
+// src/lib/runner/types.ts
+import { z as z30 } from "zod";
+var RunnerModeSchema = z30.enum(["running", "draining", "stopping", "stopped"]);
+var RunnerStatusSchema = z30.object({
+  mode: RunnerModeSchema,
+  active_runs: z30.number(),
+  active_run_ids: z30.array(z30.string()),
+  started_at: z30.string(),
+  updated_at: z30.string()
+});
 // src/commands/doctor.ts
-function displayRunnerStatus(statusFilePath) {
+function displayRunnerStatus(statusFilePath, warnings) {
   if (!existsSync5(statusFilePath)) {
     console.log("Mode: unknown (no status.json)");
     return null;
   }
   try {
-    const status = JSON.parse(
-      readFileSync3(statusFilePath, "utf-8")
+    const status = RunnerStatusSchema.parse(
+      JSON.parse(readFileSync3(statusFilePath, "utf-8"))
     );
     console.log(`Mode: ${status.mode}`);
     if (status.started_at) {
@@ -11037,10 +10569,11 @@ function displayRunnerStatus(statusFilePath) {
     return status;
   } catch {
     console.log("Mode: unknown (status.json unreadable)");
+    warnings.push({ message: "status.json exists but cannot be parsed" });
     return null;
   }
 }
-async function checkApiConnectivity(config) {
+async function checkApiConnectivity(config, warnings) {
   console.log("API Connectivity:");
   try {
     await pollForJob(config.server, config.group);
@@ -11051,6 +10584,9 @@ async function checkApiConnectivity(config) {
     console.log(
       `    Error: ${error instanceof Error ? error.message : "Unknown error"}`
     );
+    warnings.push({
+      message: `Cannot connect to API: ${error instanceof Error ? error.message : "Unknown error"}`
+    });
   }
 }
 async function checkNetwork(config, warnings) {
@@ -11086,8 +10622,7 @@ function buildJobInfo(status, processes) {
       jobs.push({
         runId,
         vmId,
-        hasProcess: !!proc,
-        pid: proc?.pid
+        firecrackerPid: proc?.pid
       });
     }
   }
@@ -11101,13 +10636,61 @@ function displayRuns(jobs, maxConcurrent) {
   }
   console.log("  Run ID                                VM ID       Status");
   for (const job of jobs) {
-    const statusText = job.hasProcess ? `\u2713 Running (PID ${job.pid})` : "\u26A0\uFE0F No process";
+    const statusText = job.firecrackerPid ? `\u2713 Running (PID ${job.firecrackerPid})` : "\u26A0\uFE0F No process";
     console.log(`  ${job.runId}  ${job.vmId}    ${statusText}`);
   }
 }
-function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings) {
+async function findOrphanNetworkNamespaces(warnings) {
+  let allNamespaces = [];
+  try {
+    const output = execSync3("ip netns list 2>/dev/null || true", {
+      encoding: "utf-8"
+    });
+    allNamespaces = output.split("\n").map((line) => line.split(" ")[0] ?? "").filter((ns) => ns.startsWith(NS_PREFIX));
+  } catch (err) {
+    warnings.push({
+      message: `Failed to list network namespaces: ${err instanceof Error ? err.message : "Unknown error"}`
+    });
+    return [];
+  }
+  if (allNamespaces.length === 0) {
+    return [];
+  }
+  const registryPath = runtimePaths.netnsRegistry;
+  if (!existsSync5(registryPath)) {
+    return allNamespaces;
+  }
+  try {
+    return await withFileLock(registryPath, async () => {
+      const registry = RegistrySchema.parse(
+        JSON.parse(readFileSync3(registryPath, "utf-8"))
+      );
+      const aliveNamespaces = /* @__PURE__ */ new Set();
+      for (const [runnerIdx, runner] of Object.entries(registry.runners)) {
+        if (isProcessRunning(runner.pid)) {
+          for (const nsIdx of Object.keys(runner.namespaces)) {
+            aliveNamespaces.add(`${NS_PREFIX}${runnerIdx}-${nsIdx}`);
+          }
+        }
+      }
+      const orphans = [];
+      for (const ns of allNamespaces) {
+        if (!aliveNamespaces.has(ns)) {
+          orphans.push(ns);
+        }
+      }
+      return orphans;
+    });
+  } catch (err) {
+    warnings.push({
+      message: `Failed to read netns registry: ${err instanceof Error ? err.message : "Unknown error"}`
+    });
+    return [];
+  }
+}
+async function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings) {
   for (const job of jobs) {
-    if (!job.hasProcess) {
+    if (!job.firecrackerPid) {
       warnings.push({
         message: `Run ${job.vmId} in status.json but no Firecracker process running`
       });
@@ -11121,6 +10704,12 @@ function detectOrphanResources(jobs, processes, workspaces, statusVmIds, warning
       });
     }
   }
+  const orphanNetns = await findOrphanNetworkNamespaces(warnings);
+  for (const ns of orphanNetns) {
+    warnings.push({
+      message: `Orphan network namespace: ${ns} (runner process not running)`
+    });
+  }
   for (const ws of workspaces) {
     const vmId = runnerPaths.extractVmId(ws);
     if (!processVmIds.has(vmId) && !statusVmIds.has(vmId)) {
@@ -11157,9 +10746,9 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
     const workspacesDir = runnerPaths.workspacesDir(config.base_dir);
     const warnings = [];
     console.log(`Runner: ${config.name}`);
-    const status = displayRunnerStatus(statusFilePath);
+    const status = displayRunnerStatus(statusFilePath, warnings);
     console.log("");
-    await checkApiConnectivity(config);
+    await checkApiConnectivity(config, warnings);
     console.log("");
     await checkNetwork(config, warnings);
     console.log("");
@@ -11168,7 +10757,13 @@ var doctorCommand = new Command2("doctor").description("Diagnose runner health,
     const { jobs, statusVmIds } = buildJobInfo(status, processes);
     displayRuns(jobs, config.sandbox.max_concurrent);
     console.log("");
-    detectOrphanResources(jobs, processes, workspaces, statusVmIds, warnings);
+    await detectOrphanResources(
+      jobs,
+      processes,
+      workspaces,
+      statusVmIds,
+      warnings
+    );
     displayWarnings(warnings);
     process.exit(warnings.length > 0 ? 1 : 0);
   } catch (error) {
@@ -11251,8 +10846,8 @@ var killCommand = new Command3("kill").description("Force terminate a run and cl
       }
       if (runId && existsSync6(statusFilePath)) {
         try {
-          const status = JSON.parse(
-            readFileSync4(statusFilePath, "utf-8")
+          const status = RunnerStatusSchema.parse(
+            JSON.parse(readFileSync4(statusFilePath, "utf-8"))
           );
           const oldCount = status.active_runs;
           status.active_run_ids = status.active_run_ids.filter(
@@ -11309,8 +10904,8 @@ function resolveRunId(input, statusFilePath) {
   }
   if (existsSync6(statusFilePath)) {
     try {
-      const status = JSON.parse(
-        readFileSync4(statusFilePath, "utf-8")
+      const status = RunnerStatusSchema.parse(
+        JSON.parse(readFileSync4(statusFilePath, "utf-8"))
       );
       const match = status.active_run_ids.find(
         (id) => id.startsWith(input)
@@ -11413,6 +11008,7 @@ var benchmarkCommand = new Command4("benchmark").description(
       }
       process.exit(1);
     }
+    initVMRegistry(runnerPaths.vmRegistry(config.base_dir));
     timer.log("Initializing pools...");
     const snapshotConfig = config.firecracker.snapshot;
     await initOverlayPool({
@@ -11658,7 +11254,7 @@ var snapshotCommand = new Command5("snapshot").description("Generate a Firecrack
 );
 // src/index.ts
-var version = true ? "3.12.0" : "0.1.0";
+var version = true ? "3.12.2" : "0.1.0";
 program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
 program.addCommand(startCommand);
 program.addCommand(doctorCommand);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vm0/runner",
-  "version": "3.12.0",
+  "version": "3.12.2",
   "description": "Self-hosted runner for VM0 agents",
   "repository": {
     "type": "git",