@vm0/runner 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +230 -29
  2. package/package.json +2 -2
package/index.js CHANGED
@@ -27,7 +27,12 @@ var runnerConfigSchema = z.object({
  vcpu: z.number().int().min(1).default(2),
  memory_mb: z.number().int().min(128).default(2048),
  poll_interval_ms: z.number().int().min(1e3).default(5e3)
- }).default({}),
+ }).default({
+ max_concurrent: 1,
+ vcpu: 2,
+ memory_mb: 2048,
+ poll_interval_ms: 5e3
+ }),
  firecracker: z.object({
  binary: z.string().min(1, "Firecracker binary path is required"),
  kernel: z.string().min(1, "Kernel path is required"),
@@ -35,7 +40,9 @@ var runnerConfigSchema = z.object({
  }),
  proxy: z.object({
  port: z.number().int().min(1024).max(65535).default(8080)
- }).default({})
+ }).default({
+ port: 8080
+ })
  });
  var debugConfigSchema = z.object({
  name: z.string().default("debug-runner"),
@@ -43,13 +50,21 @@ var debugConfigSchema = z.object({
  server: z.object({
  url: z.string().url().default("http://localhost:3000"),
  token: z.string().default("debug-token")
- }).default({}),
+ }).default({
+ url: "http://localhost:3000",
+ token: "debug-token"
+ }),
  sandbox: z.object({
  max_concurrent: z.number().int().min(1).default(1),
  vcpu: z.number().int().min(1).default(2),
  memory_mb: z.number().int().min(128).default(2048),
  poll_interval_ms: z.number().int().min(1e3).default(5e3)
- }).default({}),
+ }).default({
+ max_concurrent: 1,
+ vcpu: 2,
+ memory_mb: 2048,
+ poll_interval_ms: 5e3
+ }),
  firecracker: z.object({
  binary: z.string().min(1, "Firecracker binary path is required"),
  kernel: z.string().min(1, "Kernel path is required"),
@@ -57,7 +72,9 @@ var debugConfigSchema = z.object({
  }),
  proxy: z.object({
  port: z.number().int().min(1024).max(65535).default(8080)
- }).default({})
+ }).default({
+ port: 8080
+ })
  });
  function loadDebugConfig(configPath) {
  if (!fs.existsSync(configPath)) {
@@ -67,7 +84,7 @@ function loadDebugConfig(configPath) {
  const raw = yaml.parse(content);
  const result = debugConfigSchema.safeParse(raw);
  if (!result.success) {
- const errors = result.error.errors.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
+ const errors = result.error.issues.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
  throw new Error(`Invalid configuration:
  ${errors}`);
  }
@@ -81,7 +98,7 @@ function loadConfig(configPath) {
  const raw = yaml.parse(content);
  const result = runnerConfigSchema.safeParse(raw);
  if (!result.success) {
- const errors = result.error.errors.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
+ const errors = result.error.issues.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
  throw new Error(`Invalid configuration:
  ${errors}`);
  }
@@ -5897,6 +5914,13 @@ var metricDataSchema = z8.object({
  disk_used: z8.number(),
  disk_total: z8.number()
  });
+ var sandboxOperationSchema = z8.object({
+ ts: z8.string(),
+ action_type: z8.string(),
+ duration_ms: z8.number(),
+ success: z8.boolean(),
+ error: z8.string().optional()
+ });
  var networkLogSchema = z8.object({
  timestamp: z8.string(),
  // Common fields (all modes)
@@ -5916,7 +5940,7 @@ var networkLogSchema = z8.object({
  var webhookTelemetryContract = c5.router({
  /**
  * POST /api/webhooks/agent/telemetry
- * Receive telemetry data (system log, metrics, and network logs) from sandbox
+ * Receive telemetry data (system log, metrics, network logs, and sandbox operations) from sandbox
  */
  send: {
  method: "POST",
@@ -5925,7 +5949,8 @@ var webhookTelemetryContract = c5.router({
  runId: z8.string().min(1, "runId is required"),
  systemLog: z8.string().optional(),
  metrics: z8.array(metricDataSchema).optional(),
- networkLogs: z8.array(networkLogSchema).optional()
+ networkLogs: z8.array(networkLogSchema).optional(),
+ sandboxOperations: z8.array(sandboxOperationSchema).optional()
  }),
  responses: {
  200: z8.object({
@@ -7124,6 +7149,10 @@ NETWORK_LOG_FILE = f"/tmp/vm0-network-{RUN_ID}.jsonl"
  TELEMETRY_LOG_POS_FILE = f"/tmp/vm0-telemetry-log-pos-{RUN_ID}.txt"
  TELEMETRY_METRICS_POS_FILE = f"/tmp/vm0-telemetry-metrics-pos-{RUN_ID}.txt"
  TELEMETRY_NETWORK_POS_FILE = f"/tmp/vm0-telemetry-network-pos-{RUN_ID}.txt"
+ TELEMETRY_SANDBOX_OPS_POS_FILE = f"/tmp/vm0-telemetry-sandbox-ops-pos-{RUN_ID}.txt"
+
+ # Sandbox operations log file (JSONL format)
+ SANDBOX_OPS_LOG_FILE = f"/tmp/vm0-sandbox-ops-{RUN_ID}.jsonl"

  # Metrics collection configuration
  METRICS_INTERVAL = 5 # seconds
@@ -7137,6 +7166,36 @@ def validate_config() -> bool:
  if not WORKING_DIR:
  raise ValueError("VM0_WORKING_DIR is required but not set")
  return True
+
+ def record_sandbox_op(
+ action_type: str,
+ duration_ms: int,
+ success: bool,
+ error: str = None
+ ) -> None:
+ """
+ Record a sandbox operation to JSONL file for telemetry upload.
+
+ Args:
+ action_type: Operation name (e.g., "init_total", "storage_download", "cli_execution")
+ duration_ms: Duration in milliseconds
+ success: Whether the operation succeeded
+ error: Optional error message if failed
+ """
+ from datetime import datetime, timezone
+ import json
+
+ entry = {
+ "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
+ "action_type": action_type,
+ "duration_ms": duration_ms,
+ "success": success,
+ }
+ if error:
+ entry["error"] = error
+
+ with open(SANDBOX_OPS_LOG_FILE, "a") as f:
+ f.write(json.dumps(entry) + "\\n")
  `;

  // ../../packages/core/src/sandbox/scripts/lib/log.py.ts
@@ -7575,10 +7634,11 @@ import hashlib
  import tarfile
  import tempfile
  import shutil
+ import time
  from typing import Optional, Dict, Any, List
  from datetime import datetime

- from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL
+ from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL, record_sandbox_op
  from log import log_info, log_warn, log_error, log_debug
  from http_client import http_post_json, http_put_presigned

@@ -7719,7 +7779,9 @@ def create_direct_upload_snapshot(

  # Step 1: Collect file metadata
  log_info("Computing file hashes...")
+ hash_start = time.time()
  files = collect_file_metadata(mount_path)
+ record_sandbox_op("artifact_hash_compute", int((time.time() - hash_start) * 1000), True)
  log_info(f"Found {len(files)} files")

  if not files:
@@ -7727,6 +7789,7 @@ def create_direct_upload_snapshot(

  # Step 2: Call prepare endpoint
  log_info("Calling prepare endpoint...")
+ prepare_start = time.time()
  prepare_payload = {
  "storageName": storage_name,
  "storageType": storage_type,
@@ -7738,12 +7801,15 @@ def create_direct_upload_snapshot(
  prepare_response = http_post_json(STORAGE_PREPARE_URL, prepare_payload)
  if not prepare_response:
  log_error("Failed to call prepare endpoint")
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
  return None

  version_id = prepare_response.get("versionId")
  if not version_id:
  log_error(f"Invalid prepare response: {prepare_response}")
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), True)

  # Step 3: Check if version already exists (deduplication)
  # Still call commit to update HEAD pointer (fixes #649)
@@ -7786,10 +7852,13 @@ def create_direct_upload_snapshot(
  try:
  # Create archive
  log_info("Creating archive...")
+ archive_start = time.time()
  archive_path = os.path.join(temp_dir, "archive.tar.gz")
  if not create_archive(mount_path, archive_path):
  log_error("Failed to create archive")
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), True)

  # Create manifest
  log_info("Creating manifest...")
@@ -7800,12 +7869,14 @@ def create_direct_upload_snapshot(

  # Upload archive to S3
  log_info("Uploading archive to S3...")
+ s3_upload_start = time.time()
  if not http_put_presigned(
  archive_info["presignedUrl"],
  archive_path,
  "application/gzip"
  ):
  log_error("Failed to upload archive to S3")
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
  return None

  # Upload manifest to S3
@@ -7816,10 +7887,13 @@ def create_direct_upload_snapshot(
  "application/json"
  ):
  log_error("Failed to upload manifest to S3")
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), True)

  # Step 6: Call commit endpoint
  log_info("Calling commit endpoint...")
+ commit_start = time.time()
  commit_payload = {
  "storageName": storage_name,
  "storageType": storage_type,
@@ -7834,11 +7908,14 @@ def create_direct_upload_snapshot(
  commit_response = http_post_json(STORAGE_COMMIT_URL, commit_payload)
  if not commit_response:
  log_error("Failed to call commit endpoint")
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
  return None

  if not commit_response.get("success"):
  log_error(f"Commit failed: {commit_response}")
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), True)

  log_info(f"Direct upload snapshot created: {version_id[:8]}")
  return {"versionId": version_id}
@@ -7861,11 +7938,12 @@ import sys
  import json
  import tarfile
  import tempfile
+ import time

  # Add lib to path for imports
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

- from common import validate_config
+ from common import validate_config, record_sandbox_op
  from log import log_info, log_error
  from http_client import http_download

@@ -7945,13 +8023,21 @@ def main():

  log_info(f"Found {storage_count} storages, artifact: {has_artifact}")

+ # Track total download time
+ download_total_start = time.time()
+ download_success = True
+
  # Process storages
  for storage in storages:
  mount_path = storage.get("mountPath")
  archive_url = storage.get("archiveUrl")

  if archive_url and archive_url != "null":
- download_storage(mount_path, archive_url)
+ storage_start = time.time()
+ success = download_storage(mount_path, archive_url)
+ record_sandbox_op("storage_download", int((time.time() - storage_start) * 1000), success)
+ if not success:
+ download_success = False

  # Process artifact
  if artifact:
@@ -7959,8 +8045,14 @@ def main():
  artifact_url = artifact.get("archiveUrl")

  if artifact_url and artifact_url != "null":
- download_storage(artifact_mount, artifact_url)
-
+ artifact_start = time.time()
+ success = download_storage(artifact_mount, artifact_url)
+ record_sandbox_op("artifact_download", int((time.time() - artifact_start) * 1000), success)
+ if not success:
+ download_success = False
+
+ # Record total download time
+ record_sandbox_op("download_total", int((time.time() - download_total_start) * 1000), download_success)
  log_info("All storages downloaded successfully")


@@ -7977,12 +8069,14 @@ Uses direct S3 upload exclusively (no fallback to legacy methods).
  """
  import os
  import glob
+ import time
  from typing import Optional, Dict, Any

  from common import (
  RUN_ID, CHECKPOINT_URL,
  SESSION_ID_FILE, SESSION_HISTORY_PATH_FILE,
- ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME
+ ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME,
+ record_sandbox_op
  )
  from log import log_info, log_error
  from http_client import http_post_json
@@ -8038,19 +8132,27 @@ def create_checkpoint() -> bool:
  Returns:
  True on success, False on failure
  """
+ checkpoint_start = time.time()
  log_info("Creating checkpoint...")

  # Read session ID from temp file
+ session_id_start = time.time()
  if not os.path.exists(SESSION_ID_FILE):
  log_error("No session ID found, checkpoint creation failed")
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), False, "Session ID file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  with open(SESSION_ID_FILE) as f:
  cli_agent_session_id = f.read().strip()
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), True)

  # Read session history path from temp file
+ session_history_start = time.time()
  if not os.path.exists(SESSION_HISTORY_PATH_FILE):
  log_error("No session history path found, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history path file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  with open(SESSION_HISTORY_PATH_FILE) as f:
@@ -8061,6 +8163,8 @@ def create_checkpoint() -> bool:
  parts = session_history_path_raw.split(":", 2)
  if len(parts) != 3:
  log_error(f"Invalid Codex search marker format: {session_history_path_raw}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Invalid Codex search marker")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  sessions_dir = parts[1]
  codex_session_id = parts[2]
@@ -8068,6 +8172,8 @@ def create_checkpoint() -> bool:
  session_history_path = find_codex_session_file(sessions_dir, codex_session_id)
  if not session_history_path:
  log_error(f"Could not find Codex session file for {codex_session_id} in {sessions_dir}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Codex session file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  else:
  session_history_path = session_history_path_raw
@@ -8075,6 +8181,8 @@ def create_checkpoint() -> bool:
  # Check if session history file exists
  if not os.path.exists(session_history_path):
  log_error(f"Session history file not found at {session_history_path}, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Read session history
@@ -8083,14 +8191,19 @@ def create_checkpoint() -> bool:
  cli_agent_session_history = f.read()
  except IOError as e:
  log_error(f"Failed to read session history: {e}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, str(e))
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  if not cli_agent_session_history.strip():
  log_error("Session history is empty, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history empty")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  line_count = len(cli_agent_session_history.strip().split("\\n"))
  log_info(f"Session history loaded ({line_count} lines)")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), True)

  # CLI agent type (default to claude-code)
  cli_agent_type = os.environ.get("CLI_AGENT_TYPE", "claude-code")
@@ -8104,6 +8217,7 @@ def create_checkpoint() -> bool:

  if ARTIFACT_DRIVER != "vas":
  log_error(f"Unknown artifact driver: {ARTIFACT_DRIVER} (only 'vas' is supported)")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # VAS artifact: create snapshot using direct S3 upload (bypasses Vercel 4.5MB limit)
@@ -8120,12 +8234,14 @@ def create_checkpoint() -> bool:

  if not snapshot:
  log_error("Failed to create VAS snapshot for artifact")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Extract versionId from snapshot response
  artifact_version = snapshot.get("versionId")
  if not artifact_version:
  log_error("Failed to extract versionId from snapshot")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Build artifact snapshot JSON with new format (artifactName + artifactVersion)
@@ -8153,6 +8269,7 @@ def create_checkpoint() -> bool:
  checkpoint_payload["artifactSnapshot"] = artifact_snapshot

  # Call checkpoint API
+ api_call_start = time.time()
  result = http_post_json(CHECKPOINT_URL, checkpoint_payload)

  # Validate response contains checkpointId to confirm checkpoint was actually created
@@ -8160,9 +8277,13 @@ def create_checkpoint() -> bool:
  if result and result.get("checkpointId"):
  checkpoint_id = result.get("checkpointId")
  log_info(f"Checkpoint created successfully: {checkpoint_id}")
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), True)
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), True)
  return True
  else:
  log_error(f"Checkpoint API returned invalid response: {result}")
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), False, "Invalid API response")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  `;

@@ -8556,8 +8677,9 @@ from typing import List, Dict, Any

  from common import (
  RUN_ID, TELEMETRY_URL, TELEMETRY_INTERVAL,
- SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE,
- TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE
+ SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE, SANDBOX_OPS_LOG_FILE,
+ TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE,
+ TELEMETRY_SANDBOX_OPS_POS_FILE
  )
  from log import log_info, log_error, log_debug, log_warn
  from http_client import http_post_json
@@ -8660,6 +8782,19 @@ def read_network_logs_from_position(pos_file: str) -> tuple[List[Dict[str, Any]]
  return read_jsonl_from_position(NETWORK_LOG_FILE, pos_file)


+ def read_sandbox_ops_from_position(pos_file: str) -> tuple[List[Dict[str, Any]], int]:
+ """
+ Read new sandbox operations from JSONL file starting from last position.
+
+ Args:
+ pos_file: Path to position tracking file
+
+ Returns:
+ Tuple of (sandbox operations list, new_position)
+ """
+ return read_jsonl_from_position(SANDBOX_OPS_LOG_FILE, pos_file)
+
+
  def upload_telemetry() -> bool:
  """
  Upload telemetry data to VM0 API.
@@ -8676,8 +8811,11 @@ def upload_telemetry() -> bool:
  # Read new network logs
  network_logs, network_pos = read_network_logs_from_position(TELEMETRY_NETWORK_POS_FILE)

+ # Read new sandbox operations
+ sandbox_ops, sandbox_ops_pos = read_sandbox_ops_from_position(TELEMETRY_SANDBOX_OPS_POS_FILE)
+
  # Skip if nothing new
- if not system_log and not metrics and not network_logs:
+ if not system_log and not metrics and not network_logs and not sandbox_ops:
  log_debug("No new telemetry data to upload")
  return True

@@ -8691,10 +8829,11 @@ def upload_telemetry() -> bool:
  "runId": RUN_ID,
  "systemLog": masked_system_log,
  "metrics": metrics, # Metrics don't contain secrets (just numbers)
- "networkLogs": masked_network_logs
+ "networkLogs": masked_network_logs,
+ "sandboxOperations": sandbox_ops # Sandbox ops don't contain secrets (just timing data)
  }

- log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs")
+ log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs, {len(sandbox_ops)} sandbox ops")

  result = http_post_json(TELEMETRY_URL, payload, max_retries=1)

@@ -8703,6 +8842,7 @@ def upload_telemetry() -> bool:
  save_position(TELEMETRY_LOG_POS_FILE, log_pos)
  save_position(TELEMETRY_METRICS_POS_FILE, metrics_pos)
  save_position(TELEMETRY_NETWORK_POS_FILE, network_pos)
+ save_position(TELEMETRY_SANDBOX_OPS_POS_FILE, sandbox_ops_pos)
  log_debug(f"Telemetry uploaded successfully: {result.get('id', 'unknown')}")
  return True
  else:
@@ -8940,7 +9080,7 @@ sys.path.insert(0, "/usr/local/bin/vm0-agent/lib")
  from common import (
  WORKING_DIR, PROMPT, RESUME_SESSION_ID, COMPLETE_URL, RUN_ID,
  EVENT_ERROR_FLAG, HEARTBEAT_URL, HEARTBEAT_INTERVAL, AGENT_LOG_FILE,
- CLI_AGENT_TYPE, OPENAI_MODEL, validate_config
+ CLI_AGENT_TYPE, OPENAI_MODEL, validate_config, record_sandbox_op
  )
  from log import log_info, log_error, log_warn
  from events import send_event
@@ -8976,10 +9116,14 @@ def _cleanup(exit_code: int, error_message: str):

  # Perform final telemetry upload before completion
  # This ensures all remaining data is captured
+ telemetry_start = time.time()
+ telemetry_success = True
  try:
  final_telemetry_upload()
  except Exception as e:
+ telemetry_success = False
  log_error(f"Final telemetry upload failed: {e}")
+ record_sandbox_op("final_telemetry_upload", int((time.time() - telemetry_start) * 1000), telemetry_success)

  # Always call complete API at the end
  # This sends vm0_result (on success) or vm0_error (on failure) and kills the sandbox
@@ -8992,13 +9136,17 @@ def _cleanup(exit_code: int, error_message: str):
  if error_message:
  complete_payload["error"] = error_message

+ complete_start = time.time()
+ complete_success = False
  try:
  if http_post_json(COMPLETE_URL, complete_payload):
  log_info("Complete API called successfully")
+ complete_success = True
  else:
  log_error("Failed to call complete API (sandbox may not be cleaned up)")
  except Exception as e:
  log_error(f"Complete API call failed: {e}")
+ record_sandbox_op("complete_api_call", int((time.time() - complete_start) * 1000), complete_success)

  # Stop heartbeat thread
  shutdown_event.set()
@@ -9030,25 +9178,36 @@ def _run() -> tuple[int, str]:
  log_info(f"Working directory: {WORKING_DIR}")

  # Start heartbeat thread
+ heartbeat_start = time.time()
  heartbeat_thread = threading.Thread(target=heartbeat_loop, daemon=True)
  heartbeat_thread.start()
  log_info("Heartbeat thread started")
+ record_sandbox_op("heartbeat_start", int((time.time() - heartbeat_start) * 1000), True)

  # Start metrics collector thread
+ metrics_start = time.time()
  start_metrics_collector(shutdown_event)
  log_info("Metrics collector thread started")
+ record_sandbox_op("metrics_collector_start", int((time.time() - metrics_start) * 1000), True)

  # Start telemetry upload thread
+ telemetry_start = time.time()
  start_telemetry_upload(shutdown_event)
  log_info("Telemetry upload thread started")
+ record_sandbox_op("telemetry_upload_start", int((time.time() - telemetry_start) * 1000), True)

  # Create and change to working directory - raises RuntimeError if fails
  # Directory may not exist if no artifact/storage was downloaded (e.g., first run)
+ working_dir_start = time.time()
+ working_dir_success = True
  try:
  os.makedirs(WORKING_DIR, exist_ok=True)
  os.chdir(WORKING_DIR)
  except OSError as e:
+ working_dir_success = False
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), False, str(e))
  raise RuntimeError(f"Failed to create/change to working directory: {WORKING_DIR} - {e}") from e
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), working_dir_success)

  # Set up Codex configuration if using Codex CLI
  # Claude Code uses ~/.claude by default (no configuration needed)
@@ -9061,6 +9220,8 @@ def _run() -> tuple[int, str]:
  log_info(f"Codex home directory: {codex_home}")

  # Login with API key via stdin (recommended method)
+ codex_login_start = time.time()
+ codex_login_success = False
  api_key = os.environ.get("OPENAI_API_KEY", "")
  if api_key:
  result = subprocess.run(
@@ -9071,13 +9232,16 @@ def _run() -> tuple[int, str]:
  )
  if result.returncode == 0:
  log_info("Codex authenticated with API key")
+ codex_login_success = True
  else:
  log_error(f"Codex login failed: {result.stderr}")
  else:
  log_error("OPENAI_API_KEY not set")
+ record_sandbox_op("codex_login", int((time.time() - codex_login_start) * 1000), codex_login_success)

- init_duration = int(time.time() - init_start_time)
- log_info(f"\u2713 Initialization complete ({init_duration}s)")
+ init_duration_ms = int((time.time() - init_start_time) * 1000)
+ record_sandbox_op("init_total", init_duration_ms, True)
+ log_info(f"\u2713 Initialization complete ({init_duration_ms // 1000}s)")

  # Lifecycle: Execution
  log_info("\u25B7 Execution")
@@ -9241,12 +9405,13 @@ def _run() -> tuple[int, str]:
  final_exit_code = 1
  error_message = "Some events failed to send"

- # Log execution result
- exec_duration = int(time.time() - exec_start_time)
+ # Log execution result and record metric
+ exec_duration_ms = int((time.time() - exec_start_time) * 1000)
+ record_sandbox_op("cli_execution", exec_duration_ms, agent_exit_code == 0)
  if agent_exit_code == 0 and final_exit_code == 0:
- log_info(f"\u2713 Execution complete ({exec_duration}s)")
+ log_info(f"\u2713 Execution complete ({exec_duration_ms // 1000}s)")
  else:
- log_info(f"\u2717 Execution failed ({exec_duration}s)")
+ log_info(f"\u2717 Execution failed ({exec_duration_ms // 1000}s)")

  # Handle completion
  if agent_exit_code == 0 and final_exit_code == 0:
@@ -10230,7 +10395,7 @@ function initMetrics(config) {
  url: "https://api.axiom.co/v1/metrics",
  headers: {
  Authorization: `Bearer ${config.axiomToken}`,
- "X-Axiom-Dataset": `runner-metrics-${env}`
+ "X-Axiom-Dataset": `vm0-sandbox-op-log-${env}`
  }
  });
  meterProvider = new MeterProvider({
@@ -10700,6 +10865,42 @@ async function executeJob(context, config, options = {}) {
  completed = true;
  break;
  }
+ if (!options.benchmarkMode) {
+ const processCheck = await ssh.exec(
+ `pgrep -f "env-loader.py" > /dev/null 2>&1 && echo "RUNNING" || echo "DEAD"`
+ );
+ if (processCheck.stdout.trim() === "DEAD") {
+ log(
+ `[Executor] Agent process died unexpectedly without writing exit code`
+ );
+ const logContent = await ssh.exec(
+ `tail -50 ${systemLogFile} 2>/dev/null`
+ );
+ const dmesgCheck = await ssh.exec(
+ `dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
+ );
+ let errorMsg = "Agent process terminated unexpectedly";
+ if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
+ errorMsg = "Agent process killed by OOM killer";
+ log(`[Executor] OOM detected: ${dmesgCheck.stdout}`);
+ }
+ if (logContent.stdout) {
+ log(
+ `[Executor] Last log output: ${logContent.stdout.substring(0, 500)}`
+ );
+ }
+ const durationMs2 = Date.now() - startTime;
+ recordRunnerOperation({
+ actionType: "agent_execute",
+ durationMs: durationMs2,
+ success: false
+ });
+ return {
+ exitCode: 1,
+ error: errorMsg
+ };
+ }
+ }
  }
  const durationMs = Date.now() - startTime;
  const duration = Math.round(durationMs / 1e3);
@@ -11099,7 +11300,7 @@ var benchmarkCommand = new Command3("benchmark").description(
  });

  // src/index.ts
- var version = true ? "2.8.1" : "0.1.0";
+ var version = true ? "2.8.3" : "0.1.0";
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
  program.addCommand(startCommand);
  program.addCommand(statusCommand);
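
Note on the schema changes above: they track the zod v3-to-v4 migration made explicit by the dependency bump below. Nested .default({}) calls are replaced with fully populated default objects, and validation failures are read from result.error.issues instead of the removed errors alias. A minimal sketch of the same pattern, assuming zod ^4.x (the schema names here are illustrative, not the package's own):

import { z } from "zod"; // assumes zod ^4.x, matching the bumped dependency

// Spell out nested defaults in full rather than relying on .default({})
// to fill in the inner field defaults.
const proxySchema = z.object({
  port: z.number().int().min(1024).max(65535).default(8080)
}).default({ port: 8080 });

const configSchema = z.object({ proxy: proxySchema });

const result = configSchema.safeParse({ proxy: { port: 80 } });
if (!result.success) {
  // zod v4 reports failures on error.issues; the v3 errors alias is gone.
  const details = result.error.issues
    .map((i) => ` - ${i.path.join(".")}: ${i.message}`)
    .join("\n");
  console.error(`Invalid configuration:\n${details}`);
}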
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@vm0/runner",
- "version": "2.8.1",
+ "version": "2.8.3",
  "description": "Self-hosted runner for VM0 agents",
  "repository": {
  "type": "git",
@@ -22,6 +22,6 @@
  "@opentelemetry/semantic-conventions": "^1.25.0",
  "commander": "^14.0.0",
  "yaml": "^2.3.4",
- "zod": "^3.25.64"
+ "zod": "^4.1.12"
  }
  }
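
For context on the telemetry additions in index.js: sandbox operations are appended one JSON object per line (JSONL), and the uploader resumes from a saved byte offset so each upload sends only entries written since the last success. Below is a self-contained sketch of that append-and-read-from-position pattern in Node; the file paths and function names are hypothetical stand-ins for the Python helpers embedded in index.js, not the package's API:

import * as fs from "node:fs";

const OPS_FILE = "/tmp/example-ops.jsonl"; // hypothetical; cf. the /tmp/vm0-sandbox-ops-... JSONL file
const POS_FILE = "/tmp/example-ops-pos.txt"; // hypothetical position-tracking file

// Append one operation as a JSONL record (cf. record_sandbox_op).
function recordOp(actionType: string, durationMs: number, success: boolean): void {
  const entry = { ts: new Date().toISOString(), action_type: actionType, duration_ms: durationMs, success };
  fs.appendFileSync(OPS_FILE, JSON.stringify(entry) + "\n");
}

// Read entries added after the saved byte offset (cf. read_jsonl_from_position).
function readNewOps(): { ops: unknown[]; newPos: number } {
  const pos = fs.existsSync(POS_FILE) ? Number(fs.readFileSync(POS_FILE, "utf8")) || 0 : 0;
  const size = fs.statSync(OPS_FILE).size;
  if (size <= pos) return { ops: [], newPos: pos };
  const buf = Buffer.alloc(size - pos);
  const fd = fs.openSync(OPS_FILE, "r");
  fs.readSync(fd, buf, 0, buf.length, pos);
  fs.closeSync(fd);
  const ops = buf.toString("utf8").split("\n").filter(Boolean).map((line) => JSON.parse(line));
  return { ops, newPos: size };
}

recordOp("demo_step", 42, true);
const { ops, newPos } = readNewOps();
// Persist the offset only after the upload succeeds, as the diff does with
// save_position(...) inside the success branch of upload_telemetry.
fs.writeFileSync(POS_FILE, String(newPos));
console.log(`${ops.length} new ops; next offset ${newPos}`);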