@vm0/runner 2.8.0 → 2.8.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +176 -25
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -866,8 +866,10 @@ var SSHClient = class {
866
866
  }
867
867
  /**
868
868
  * Execute a command on the remote VM
869
+ * @param command - The command to execute
870
+ * @param timeoutMs - Optional timeout in milliseconds (default: 300000ms = 5 minutes)
869
871
  */
870
- async exec(command) {
872
+ async exec(command, timeoutMs) {
871
873
  const sshCmd = this.buildSSHCommand();
872
874
  const escapedCommand = command.replace(/'/g, "'\\''");
873
875
  const fullCmd = [...sshCmd, `'${escapedCommand}'`].join(" ");
@@ -875,8 +877,8 @@ var SSHClient = class {
875
877
  const { stdout, stderr } = await execAsync2(fullCmd, {
876
878
  maxBuffer: 50 * 1024 * 1024,
877
879
  // 50MB buffer
878
- timeout: 3e5
879
- // 5 minute timeout
880
+ timeout: timeoutMs ?? 3e5
881
+ // Default 5 minutes, customizable per call
880
882
  });
881
883
  return {
882
884
  exitCode: 0,
@@ -958,10 +960,11 @@ var SSHClient = class {
958
960
  }
959
961
  /**
960
962
  * Check if SSH connection is available
963
+ * Uses a short timeout (15s) to ensure waitUntilReachable() respects its outer timeout
961
964
  */
962
965
  async isReachable() {
963
966
  try {
964
- const result = await this.exec("echo ok");
967
+ const result = await this.exec("echo ok", 15e3);
965
968
  return result.exitCode === 0 && result.stdout.trim() === "ok";
966
969
  } catch {
967
970
  return false;
@@ -5894,6 +5897,13 @@ var metricDataSchema = z8.object({
5894
5897
  disk_used: z8.number(),
5895
5898
  disk_total: z8.number()
5896
5899
  });
5900
+ var sandboxOperationSchema = z8.object({
5901
+ ts: z8.string(),
5902
+ action_type: z8.string(),
5903
+ duration_ms: z8.number(),
5904
+ success: z8.boolean(),
5905
+ error: z8.string().optional()
5906
+ });
5897
5907
  var networkLogSchema = z8.object({
5898
5908
  timestamp: z8.string(),
5899
5909
  // Common fields (all modes)
@@ -5913,7 +5923,7 @@ var networkLogSchema = z8.object({
5913
5923
  var webhookTelemetryContract = c5.router({
5914
5924
  /**
5915
5925
  * POST /api/webhooks/agent/telemetry
5916
- * Receive telemetry data (system log, metrics, and network logs) from sandbox
5926
+ * Receive telemetry data (system log, metrics, network logs, and sandbox operations) from sandbox
5917
5927
  */
5918
5928
  send: {
5919
5929
  method: "POST",
@@ -5922,7 +5932,8 @@ var webhookTelemetryContract = c5.router({
5922
5932
  runId: z8.string().min(1, "runId is required"),
5923
5933
  systemLog: z8.string().optional(),
5924
5934
  metrics: z8.array(metricDataSchema).optional(),
5925
- networkLogs: z8.array(networkLogSchema).optional()
5935
+ networkLogs: z8.array(networkLogSchema).optional(),
5936
+ sandboxOperations: z8.array(sandboxOperationSchema).optional()
5926
5937
  }),
5927
5938
  responses: {
5928
5939
  200: z8.object({
@@ -7121,6 +7132,10 @@ NETWORK_LOG_FILE = f"/tmp/vm0-network-{RUN_ID}.jsonl"
7121
7132
  TELEMETRY_LOG_POS_FILE = f"/tmp/vm0-telemetry-log-pos-{RUN_ID}.txt"
7122
7133
  TELEMETRY_METRICS_POS_FILE = f"/tmp/vm0-telemetry-metrics-pos-{RUN_ID}.txt"
7123
7134
  TELEMETRY_NETWORK_POS_FILE = f"/tmp/vm0-telemetry-network-pos-{RUN_ID}.txt"
7135
+ TELEMETRY_SANDBOX_OPS_POS_FILE = f"/tmp/vm0-telemetry-sandbox-ops-pos-{RUN_ID}.txt"
7136
+
7137
+ # Sandbox operations log file (JSONL format)
7138
+ SANDBOX_OPS_LOG_FILE = f"/tmp/vm0-sandbox-ops-{RUN_ID}.jsonl"
7124
7139
 
7125
7140
  # Metrics collection configuration
7126
7141
  METRICS_INTERVAL = 5 # seconds
@@ -7134,6 +7149,36 @@ def validate_config() -> bool:
7134
7149
  if not WORKING_DIR:
7135
7150
  raise ValueError("VM0_WORKING_DIR is required but not set")
7136
7151
  return True
7152
+
7153
+ def record_sandbox_op(
7154
+ action_type: str,
7155
+ duration_ms: int,
7156
+ success: bool,
7157
+ error: str = None
7158
+ ) -> None:
7159
+ """
7160
+ Record a sandbox operation to JSONL file for telemetry upload.
7161
+
7162
+ Args:
7163
+ action_type: Operation name (e.g., "init_total", "storage_download", "cli_execution")
7164
+ duration_ms: Duration in milliseconds
7165
+ success: Whether the operation succeeded
7166
+ error: Optional error message if failed
7167
+ """
7168
+ from datetime import datetime, timezone
7169
+ import json
7170
+
7171
+ entry = {
7172
+ "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
7173
+ "action_type": action_type,
7174
+ "duration_ms": duration_ms,
7175
+ "success": success,
7176
+ }
7177
+ if error:
7178
+ entry["error"] = error
7179
+
7180
+ with open(SANDBOX_OPS_LOG_FILE, "a") as f:
7181
+ f.write(json.dumps(entry) + "\\n")
7137
7182
  `;
7138
7183
 
7139
7184
  // ../../packages/core/src/sandbox/scripts/lib/log.py.ts
@@ -7572,10 +7617,11 @@ import hashlib
7572
7617
  import tarfile
7573
7618
  import tempfile
7574
7619
  import shutil
7620
+ import time
7575
7621
  from typing import Optional, Dict, Any, List
7576
7622
  from datetime import datetime
7577
7623
 
7578
- from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL
7624
+ from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL, record_sandbox_op
7579
7625
  from log import log_info, log_warn, log_error, log_debug
7580
7626
  from http_client import http_post_json, http_put_presigned
7581
7627
 
@@ -7716,7 +7762,9 @@ def create_direct_upload_snapshot(
7716
7762
 
7717
7763
  # Step 1: Collect file metadata
7718
7764
  log_info("Computing file hashes...")
7765
+ hash_start = time.time()
7719
7766
  files = collect_file_metadata(mount_path)
7767
+ record_sandbox_op("artifact_hash_compute", int((time.time() - hash_start) * 1000), True)
7720
7768
  log_info(f"Found {len(files)} files")
7721
7769
 
7722
7770
  if not files:
@@ -7724,6 +7772,7 @@ def create_direct_upload_snapshot(
7724
7772
 
7725
7773
  # Step 2: Call prepare endpoint
7726
7774
  log_info("Calling prepare endpoint...")
7775
+ prepare_start = time.time()
7727
7776
  prepare_payload = {
7728
7777
  "storageName": storage_name,
7729
7778
  "storageType": storage_type,
@@ -7735,12 +7784,15 @@ def create_direct_upload_snapshot(
7735
7784
  prepare_response = http_post_json(STORAGE_PREPARE_URL, prepare_payload)
7736
7785
  if not prepare_response:
7737
7786
  log_error("Failed to call prepare endpoint")
7787
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
7738
7788
  return None
7739
7789
 
7740
7790
  version_id = prepare_response.get("versionId")
7741
7791
  if not version_id:
7742
7792
  log_error(f"Invalid prepare response: {prepare_response}")
7793
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
7743
7794
  return None
7795
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), True)
7744
7796
 
7745
7797
  # Step 3: Check if version already exists (deduplication)
7746
7798
  # Still call commit to update HEAD pointer (fixes #649)
@@ -7783,10 +7835,13 @@ def create_direct_upload_snapshot(
7783
7835
  try:
7784
7836
  # Create archive
7785
7837
  log_info("Creating archive...")
7838
+ archive_start = time.time()
7786
7839
  archive_path = os.path.join(temp_dir, "archive.tar.gz")
7787
7840
  if not create_archive(mount_path, archive_path):
7788
7841
  log_error("Failed to create archive")
7842
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), False)
7789
7843
  return None
7844
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), True)
7790
7845
 
7791
7846
  # Create manifest
7792
7847
  log_info("Creating manifest...")
@@ -7797,12 +7852,14 @@ def create_direct_upload_snapshot(
7797
7852
 
7798
7853
  # Upload archive to S3
7799
7854
  log_info("Uploading archive to S3...")
7855
+ s3_upload_start = time.time()
7800
7856
  if not http_put_presigned(
7801
7857
  archive_info["presignedUrl"],
7802
7858
  archive_path,
7803
7859
  "application/gzip"
7804
7860
  ):
7805
7861
  log_error("Failed to upload archive to S3")
7862
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
7806
7863
  return None
7807
7864
 
7808
7865
  # Upload manifest to S3
@@ -7813,10 +7870,13 @@ def create_direct_upload_snapshot(
7813
7870
  "application/json"
7814
7871
  ):
7815
7872
  log_error("Failed to upload manifest to S3")
7873
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
7816
7874
  return None
7875
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), True)
7817
7876
 
7818
7877
  # Step 6: Call commit endpoint
7819
7878
  log_info("Calling commit endpoint...")
7879
+ commit_start = time.time()
7820
7880
  commit_payload = {
7821
7881
  "storageName": storage_name,
7822
7882
  "storageType": storage_type,
@@ -7831,11 +7891,14 @@ def create_direct_upload_snapshot(
7831
7891
  commit_response = http_post_json(STORAGE_COMMIT_URL, commit_payload)
7832
7892
  if not commit_response:
7833
7893
  log_error("Failed to call commit endpoint")
7894
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
7834
7895
  return None
7835
7896
 
7836
7897
  if not commit_response.get("success"):
7837
7898
  log_error(f"Commit failed: {commit_response}")
7899
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
7838
7900
  return None
7901
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), True)
7839
7902
 
7840
7903
  log_info(f"Direct upload snapshot created: {version_id[:8]}")
7841
7904
  return {"versionId": version_id}
@@ -7858,11 +7921,12 @@ import sys
7858
7921
  import json
7859
7922
  import tarfile
7860
7923
  import tempfile
7924
+ import time
7861
7925
 
7862
7926
  # Add lib to path for imports
7863
7927
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
7864
7928
 
7865
- from common import validate_config
7929
+ from common import validate_config, record_sandbox_op
7866
7930
  from log import log_info, log_error
7867
7931
  from http_client import http_download
7868
7932
 
@@ -7942,13 +8006,21 @@ def main():
7942
8006
 
7943
8007
  log_info(f"Found {storage_count} storages, artifact: {has_artifact}")
7944
8008
 
8009
+ # Track total download time
8010
+ download_total_start = time.time()
8011
+ download_success = True
8012
+
7945
8013
  # Process storages
7946
8014
  for storage in storages:
7947
8015
  mount_path = storage.get("mountPath")
7948
8016
  archive_url = storage.get("archiveUrl")
7949
8017
 
7950
8018
  if archive_url and archive_url != "null":
7951
- download_storage(mount_path, archive_url)
8019
+ storage_start = time.time()
8020
+ success = download_storage(mount_path, archive_url)
8021
+ record_sandbox_op("storage_download", int((time.time() - storage_start) * 1000), success)
8022
+ if not success:
8023
+ download_success = False
7952
8024
 
7953
8025
  # Process artifact
7954
8026
  if artifact:
@@ -7956,8 +8028,14 @@ def main():
7956
8028
  artifact_url = artifact.get("archiveUrl")
7957
8029
 
7958
8030
  if artifact_url and artifact_url != "null":
7959
- download_storage(artifact_mount, artifact_url)
7960
-
8031
+ artifact_start = time.time()
8032
+ success = download_storage(artifact_mount, artifact_url)
8033
+ record_sandbox_op("artifact_download", int((time.time() - artifact_start) * 1000), success)
8034
+ if not success:
8035
+ download_success = False
8036
+
8037
+ # Record total download time
8038
+ record_sandbox_op("download_total", int((time.time() - download_total_start) * 1000), download_success)
7961
8039
  log_info("All storages downloaded successfully")
7962
8040
 
7963
8041
 
@@ -7974,12 +8052,14 @@ Uses direct S3 upload exclusively (no fallback to legacy methods).
7974
8052
  """
7975
8053
  import os
7976
8054
  import glob
8055
+ import time
7977
8056
  from typing import Optional, Dict, Any
7978
8057
 
7979
8058
  from common import (
7980
8059
  RUN_ID, CHECKPOINT_URL,
7981
8060
  SESSION_ID_FILE, SESSION_HISTORY_PATH_FILE,
7982
- ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME
8061
+ ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME,
8062
+ record_sandbox_op
7983
8063
  )
7984
8064
  from log import log_info, log_error
7985
8065
  from http_client import http_post_json
@@ -8035,19 +8115,27 @@ def create_checkpoint() -> bool:
8035
8115
  Returns:
8036
8116
  True on success, False on failure
8037
8117
  """
8118
+ checkpoint_start = time.time()
8038
8119
  log_info("Creating checkpoint...")
8039
8120
 
8040
8121
  # Read session ID from temp file
8122
+ session_id_start = time.time()
8041
8123
  if not os.path.exists(SESSION_ID_FILE):
8042
8124
  log_error("No session ID found, checkpoint creation failed")
8125
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), False, "Session ID file not found")
8126
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8043
8127
  return False
8044
8128
 
8045
8129
  with open(SESSION_ID_FILE) as f:
8046
8130
  cli_agent_session_id = f.read().strip()
8131
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), True)
8047
8132
 
8048
8133
  # Read session history path from temp file
8134
+ session_history_start = time.time()
8049
8135
  if not os.path.exists(SESSION_HISTORY_PATH_FILE):
8050
8136
  log_error("No session history path found, checkpoint creation failed")
8137
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history path file not found")
8138
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8051
8139
  return False
8052
8140
 
8053
8141
  with open(SESSION_HISTORY_PATH_FILE) as f:
@@ -8058,6 +8146,8 @@ def create_checkpoint() -> bool:
8058
8146
  parts = session_history_path_raw.split(":", 2)
8059
8147
  if len(parts) != 3:
8060
8148
  log_error(f"Invalid Codex search marker format: {session_history_path_raw}")
8149
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Invalid Codex search marker")
8150
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8061
8151
  return False
8062
8152
  sessions_dir = parts[1]
8063
8153
  codex_session_id = parts[2]
@@ -8065,6 +8155,8 @@ def create_checkpoint() -> bool:
8065
8155
  session_history_path = find_codex_session_file(sessions_dir, codex_session_id)
8066
8156
  if not session_history_path:
8067
8157
  log_error(f"Could not find Codex session file for {codex_session_id} in {sessions_dir}")
8158
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Codex session file not found")
8159
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8068
8160
  return False
8069
8161
  else:
8070
8162
  session_history_path = session_history_path_raw
@@ -8072,6 +8164,8 @@ def create_checkpoint() -> bool:
8072
8164
  # Check if session history file exists
8073
8165
  if not os.path.exists(session_history_path):
8074
8166
  log_error(f"Session history file not found at {session_history_path}, checkpoint creation failed")
8167
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history file not found")
8168
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8075
8169
  return False
8076
8170
 
8077
8171
  # Read session history
@@ -8080,14 +8174,19 @@ def create_checkpoint() -> bool:
8080
8174
  cli_agent_session_history = f.read()
8081
8175
  except IOError as e:
8082
8176
  log_error(f"Failed to read session history: {e}")
8177
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, str(e))
8178
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8083
8179
  return False
8084
8180
 
8085
8181
  if not cli_agent_session_history.strip():
8086
8182
  log_error("Session history is empty, checkpoint creation failed")
8183
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history empty")
8184
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8087
8185
  return False
8088
8186
 
8089
8187
  line_count = len(cli_agent_session_history.strip().split("\\n"))
8090
8188
  log_info(f"Session history loaded ({line_count} lines)")
8189
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), True)
8091
8190
 
8092
8191
  # CLI agent type (default to claude-code)
8093
8192
  cli_agent_type = os.environ.get("CLI_AGENT_TYPE", "claude-code")
@@ -8101,6 +8200,7 @@ def create_checkpoint() -> bool:
8101
8200
 
8102
8201
  if ARTIFACT_DRIVER != "vas":
8103
8202
  log_error(f"Unknown artifact driver: {ARTIFACT_DRIVER} (only 'vas' is supported)")
8203
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8104
8204
  return False
8105
8205
 
8106
8206
  # VAS artifact: create snapshot using direct S3 upload (bypasses Vercel 4.5MB limit)
@@ -8117,12 +8217,14 @@ def create_checkpoint() -> bool:
8117
8217
 
8118
8218
  if not snapshot:
8119
8219
  log_error("Failed to create VAS snapshot for artifact")
8220
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8120
8221
  return False
8121
8222
 
8122
8223
  # Extract versionId from snapshot response
8123
8224
  artifact_version = snapshot.get("versionId")
8124
8225
  if not artifact_version:
8125
8226
  log_error("Failed to extract versionId from snapshot")
8227
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8126
8228
  return False
8127
8229
 
8128
8230
  # Build artifact snapshot JSON with new format (artifactName + artifactVersion)
@@ -8150,6 +8252,7 @@ def create_checkpoint() -> bool:
8150
8252
  checkpoint_payload["artifactSnapshot"] = artifact_snapshot
8151
8253
 
8152
8254
  # Call checkpoint API
8255
+ api_call_start = time.time()
8153
8256
  result = http_post_json(CHECKPOINT_URL, checkpoint_payload)
8154
8257
 
8155
8258
  # Validate response contains checkpointId to confirm checkpoint was actually created
@@ -8157,9 +8260,13 @@ def create_checkpoint() -> bool:
8157
8260
  if result and result.get("checkpointId"):
8158
8261
  checkpoint_id = result.get("checkpointId")
8159
8262
  log_info(f"Checkpoint created successfully: {checkpoint_id}")
8263
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), True)
8264
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), True)
8160
8265
  return True
8161
8266
  else:
8162
8267
  log_error(f"Checkpoint API returned invalid response: {result}")
8268
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), False, "Invalid API response")
8269
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
8163
8270
  return False
8164
8271
  `;
8165
8272
 
@@ -8553,8 +8660,9 @@ from typing import List, Dict, Any
8553
8660
 
8554
8661
  from common import (
8555
8662
  RUN_ID, TELEMETRY_URL, TELEMETRY_INTERVAL,
8556
- SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE,
8557
- TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE
8663
+ SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE, SANDBOX_OPS_LOG_FILE,
8664
+ TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE,
8665
+ TELEMETRY_SANDBOX_OPS_POS_FILE
8558
8666
  )
8559
8667
  from log import log_info, log_error, log_debug, log_warn
8560
8668
  from http_client import http_post_json
@@ -8657,6 +8765,19 @@ def read_network_logs_from_position(pos_file: str) -> tuple[List[Dict[str, Any]]
8657
8765
  return read_jsonl_from_position(NETWORK_LOG_FILE, pos_file)
8658
8766
 
8659
8767
 
8768
+ def read_sandbox_ops_from_position(pos_file: str) -> tuple[List[Dict[str, Any]], int]:
8769
+ """
8770
+ Read new sandbox operations from JSONL file starting from last position.
8771
+
8772
+ Args:
8773
+ pos_file: Path to position tracking file
8774
+
8775
+ Returns:
8776
+ Tuple of (sandbox operations list, new_position)
8777
+ """
8778
+ return read_jsonl_from_position(SANDBOX_OPS_LOG_FILE, pos_file)
8779
+
8780
+
8660
8781
  def upload_telemetry() -> bool:
8661
8782
  """
8662
8783
  Upload telemetry data to VM0 API.
@@ -8673,8 +8794,11 @@ def upload_telemetry() -> bool:
8673
8794
  # Read new network logs
8674
8795
  network_logs, network_pos = read_network_logs_from_position(TELEMETRY_NETWORK_POS_FILE)
8675
8796
 
8797
+ # Read new sandbox operations
8798
+ sandbox_ops, sandbox_ops_pos = read_sandbox_ops_from_position(TELEMETRY_SANDBOX_OPS_POS_FILE)
8799
+
8676
8800
  # Skip if nothing new
8677
- if not system_log and not metrics and not network_logs:
8801
+ if not system_log and not metrics and not network_logs and not sandbox_ops:
8678
8802
  log_debug("No new telemetry data to upload")
8679
8803
  return True
8680
8804
 
@@ -8688,10 +8812,11 @@ def upload_telemetry() -> bool:
8688
8812
  "runId": RUN_ID,
8689
8813
  "systemLog": masked_system_log,
8690
8814
  "metrics": metrics, # Metrics don't contain secrets (just numbers)
8691
- "networkLogs": masked_network_logs
8815
+ "networkLogs": masked_network_logs,
8816
+ "sandboxOperations": sandbox_ops # Sandbox ops don't contain secrets (just timing data)
8692
8817
  }
8693
8818
 
8694
- log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs")
8819
+ log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs, {len(sandbox_ops)} sandbox ops")
8695
8820
 
8696
8821
  result = http_post_json(TELEMETRY_URL, payload, max_retries=1)
8697
8822
 
@@ -8700,6 +8825,7 @@ def upload_telemetry() -> bool:
8700
8825
  save_position(TELEMETRY_LOG_POS_FILE, log_pos)
8701
8826
  save_position(TELEMETRY_METRICS_POS_FILE, metrics_pos)
8702
8827
  save_position(TELEMETRY_NETWORK_POS_FILE, network_pos)
8828
+ save_position(TELEMETRY_SANDBOX_OPS_POS_FILE, sandbox_ops_pos)
8703
8829
  log_debug(f"Telemetry uploaded successfully: {result.get('id', 'unknown')}")
8704
8830
  return True
8705
8831
  else:
@@ -8937,7 +9063,7 @@ sys.path.insert(0, "/usr/local/bin/vm0-agent/lib")
8937
9063
  from common import (
8938
9064
  WORKING_DIR, PROMPT, RESUME_SESSION_ID, COMPLETE_URL, RUN_ID,
8939
9065
  EVENT_ERROR_FLAG, HEARTBEAT_URL, HEARTBEAT_INTERVAL, AGENT_LOG_FILE,
8940
- CLI_AGENT_TYPE, OPENAI_MODEL, validate_config
9066
+ CLI_AGENT_TYPE, OPENAI_MODEL, validate_config, record_sandbox_op
8941
9067
  )
8942
9068
  from log import log_info, log_error, log_warn
8943
9069
  from events import send_event
@@ -8973,10 +9099,14 @@ def _cleanup(exit_code: int, error_message: str):
8973
9099
 
8974
9100
  # Perform final telemetry upload before completion
8975
9101
  # This ensures all remaining data is captured
9102
+ telemetry_start = time.time()
9103
+ telemetry_success = True
8976
9104
  try:
8977
9105
  final_telemetry_upload()
8978
9106
  except Exception as e:
9107
+ telemetry_success = False
8979
9108
  log_error(f"Final telemetry upload failed: {e}")
9109
+ record_sandbox_op("final_telemetry_upload", int((time.time() - telemetry_start) * 1000), telemetry_success)
8980
9110
 
8981
9111
  # Always call complete API at the end
8982
9112
  # This sends vm0_result (on success) or vm0_error (on failure) and kills the sandbox
@@ -8989,13 +9119,17 @@ def _cleanup(exit_code: int, error_message: str):
8989
9119
  if error_message:
8990
9120
  complete_payload["error"] = error_message
8991
9121
 
9122
+ complete_start = time.time()
9123
+ complete_success = False
8992
9124
  try:
8993
9125
  if http_post_json(COMPLETE_URL, complete_payload):
8994
9126
  log_info("Complete API called successfully")
9127
+ complete_success = True
8995
9128
  else:
8996
9129
  log_error("Failed to call complete API (sandbox may not be cleaned up)")
8997
9130
  except Exception as e:
8998
9131
  log_error(f"Complete API call failed: {e}")
9132
+ record_sandbox_op("complete_api_call", int((time.time() - complete_start) * 1000), complete_success)
8999
9133
 
9000
9134
  # Stop heartbeat thread
9001
9135
  shutdown_event.set()
@@ -9027,25 +9161,36 @@ def _run() -> tuple[int, str]:
9027
9161
  log_info(f"Working directory: {WORKING_DIR}")
9028
9162
 
9029
9163
  # Start heartbeat thread
9164
+ heartbeat_start = time.time()
9030
9165
  heartbeat_thread = threading.Thread(target=heartbeat_loop, daemon=True)
9031
9166
  heartbeat_thread.start()
9032
9167
  log_info("Heartbeat thread started")
9168
+ record_sandbox_op("heartbeat_start", int((time.time() - heartbeat_start) * 1000), True)
9033
9169
 
9034
9170
  # Start metrics collector thread
9171
+ metrics_start = time.time()
9035
9172
  start_metrics_collector(shutdown_event)
9036
9173
  log_info("Metrics collector thread started")
9174
+ record_sandbox_op("metrics_collector_start", int((time.time() - metrics_start) * 1000), True)
9037
9175
 
9038
9176
  # Start telemetry upload thread
9177
+ telemetry_start = time.time()
9039
9178
  start_telemetry_upload(shutdown_event)
9040
9179
  log_info("Telemetry upload thread started")
9180
+ record_sandbox_op("telemetry_upload_start", int((time.time() - telemetry_start) * 1000), True)
9041
9181
 
9042
9182
  # Create and change to working directory - raises RuntimeError if fails
9043
9183
  # Directory may not exist if no artifact/storage was downloaded (e.g., first run)
9184
+ working_dir_start = time.time()
9185
+ working_dir_success = True
9044
9186
  try:
9045
9187
  os.makedirs(WORKING_DIR, exist_ok=True)
9046
9188
  os.chdir(WORKING_DIR)
9047
9189
  except OSError as e:
9190
+ working_dir_success = False
9191
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), False, str(e))
9048
9192
  raise RuntimeError(f"Failed to create/change to working directory: {WORKING_DIR} - {e}") from e
9193
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), working_dir_success)
9049
9194
 
9050
9195
  # Set up Codex configuration if using Codex CLI
9051
9196
  # Claude Code uses ~/.claude by default (no configuration needed)
@@ -9058,6 +9203,8 @@ def _run() -> tuple[int, str]:
9058
9203
  log_info(f"Codex home directory: {codex_home}")
9059
9204
 
9060
9205
  # Login with API key via stdin (recommended method)
9206
+ codex_login_start = time.time()
9207
+ codex_login_success = False
9061
9208
  api_key = os.environ.get("OPENAI_API_KEY", "")
9062
9209
  if api_key:
9063
9210
  result = subprocess.run(
@@ -9068,13 +9215,16 @@ def _run() -> tuple[int, str]:
9068
9215
  )
9069
9216
  if result.returncode == 0:
9070
9217
  log_info("Codex authenticated with API key")
9218
+ codex_login_success = True
9071
9219
  else:
9072
9220
  log_error(f"Codex login failed: {result.stderr}")
9073
9221
  else:
9074
9222
  log_error("OPENAI_API_KEY not set")
9223
+ record_sandbox_op("codex_login", int((time.time() - codex_login_start) * 1000), codex_login_success)
9075
9224
 
9076
- init_duration = int(time.time() - init_start_time)
9077
- log_info(f"\u2713 Initialization complete ({init_duration}s)")
9225
+ init_duration_ms = int((time.time() - init_start_time) * 1000)
9226
+ record_sandbox_op("init_total", init_duration_ms, True)
9227
+ log_info(f"\u2713 Initialization complete ({init_duration_ms // 1000}s)")
9078
9228
 
9079
9229
  # Lifecycle: Execution
9080
9230
  log_info("\u25B7 Execution")
@@ -9238,12 +9388,13 @@ def _run() -> tuple[int, str]:
9238
9388
  final_exit_code = 1
9239
9389
  error_message = "Some events failed to send"
9240
9390
 
9241
- # Log execution result
9242
- exec_duration = int(time.time() - exec_start_time)
9391
+ # Log execution result and record metric
9392
+ exec_duration_ms = int((time.time() - exec_start_time) * 1000)
9393
+ record_sandbox_op("cli_execution", exec_duration_ms, agent_exit_code == 0)
9243
9394
  if agent_exit_code == 0 and final_exit_code == 0:
9244
- log_info(f"\u2713 Execution complete ({exec_duration}s)")
9395
+ log_info(f"\u2713 Execution complete ({exec_duration_ms // 1000}s)")
9245
9396
  else:
9246
- log_info(f"\u2717 Execution failed ({exec_duration}s)")
9397
+ log_info(f"\u2717 Execution failed ({exec_duration_ms // 1000}s)")
9247
9398
 
9248
9399
  # Handle completion
9249
9400
  if agent_exit_code == 0 and final_exit_code == 0:
@@ -11096,7 +11247,7 @@ var benchmarkCommand = new Command3("benchmark").description(
11096
11247
  });
11097
11248
 
11098
11249
  // src/index.ts
11099
- var version = true ? "2.8.0" : "0.1.0";
11250
+ var version = true ? "2.8.2" : "0.1.0";
11100
11251
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11101
11252
  program.addCommand(startCommand);
11102
11253
  program.addCommand(statusCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "2.8.0",
3
+ "version": "2.8.2",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",