@vm0/runner 2.8.1 → 2.8.2

Files changed (2)
  1. package/index.js +169 -21
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -5897,6 +5897,13 @@ var metricDataSchema = z8.object({
  disk_used: z8.number(),
  disk_total: z8.number()
  });
+ var sandboxOperationSchema = z8.object({
+ ts: z8.string(),
+ action_type: z8.string(),
+ duration_ms: z8.number(),
+ success: z8.boolean(),
+ error: z8.string().optional()
+ });
  var networkLogSchema = z8.object({
  timestamp: z8.string(),
  // Common fields (all modes)
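Note: each record described by the new sandboxOperationSchema is a single JSONL line. An illustrative example that would validate against the schema (values invented):

    {"ts": "2025-01-01T12:00:00.000Z", "action_type": "storage_download", "duration_ms": 842, "success": true}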
@@ -5916,7 +5923,7 @@ var networkLogSchema = z8.object({
  var webhookTelemetryContract = c5.router({
  /**
  * POST /api/webhooks/agent/telemetry
- * Receive telemetry data (system log, metrics, and network logs) from sandbox
+ * Receive telemetry data (system log, metrics, network logs, and sandbox operations) from sandbox
  */
  send: {
  method: "POST",
@@ -5925,7 +5932,8 @@ var webhookTelemetryContract = c5.router({
  runId: z8.string().min(1, "runId is required"),
  systemLog: z8.string().optional(),
  metrics: z8.array(metricDataSchema).optional(),
- networkLogs: z8.array(networkLogSchema).optional()
+ networkLogs: z8.array(networkLogSchema).optional(),
+ sandboxOperations: z8.array(sandboxOperationSchema).optional()
  }),
  responses: {
  200: z8.object({
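Note: with sandboxOperations added to the request schema, a telemetry webhook body can now carry timing records alongside the existing fields. An illustrative payload (all values invented; the error text mirrors one used later in this diff):

    {
      "runId": "run_abc123",
      "systemLog": "",
      "metrics": [],
      "networkLogs": [],
      "sandboxOperations": [
        {"ts": "2025-01-01T12:00:00.000Z", "action_type": "init_total", "duration_ms": 3500, "success": true},
        {"ts": "2025-01-01T12:00:05.250Z", "action_type": "session_id_read", "duration_ms": 2, "success": false, "error": "Session ID file not found"}
      ]
    }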
@@ -7124,6 +7132,10 @@ NETWORK_LOG_FILE = f"/tmp/vm0-network-{RUN_ID}.jsonl"
  TELEMETRY_LOG_POS_FILE = f"/tmp/vm0-telemetry-log-pos-{RUN_ID}.txt"
  TELEMETRY_METRICS_POS_FILE = f"/tmp/vm0-telemetry-metrics-pos-{RUN_ID}.txt"
  TELEMETRY_NETWORK_POS_FILE = f"/tmp/vm0-telemetry-network-pos-{RUN_ID}.txt"
+ TELEMETRY_SANDBOX_OPS_POS_FILE = f"/tmp/vm0-telemetry-sandbox-ops-pos-{RUN_ID}.txt"
+
+ # Sandbox operations log file (JSONL format)
+ SANDBOX_OPS_LOG_FILE = f"/tmp/vm0-sandbox-ops-{RUN_ID}.jsonl"

  # Metrics collection configuration
  METRICS_INTERVAL = 5 # seconds
@@ -7137,6 +7149,36 @@ def validate_config() -> bool:
  if not WORKING_DIR:
  raise ValueError("VM0_WORKING_DIR is required but not set")
  return True
+
+ def record_sandbox_op(
+ action_type: str,
+ duration_ms: int,
+ success: bool,
+ error: str = None
+ ) -> None:
+ """
+ Record a sandbox operation to JSONL file for telemetry upload.
+
+ Args:
+ action_type: Operation name (e.g., "init_total", "storage_download", "cli_execution")
+ duration_ms: Duration in milliseconds
+ success: Whether the operation succeeded
+ error: Optional error message if failed
+ """
+ from datetime import datetime, timezone
+ import json
+
+ entry = {
+ "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
+ "action_type": action_type,
+ "duration_ms": duration_ms,
+ "success": success,
+ }
+ if error:
+ entry["error"] = error
+
+ with open(SANDBOX_OPS_LOG_FILE, "a") as f:
+ f.write(json.dumps(entry) + "\\n")
  `;

  // ../../packages/core/src/sandbox/scripts/lib/log.py.ts
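Note: the call sites added throughout the rest of this diff all follow the same bracketing convention around record_sandbox_op; a minimal sketch of that pattern (do_work is a hypothetical stand-in for the operation being timed):

    import time
    from common import record_sandbox_op

    start = time.time()
    ok = do_work()  # hypothetical; returns True/False, like download_storage below
    record_sandbox_op("example_op", int((time.time() - start) * 1000), ok)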
@@ -7575,10 +7617,11 @@ import hashlib
  import tarfile
  import tempfile
  import shutil
+ import time
  from typing import Optional, Dict, Any, List
  from datetime import datetime

- from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL
+ from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL, record_sandbox_op
  from log import log_info, log_warn, log_error, log_debug
  from http_client import http_post_json, http_put_presigned

@@ -7719,7 +7762,9 @@ def create_direct_upload_snapshot(

  # Step 1: Collect file metadata
  log_info("Computing file hashes...")
+ hash_start = time.time()
  files = collect_file_metadata(mount_path)
+ record_sandbox_op("artifact_hash_compute", int((time.time() - hash_start) * 1000), True)
  log_info(f"Found {len(files)} files")

  if not files:
@@ -7727,6 +7772,7 @@ def create_direct_upload_snapshot(

  # Step 2: Call prepare endpoint
  log_info("Calling prepare endpoint...")
+ prepare_start = time.time()
  prepare_payload = {
  "storageName": storage_name,
  "storageType": storage_type,
@@ -7738,12 +7784,15 @@ def create_direct_upload_snapshot(
  prepare_response = http_post_json(STORAGE_PREPARE_URL, prepare_payload)
  if not prepare_response:
  log_error("Failed to call prepare endpoint")
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
  return None

  version_id = prepare_response.get("versionId")
  if not version_id:
  log_error(f"Invalid prepare response: {prepare_response}")
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), True)

  # Step 3: Check if version already exists (deduplication)
  # Still call commit to update HEAD pointer (fixes #649)
@@ -7786,10 +7835,13 @@ def create_direct_upload_snapshot(
  try:
  # Create archive
  log_info("Creating archive...")
+ archive_start = time.time()
  archive_path = os.path.join(temp_dir, "archive.tar.gz")
  if not create_archive(mount_path, archive_path):
  log_error("Failed to create archive")
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), True)

  # Create manifest
  log_info("Creating manifest...")
@@ -7800,12 +7852,14 @@ def create_direct_upload_snapshot(

  # Upload archive to S3
  log_info("Uploading archive to S3...")
+ s3_upload_start = time.time()
  if not http_put_presigned(
  archive_info["presignedUrl"],
  archive_path,
  "application/gzip"
  ):
  log_error("Failed to upload archive to S3")
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
  return None

  # Upload manifest to S3
@@ -7816,10 +7870,13 @@ def create_direct_upload_snapshot(
  "application/json"
  ):
  log_error("Failed to upload manifest to S3")
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), True)

  # Step 6: Call commit endpoint
  log_info("Calling commit endpoint...")
+ commit_start = time.time()
  commit_payload = {
  "storageName": storage_name,
  "storageType": storage_type,
@@ -7834,11 +7891,14 @@ def create_direct_upload_snapshot(
  commit_response = http_post_json(STORAGE_COMMIT_URL, commit_payload)
  if not commit_response:
  log_error("Failed to call commit endpoint")
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
  return None

  if not commit_response.get("success"):
  log_error(f"Commit failed: {commit_response}")
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
  return None
+ record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), True)

  log_info(f"Direct upload snapshot created: {version_id[:8]}")
  return {"versionId": version_id}
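Note: the prepare/archive/upload/commit steps each repeat the time-and-record boilerplate because failures here are signaled by falsy return values rather than exceptions, so each branch records its outcome explicitly. If the helpers raised on failure instead, a context manager could express the same instrumentation once; a hypothetical sketch, not part of this package:

    import time
    from contextlib import contextmanager
    from common import record_sandbox_op

    @contextmanager
    def timed_op(action_type):
        # Record duration and success for the wrapped block via record_sandbox_op.
        start = time.time()
        try:
            yield
        except Exception as e:
            record_sandbox_op(action_type, int((time.time() - start) * 1000), False, str(e))
            raise
        record_sandbox_op(action_type, int((time.time() - start) * 1000), True)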
@@ -7861,11 +7921,12 @@ import sys
  import json
  import tarfile
  import tempfile
+ import time

  # Add lib to path for imports
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

- from common import validate_config
+ from common import validate_config, record_sandbox_op
  from log import log_info, log_error
  from http_client import http_download

@@ -7945,13 +8006,21 @@ def main():

  log_info(f"Found {storage_count} storages, artifact: {has_artifact}")

+ # Track total download time
+ download_total_start = time.time()
+ download_success = True
+
  # Process storages
  for storage in storages:
  mount_path = storage.get("mountPath")
  archive_url = storage.get("archiveUrl")

  if archive_url and archive_url != "null":
- download_storage(mount_path, archive_url)
+ storage_start = time.time()
+ success = download_storage(mount_path, archive_url)
+ record_sandbox_op("storage_download", int((time.time() - storage_start) * 1000), success)
+ if not success:
+ download_success = False

  # Process artifact
  if artifact:
@@ -7959,8 +8028,14 @@ def main():
  artifact_url = artifact.get("archiveUrl")

  if artifact_url and artifact_url != "null":
- download_storage(artifact_mount, artifact_url)
-
+ artifact_start = time.time()
+ success = download_storage(artifact_mount, artifact_url)
+ record_sandbox_op("artifact_download", int((time.time() - artifact_start) * 1000), success)
+ if not success:
+ download_success = False
+
+ # Record total download time
+ record_sandbox_op("download_total", int((time.time() - download_total_start) * 1000), download_success)
  log_info("All storages downloaded successfully")


@@ -7977,12 +8052,14 @@ Uses direct S3 upload exclusively (no fallback to legacy methods).
  """
  import os
  import glob
+ import time
  from typing import Optional, Dict, Any

  from common import (
  RUN_ID, CHECKPOINT_URL,
  SESSION_ID_FILE, SESSION_HISTORY_PATH_FILE,
- ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME
+ ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME,
+ record_sandbox_op
  )
  from log import log_info, log_error
  from http_client import http_post_json
@@ -8038,19 +8115,27 @@ def create_checkpoint() -> bool:
  Returns:
  True on success, False on failure
  """
+ checkpoint_start = time.time()
  log_info("Creating checkpoint...")

  # Read session ID from temp file
+ session_id_start = time.time()
  if not os.path.exists(SESSION_ID_FILE):
  log_error("No session ID found, checkpoint creation failed")
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), False, "Session ID file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  with open(SESSION_ID_FILE) as f:
  cli_agent_session_id = f.read().strip()
+ record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), True)

  # Read session history path from temp file
+ session_history_start = time.time()
  if not os.path.exists(SESSION_HISTORY_PATH_FILE):
  log_error("No session history path found, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history path file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  with open(SESSION_HISTORY_PATH_FILE) as f:
@@ -8061,6 +8146,8 @@ def create_checkpoint() -> bool:
  parts = session_history_path_raw.split(":", 2)
  if len(parts) != 3:
  log_error(f"Invalid Codex search marker format: {session_history_path_raw}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Invalid Codex search marker")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  sessions_dir = parts[1]
  codex_session_id = parts[2]
@@ -8068,6 +8155,8 @@ def create_checkpoint() -> bool:
  session_history_path = find_codex_session_file(sessions_dir, codex_session_id)
  if not session_history_path:
  log_error(f"Could not find Codex session file for {codex_session_id} in {sessions_dir}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Codex session file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  else:
  session_history_path = session_history_path_raw
@@ -8075,6 +8164,8 @@ def create_checkpoint() -> bool:
  # Check if session history file exists
  if not os.path.exists(session_history_path):
  log_error(f"Session history file not found at {session_history_path}, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history file not found")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Read session history
@@ -8083,14 +8174,19 @@ def create_checkpoint() -> bool:
  cli_agent_session_history = f.read()
  except IOError as e:
  log_error(f"Failed to read session history: {e}")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, str(e))
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  if not cli_agent_session_history.strip():
  log_error("Session history is empty, checkpoint creation failed")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history empty")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  line_count = len(cli_agent_session_history.strip().split("\\n"))
  log_info(f"Session history loaded ({line_count} lines)")
+ record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), True)

  # CLI agent type (default to claude-code)
  cli_agent_type = os.environ.get("CLI_AGENT_TYPE", "claude-code")
@@ -8104,6 +8200,7 @@ def create_checkpoint() -> bool:

  if ARTIFACT_DRIVER != "vas":
  log_error(f"Unknown artifact driver: {ARTIFACT_DRIVER} (only 'vas' is supported)")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # VAS artifact: create snapshot using direct S3 upload (bypasses Vercel 4.5MB limit)
@@ -8120,12 +8217,14 @@ def create_checkpoint() -> bool:

  if not snapshot:
  log_error("Failed to create VAS snapshot for artifact")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Extract versionId from snapshot response
  artifact_version = snapshot.get("versionId")
  if not artifact_version:
  log_error("Failed to extract versionId from snapshot")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False

  # Build artifact snapshot JSON with new format (artifactName + artifactVersion)
@@ -8153,6 +8252,7 @@ def create_checkpoint() -> bool:
  checkpoint_payload["artifactSnapshot"] = artifact_snapshot

  # Call checkpoint API
+ api_call_start = time.time()
  result = http_post_json(CHECKPOINT_URL, checkpoint_payload)

  # Validate response contains checkpointId to confirm checkpoint was actually created
@@ -8160,9 +8260,13 @@ def create_checkpoint() -> bool:
  if result and result.get("checkpointId"):
  checkpoint_id = result.get("checkpointId")
  log_info(f"Checkpoint created successfully: {checkpoint_id}")
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), True)
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), True)
  return True
  else:
  log_error(f"Checkpoint API returned invalid response: {result}")
+ record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), False, "Invalid API response")
+ record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
  return False
  `;

@@ -8556,8 +8660,9 @@ from typing import List, Dict, Any

  from common import (
  RUN_ID, TELEMETRY_URL, TELEMETRY_INTERVAL,
- SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE,
- TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE
+ SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE, SANDBOX_OPS_LOG_FILE,
+ TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE,
+ TELEMETRY_SANDBOX_OPS_POS_FILE
  )
  from log import log_info, log_error, log_debug, log_warn
  from http_client import http_post_json
@@ -8660,6 +8765,19 @@ def read_network_logs_from_position(pos_file: str) -> tuple[List[Dict[str, Any]]
  return read_jsonl_from_position(NETWORK_LOG_FILE, pos_file)


+ def read_sandbox_ops_from_position(pos_file: str) -> tuple[List[Dict[str, Any]], int]:
+ """
+ Read new sandbox operations from JSONL file starting from last position.
+
+ Args:
+ pos_file: Path to position tracking file
+
+ Returns:
+ Tuple of (sandbox operations list, new_position)
+ """
+ return read_jsonl_from_position(SANDBOX_OPS_LOG_FILE, pos_file)
+
+
  def upload_telemetry() -> bool:
  """
  Upload telemetry data to VM0 API.
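Note: read_jsonl_from_position and save_position are defined elsewhere in this file and unchanged by this diff, so their bodies are not shown here. A plausible reading of the byte-offset mechanism they imply (an assumption, not the package's actual code):

    import json
    import os

    def read_jsonl_from_position(log_file, pos_file):
        # Resume reading from the byte offset saved after the previous upload.
        pos = 0
        if os.path.exists(pos_file):
            with open(pos_file) as f:
                pos = int(f.read().strip() or 0)
        entries = []
        if not os.path.exists(log_file):
            return entries, pos
        with open(log_file) as f:
            f.seek(pos)
            for line in f:
                if line.strip():
                    entries.append(json.loads(line.strip()))
            pos = f.tell()
        return entries, pos

Consistent with that reading, the upload path below only calls save_position after a successful POST, so unsent records are re-read on the next cycle.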
@@ -8676,8 +8794,11 @@ def upload_telemetry() -> bool:
  # Read new network logs
  network_logs, network_pos = read_network_logs_from_position(TELEMETRY_NETWORK_POS_FILE)

+ # Read new sandbox operations
+ sandbox_ops, sandbox_ops_pos = read_sandbox_ops_from_position(TELEMETRY_SANDBOX_OPS_POS_FILE)
+
  # Skip if nothing new
- if not system_log and not metrics and not network_logs:
+ if not system_log and not metrics and not network_logs and not sandbox_ops:
  log_debug("No new telemetry data to upload")
  return True

@@ -8691,10 +8812,11 @@ def upload_telemetry() -> bool:
  "runId": RUN_ID,
  "systemLog": masked_system_log,
  "metrics": metrics, # Metrics don't contain secrets (just numbers)
- "networkLogs": masked_network_logs
+ "networkLogs": masked_network_logs,
+ "sandboxOperations": sandbox_ops # Sandbox ops don't contain secrets (just timing data)
  }

- log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs")
+ log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs, {len(sandbox_ops)} sandbox ops")

  result = http_post_json(TELEMETRY_URL, payload, max_retries=1)

@@ -8703,6 +8825,7 @@ def upload_telemetry() -> bool:
  save_position(TELEMETRY_LOG_POS_FILE, log_pos)
  save_position(TELEMETRY_METRICS_POS_FILE, metrics_pos)
  save_position(TELEMETRY_NETWORK_POS_FILE, network_pos)
+ save_position(TELEMETRY_SANDBOX_OPS_POS_FILE, sandbox_ops_pos)
  log_debug(f"Telemetry uploaded successfully: {result.get('id', 'unknown')}")
  return True
  else:
@@ -8940,7 +9063,7 @@ sys.path.insert(0, "/usr/local/bin/vm0-agent/lib")
  from common import (
  WORKING_DIR, PROMPT, RESUME_SESSION_ID, COMPLETE_URL, RUN_ID,
  EVENT_ERROR_FLAG, HEARTBEAT_URL, HEARTBEAT_INTERVAL, AGENT_LOG_FILE,
- CLI_AGENT_TYPE, OPENAI_MODEL, validate_config
+ CLI_AGENT_TYPE, OPENAI_MODEL, validate_config, record_sandbox_op
  )
  from log import log_info, log_error, log_warn
  from events import send_event
@@ -8976,10 +9099,14 @@ def _cleanup(exit_code: int, error_message: str):

  # Perform final telemetry upload before completion
  # This ensures all remaining data is captured
+ telemetry_start = time.time()
+ telemetry_success = True
  try:
  final_telemetry_upload()
  except Exception as e:
+ telemetry_success = False
  log_error(f"Final telemetry upload failed: {e}")
+ record_sandbox_op("final_telemetry_upload", int((time.time() - telemetry_start) * 1000), telemetry_success)

  # Always call complete API at the end
  # This sends vm0_result (on success) or vm0_error (on failure) and kills the sandbox
@@ -8992,13 +9119,17 @@ def _cleanup(exit_code: int, error_message: str):
  if error_message:
  complete_payload["error"] = error_message

+ complete_start = time.time()
+ complete_success = False
  try:
  if http_post_json(COMPLETE_URL, complete_payload):
  log_info("Complete API called successfully")
+ complete_success = True
  else:
  log_error("Failed to call complete API (sandbox may not be cleaned up)")
  except Exception as e:
  log_error(f"Complete API call failed: {e}")
+ record_sandbox_op("complete_api_call", int((time.time() - complete_start) * 1000), complete_success)

  # Stop heartbeat thread
  shutdown_event.set()
@@ -9030,25 +9161,36 @@ def _run() -> tuple[int, str]:
  log_info(f"Working directory: {WORKING_DIR}")

  # Start heartbeat thread
+ heartbeat_start = time.time()
  heartbeat_thread = threading.Thread(target=heartbeat_loop, daemon=True)
  heartbeat_thread.start()
  log_info("Heartbeat thread started")
+ record_sandbox_op("heartbeat_start", int((time.time() - heartbeat_start) * 1000), True)

  # Start metrics collector thread
+ metrics_start = time.time()
  start_metrics_collector(shutdown_event)
  log_info("Metrics collector thread started")
+ record_sandbox_op("metrics_collector_start", int((time.time() - metrics_start) * 1000), True)

  # Start telemetry upload thread
+ telemetry_start = time.time()
  start_telemetry_upload(shutdown_event)
  log_info("Telemetry upload thread started")
+ record_sandbox_op("telemetry_upload_start", int((time.time() - telemetry_start) * 1000), True)

  # Create and change to working directory - raises RuntimeError if fails
  # Directory may not exist if no artifact/storage was downloaded (e.g., first run)
+ working_dir_start = time.time()
+ working_dir_success = True
  try:
  os.makedirs(WORKING_DIR, exist_ok=True)
  os.chdir(WORKING_DIR)
  except OSError as e:
+ working_dir_success = False
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), False, str(e))
  raise RuntimeError(f"Failed to create/change to working directory: {WORKING_DIR} - {e}") from e
+ record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), working_dir_success)

  # Set up Codex configuration if using Codex CLI
  # Claude Code uses ~/.claude by default (no configuration needed)
@@ -9061,6 +9203,8 @@ def _run() -> tuple[int, str]:
  log_info(f"Codex home directory: {codex_home}")

  # Login with API key via stdin (recommended method)
+ codex_login_start = time.time()
+ codex_login_success = False
  api_key = os.environ.get("OPENAI_API_KEY", "")
  if api_key:
  result = subprocess.run(
@@ -9071,13 +9215,16 @@ def _run() -> tuple[int, str]:
  )
  if result.returncode == 0:
  log_info("Codex authenticated with API key")
+ codex_login_success = True
  else:
  log_error(f"Codex login failed: {result.stderr}")
  else:
  log_error("OPENAI_API_KEY not set")
+ record_sandbox_op("codex_login", int((time.time() - codex_login_start) * 1000), codex_login_success)

- init_duration = int(time.time() - init_start_time)
- log_info(f"\u2713 Initialization complete ({init_duration}s)")
+ init_duration_ms = int((time.time() - init_start_time) * 1000)
+ record_sandbox_op("init_total", init_duration_ms, True)
+ log_info(f"\u2713 Initialization complete ({init_duration_ms // 1000}s)")

  # Lifecycle: Execution
  log_info("\u25B7 Execution")
@@ -9241,12 +9388,13 @@ def _run() -> tuple[int, str]:
  final_exit_code = 1
  error_message = "Some events failed to send"

- # Log execution result
- exec_duration = int(time.time() - exec_start_time)
+ # Log execution result and record metric
+ exec_duration_ms = int((time.time() - exec_start_time) * 1000)
+ record_sandbox_op("cli_execution", exec_duration_ms, agent_exit_code == 0)
  if agent_exit_code == 0 and final_exit_code == 0:
- log_info(f"\u2713 Execution complete ({exec_duration}s)")
+ log_info(f"\u2713 Execution complete ({exec_duration_ms // 1000}s)")
  else:
- log_info(f"\u2717 Execution failed ({exec_duration}s)")
+ log_info(f"\u2717 Execution failed ({exec_duration_ms // 1000}s)")

  # Handle completion
  if agent_exit_code == 0 and final_exit_code == 0:
  if agent_exit_code == 0 and final_exit_code == 0:
@@ -11099,7 +11247,7 @@ var benchmarkCommand = new Command3("benchmark").description(
11099
11247
  });
11100
11248
 
11101
11249
  // src/index.ts
11102
- var version = true ? "2.8.1" : "0.1.0";
11250
+ var version = true ? "2.8.2" : "0.1.0";
11103
11251
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
11104
11252
  program.addCommand(startCommand);
11105
11253
  program.addCommand(statusCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@vm0/runner",
- "version": "2.8.1",
+ "version": "2.8.2",
  "description": "Self-hosted runner for VM0 agents",
  "repository": {
  "type": "git",