@vm0/runner 2.8.1 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +230 -29
- package/package.json +2 -2
package/index.js
CHANGED
@@ -27,7 +27,12 @@ var runnerConfigSchema = z.object({
     vcpu: z.number().int().min(1).default(2),
     memory_mb: z.number().int().min(128).default(2048),
     poll_interval_ms: z.number().int().min(1e3).default(5e3)
-  }).default({
+  }).default({
+    max_concurrent: 1,
+    vcpu: 2,
+    memory_mb: 2048,
+    poll_interval_ms: 5e3
+  }),
   firecracker: z.object({
     binary: z.string().min(1, "Firecracker binary path is required"),
     kernel: z.string().min(1, "Kernel path is required"),
@@ -35,7 +40,9 @@ var runnerConfigSchema = z.object({
   }),
   proxy: z.object({
     port: z.number().int().min(1024).max(65535).default(8080)
-  }).default({
+  }).default({
+    port: 8080
+  })
 });
 var debugConfigSchema = z.object({
   name: z.string().default("debug-runner"),
@@ -43,13 +50,21 @@ var debugConfigSchema = z.object({
   server: z.object({
     url: z.string().url().default("http://localhost:3000"),
     token: z.string().default("debug-token")
-  }).default({
+  }).default({
+    url: "http://localhost:3000",
+    token: "debug-token"
+  }),
   sandbox: z.object({
     max_concurrent: z.number().int().min(1).default(1),
     vcpu: z.number().int().min(1).default(2),
     memory_mb: z.number().int().min(128).default(2048),
     poll_interval_ms: z.number().int().min(1e3).default(5e3)
-  }).default({
+  }).default({
+    max_concurrent: 1,
+    vcpu: 2,
+    memory_mb: 2048,
+    poll_interval_ms: 5e3
+  }),
   firecracker: z.object({
     binary: z.string().min(1, "Firecracker binary path is required"),
     kernel: z.string().min(1, "Kernel path is required"),
@@ -57,7 +72,9 @@ var debugConfigSchema = z.object({
   }),
   proxy: z.object({
     port: z.number().int().min(1024).max(65535).default(8080)
-  }).default({
+  }).default({
+    port: 8080
+  })
 });
 function loadDebugConfig(configPath) {
   if (!fs.existsSync(configPath)) {
@@ -67,7 +84,7 @@ function loadDebugConfig(configPath) {
   const raw = yaml.parse(content);
   const result = debugConfigSchema.safeParse(raw);
   if (!result.success) {
-    const errors = result.error.
+    const errors = result.error.issues.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
     throw new Error(`Invalid configuration:
 ${errors}`);
   }
@@ -81,7 +98,7 @@ function loadConfig(configPath) {
   const raw = yaml.parse(content);
   const result = runnerConfigSchema.safeParse(raw);
   if (!result.success) {
-    const errors = result.error.
+    const errors = result.error.issues.map((e) => ` - ${e.path.join(".")}: ${e.message}`).join("\n");
     throw new Error(`Invalid configuration:
 ${errors}`);
   }
@@ -5897,6 +5914,13 @@ var metricDataSchema = z8.object({
   disk_used: z8.number(),
   disk_total: z8.number()
 });
+var sandboxOperationSchema = z8.object({
+  ts: z8.string(),
+  action_type: z8.string(),
+  duration_ms: z8.number(),
+  success: z8.boolean(),
+  error: z8.string().optional()
+});
 var networkLogSchema = z8.object({
   timestamp: z8.string(),
   // Common fields (all modes)
@@ -5916,7 +5940,7 @@ var networkLogSchema = z8.object({
 var webhookTelemetryContract = c5.router({
   /**
    * POST /api/webhooks/agent/telemetry
-   * Receive telemetry data (system log, metrics, and
+   * Receive telemetry data (system log, metrics, network logs, and sandbox operations) from sandbox
    */
   send: {
     method: "POST",
@@ -5925,7 +5949,8 @@ var webhookTelemetryContract = c5.router({
       runId: z8.string().min(1, "runId is required"),
       systemLog: z8.string().optional(),
       metrics: z8.array(metricDataSchema).optional(),
-      networkLogs: z8.array(networkLogSchema).optional()
+      networkLogs: z8.array(networkLogSchema).optional(),
+      sandboxOperations: z8.array(sandboxOperationSchema).optional()
     }),
     responses: {
       200: z8.object({
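
Note: the hunk above extends the telemetry webhook body with an optional sandboxOperations array validated by sandboxOperationSchema. As an illustration only (field names come from the schemas in this diff, every value below is invented), a request body for POST /api/webhooks/agent/telemetry could now look like:

# Illustrative payload for the extended telemetry webhook (example values, not taken from the package)
payload = {
    "runId": "run_123",
    "systemLog": "...",
    "metrics": [],
    "networkLogs": [],
    "sandboxOperations": [
        {"ts": "2025-01-01T00:00:01.000Z", "action_type": "storage_download", "duration_ms": 840, "success": True},
        {"ts": "2025-01-01T00:00:05.000Z", "action_type": "init_total", "duration_ms": 4200, "success": True},
    ],
}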
@@ -7124,6 +7149,10 @@ NETWORK_LOG_FILE = f"/tmp/vm0-network-{RUN_ID}.jsonl"
 TELEMETRY_LOG_POS_FILE = f"/tmp/vm0-telemetry-log-pos-{RUN_ID}.txt"
 TELEMETRY_METRICS_POS_FILE = f"/tmp/vm0-telemetry-metrics-pos-{RUN_ID}.txt"
 TELEMETRY_NETWORK_POS_FILE = f"/tmp/vm0-telemetry-network-pos-{RUN_ID}.txt"
+TELEMETRY_SANDBOX_OPS_POS_FILE = f"/tmp/vm0-telemetry-sandbox-ops-pos-{RUN_ID}.txt"
+
+# Sandbox operations log file (JSONL format)
+SANDBOX_OPS_LOG_FILE = f"/tmp/vm0-sandbox-ops-{RUN_ID}.jsonl"

 # Metrics collection configuration
 METRICS_INTERVAL = 5 # seconds
@@ -7137,6 +7166,36 @@ def validate_config() -> bool:
     if not WORKING_DIR:
         raise ValueError("VM0_WORKING_DIR is required but not set")
     return True
+
+def record_sandbox_op(
+    action_type: str,
+    duration_ms: int,
+    success: bool,
+    error: str = None
+) -> None:
+    """
+    Record a sandbox operation to JSONL file for telemetry upload.
+
+    Args:
+        action_type: Operation name (e.g., "init_total", "storage_download", "cli_execution")
+        duration_ms: Duration in milliseconds
+        success: Whether the operation succeeded
+        error: Optional error message if failed
+    """
+    from datetime import datetime, timezone
+    import json
+
+    entry = {
+        "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
+        "action_type": action_type,
+        "duration_ms": duration_ms,
+        "success": success,
+    }
+    if error:
+        entry["error"] = error
+
+    with open(SANDBOX_OPS_LOG_FILE, "a") as f:
+        f.write(json.dumps(entry) + "\\n")
 `;

 // ../../packages/core/src/sandbox/scripts/lib/log.py.ts
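
The callers added later in this diff all follow one pattern: capture a start time, run the step, then call record_sandbox_op with the elapsed milliseconds. A minimal sketch of that pattern and of the JSONL line it appends (the step function and all timing values are invented for illustration):

# Sketch of the timing pattern used by the callers in this diff
import time

step_start = time.time()
success = do_step()  # hypothetical step, e.g. a storage download
record_sandbox_op("storage_download", int((time.time() - step_start) * 1000), success)

# Resulting line appended to SANDBOX_OPS_LOG_FILE (example values):
# {"ts": "2025-01-01T00:00:01.234Z", "action_type": "storage_download", "duration_ms": 840, "success": true}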
@@ -7575,10 +7634,11 @@ import hashlib
 import tarfile
 import tempfile
 import shutil
+import time
 from typing import Optional, Dict, Any, List
 from datetime import datetime

-from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL
+from common import RUN_ID, STORAGE_PREPARE_URL, STORAGE_COMMIT_URL, record_sandbox_op
 from log import log_info, log_warn, log_error, log_debug
 from http_client import http_post_json, http_put_presigned

@@ -7719,7 +7779,9 @@ def create_direct_upload_snapshot(

     # Step 1: Collect file metadata
     log_info("Computing file hashes...")
+    hash_start = time.time()
     files = collect_file_metadata(mount_path)
+    record_sandbox_op("artifact_hash_compute", int((time.time() - hash_start) * 1000), True)
     log_info(f"Found {len(files)} files")

     if not files:
@@ -7727,6 +7789,7 @@ def create_direct_upload_snapshot(

     # Step 2: Call prepare endpoint
     log_info("Calling prepare endpoint...")
+    prepare_start = time.time()
     prepare_payload = {
         "storageName": storage_name,
         "storageType": storage_type,
@@ -7738,12 +7801,15 @@ def create_direct_upload_snapshot(
     prepare_response = http_post_json(STORAGE_PREPARE_URL, prepare_payload)
     if not prepare_response:
         log_error("Failed to call prepare endpoint")
+        record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
         return None

     version_id = prepare_response.get("versionId")
     if not version_id:
         log_error(f"Invalid prepare response: {prepare_response}")
+        record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), False)
         return None
+    record_sandbox_op("artifact_prepare_api", int((time.time() - prepare_start) * 1000), True)

     # Step 3: Check if version already exists (deduplication)
     # Still call commit to update HEAD pointer (fixes #649)
@@ -7786,10 +7852,13 @@ def create_direct_upload_snapshot(
     try:
         # Create archive
         log_info("Creating archive...")
+        archive_start = time.time()
         archive_path = os.path.join(temp_dir, "archive.tar.gz")
         if not create_archive(mount_path, archive_path):
             log_error("Failed to create archive")
+            record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), False)
             return None
+        record_sandbox_op("artifact_archive_create", int((time.time() - archive_start) * 1000), True)

         # Create manifest
         log_info("Creating manifest...")
@@ -7800,12 +7869,14 @@ def create_direct_upload_snapshot(

         # Upload archive to S3
         log_info("Uploading archive to S3...")
+        s3_upload_start = time.time()
         if not http_put_presigned(
             archive_info["presignedUrl"],
             archive_path,
             "application/gzip"
         ):
             log_error("Failed to upload archive to S3")
+            record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
             return None

         # Upload manifest to S3
@@ -7816,10 +7887,13 @@ def create_direct_upload_snapshot(
             "application/json"
         ):
             log_error("Failed to upload manifest to S3")
+            record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), False)
             return None
+        record_sandbox_op("artifact_s3_upload", int((time.time() - s3_upload_start) * 1000), True)

         # Step 6: Call commit endpoint
         log_info("Calling commit endpoint...")
+        commit_start = time.time()
         commit_payload = {
             "storageName": storage_name,
             "storageType": storage_type,
@@ -7834,11 +7908,14 @@ def create_direct_upload_snapshot(
         commit_response = http_post_json(STORAGE_COMMIT_URL, commit_payload)
         if not commit_response:
             log_error("Failed to call commit endpoint")
+            record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
             return None

         if not commit_response.get("success"):
             log_error(f"Commit failed: {commit_response}")
+            record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), False)
             return None
+        record_sandbox_op("artifact_commit_api", int((time.time() - commit_start) * 1000), True)

         log_info(f"Direct upload snapshot created: {version_id[:8]}")
         return {"versionId": version_id}
@@ -7861,11 +7938,12 @@ import sys
 import json
 import tarfile
 import tempfile
+import time

 # Add lib to path for imports
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

-from common import validate_config
+from common import validate_config, record_sandbox_op
 from log import log_info, log_error
 from http_client import http_download

@@ -7945,13 +8023,21 @@ def main():

     log_info(f"Found {storage_count} storages, artifact: {has_artifact}")

+    # Track total download time
+    download_total_start = time.time()
+    download_success = True
+
     # Process storages
     for storage in storages:
         mount_path = storage.get("mountPath")
         archive_url = storage.get("archiveUrl")

         if archive_url and archive_url != "null":
-
+            storage_start = time.time()
+            success = download_storage(mount_path, archive_url)
+            record_sandbox_op("storage_download", int((time.time() - storage_start) * 1000), success)
+            if not success:
+                download_success = False

     # Process artifact
     if artifact:
@@ -7959,8 +8045,14 @@ def main():
         artifact_url = artifact.get("archiveUrl")

         if artifact_url and artifact_url != "null":
-
-
+            artifact_start = time.time()
+            success = download_storage(artifact_mount, artifact_url)
+            record_sandbox_op("artifact_download", int((time.time() - artifact_start) * 1000), success)
+            if not success:
+                download_success = False
+
+    # Record total download time
+    record_sandbox_op("download_total", int((time.time() - download_total_start) * 1000), download_success)
     log_info("All storages downloaded successfully")


@@ -7977,12 +8069,14 @@ Uses direct S3 upload exclusively (no fallback to legacy methods).
 """
 import os
 import glob
+import time
 from typing import Optional, Dict, Any

 from common import (
     RUN_ID, CHECKPOINT_URL,
     SESSION_ID_FILE, SESSION_HISTORY_PATH_FILE,
-    ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME
+    ARTIFACT_DRIVER, ARTIFACT_MOUNT_PATH, ARTIFACT_VOLUME_NAME,
+    record_sandbox_op
 )
 from log import log_info, log_error
 from http_client import http_post_json
@@ -8038,19 +8132,27 @@ def create_checkpoint() -> bool:
     Returns:
         True on success, False on failure
     """
+    checkpoint_start = time.time()
     log_info("Creating checkpoint...")

     # Read session ID from temp file
+    session_id_start = time.time()
     if not os.path.exists(SESSION_ID_FILE):
         log_error("No session ID found, checkpoint creation failed")
+        record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), False, "Session ID file not found")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     with open(SESSION_ID_FILE) as f:
         cli_agent_session_id = f.read().strip()
+    record_sandbox_op("session_id_read", int((time.time() - session_id_start) * 1000), True)

     # Read session history path from temp file
+    session_history_start = time.time()
     if not os.path.exists(SESSION_HISTORY_PATH_FILE):
         log_error("No session history path found, checkpoint creation failed")
+        record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history path file not found")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     with open(SESSION_HISTORY_PATH_FILE) as f:
@@ -8061,6 +8163,8 @@ def create_checkpoint() -> bool:
         parts = session_history_path_raw.split(":", 2)
         if len(parts) != 3:
             log_error(f"Invalid Codex search marker format: {session_history_path_raw}")
+            record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Invalid Codex search marker")
+            record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
             return False
         sessions_dir = parts[1]
         codex_session_id = parts[2]
@@ -8068,6 +8172,8 @@ def create_checkpoint() -> bool:
         session_history_path = find_codex_session_file(sessions_dir, codex_session_id)
         if not session_history_path:
             log_error(f"Could not find Codex session file for {codex_session_id} in {sessions_dir}")
+            record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Codex session file not found")
+            record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
             return False
     else:
         session_history_path = session_history_path_raw
@@ -8075,6 +8181,8 @@ def create_checkpoint() -> bool:
     # Check if session history file exists
     if not os.path.exists(session_history_path):
         log_error(f"Session history file not found at {session_history_path}, checkpoint creation failed")
+        record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history file not found")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     # Read session history
@@ -8083,14 +8191,19 @@ def create_checkpoint() -> bool:
             cli_agent_session_history = f.read()
     except IOError as e:
         log_error(f"Failed to read session history: {e}")
+        record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, str(e))
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     if not cli_agent_session_history.strip():
         log_error("Session history is empty, checkpoint creation failed")
+        record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), False, "Session history empty")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     line_count = len(cli_agent_session_history.strip().split("\\n"))
     log_info(f"Session history loaded ({line_count} lines)")
+    record_sandbox_op("session_history_read", int((time.time() - session_history_start) * 1000), True)

     # CLI agent type (default to claude-code)
     cli_agent_type = os.environ.get("CLI_AGENT_TYPE", "claude-code")
@@ -8104,6 +8217,7 @@ def create_checkpoint() -> bool:

     if ARTIFACT_DRIVER != "vas":
         log_error(f"Unknown artifact driver: {ARTIFACT_DRIVER} (only 'vas' is supported)")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     # VAS artifact: create snapshot using direct S3 upload (bypasses Vercel 4.5MB limit)
@@ -8120,12 +8234,14 @@ def create_checkpoint() -> bool:

     if not snapshot:
         log_error("Failed to create VAS snapshot for artifact")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     # Extract versionId from snapshot response
     artifact_version = snapshot.get("versionId")
     if not artifact_version:
         log_error("Failed to extract versionId from snapshot")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False

     # Build artifact snapshot JSON with new format (artifactName + artifactVersion)
@@ -8153,6 +8269,7 @@ def create_checkpoint() -> bool:
         checkpoint_payload["artifactSnapshot"] = artifact_snapshot

     # Call checkpoint API
+    api_call_start = time.time()
     result = http_post_json(CHECKPOINT_URL, checkpoint_payload)

     # Validate response contains checkpointId to confirm checkpoint was actually created
@@ -8160,9 +8277,13 @@ def create_checkpoint() -> bool:
     if result and result.get("checkpointId"):
         checkpoint_id = result.get("checkpointId")
         log_info(f"Checkpoint created successfully: {checkpoint_id}")
+        record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), True)
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), True)
         return True
     else:
         log_error(f"Checkpoint API returned invalid response: {result}")
+        record_sandbox_op("checkpoint_api_call", int((time.time() - api_call_start) * 1000), False, "Invalid API response")
+        record_sandbox_op("checkpoint_total", int((time.time() - checkpoint_start) * 1000), False)
         return False
 `;

@@ -8556,8 +8677,9 @@ from typing import List, Dict, Any

 from common import (
     RUN_ID, TELEMETRY_URL, TELEMETRY_INTERVAL,
-    SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE,
-    TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE
+    SYSTEM_LOG_FILE, METRICS_LOG_FILE, NETWORK_LOG_FILE, SANDBOX_OPS_LOG_FILE,
+    TELEMETRY_LOG_POS_FILE, TELEMETRY_METRICS_POS_FILE, TELEMETRY_NETWORK_POS_FILE,
+    TELEMETRY_SANDBOX_OPS_POS_FILE
 )
 from log import log_info, log_error, log_debug, log_warn
 from http_client import http_post_json
@@ -8660,6 +8782,19 @@ def read_network_logs_from_position(pos_file: str) -> tuple[List[Dict[str, Any]]
     return read_jsonl_from_position(NETWORK_LOG_FILE, pos_file)


+def read_sandbox_ops_from_position(pos_file: str) -> tuple[List[Dict[str, Any]], int]:
+    """
+    Read new sandbox operations from JSONL file starting from last position.
+
+    Args:
+        pos_file: Path to position tracking file
+
+    Returns:
+        Tuple of (sandbox operations list, new_position)
+    """
+    return read_jsonl_from_position(SANDBOX_OPS_LOG_FILE, pos_file)
+
+
 def upload_telemetry() -> bool:
     """
     Upload telemetry data to VM0 API.
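
read_jsonl_from_position and save_position themselves are not shown in this diff; as an assumption, a position-tracked JSONL reader of this kind remembers an offset so each telemetry upload only sends lines appended since the previous upload. A minimal sketch of that idea (names and details are illustrative, not the package's actual implementation):

# Sketch only: incremental JSONL read using a saved byte offset
import json, os

def read_jsonl_from_offset(jsonl_file, pos_file):
    # Load the last saved offset, defaulting to the start of the file
    offset = 0
    if os.path.exists(pos_file):
        with open(pos_file) as f:
            offset = int(f.read().strip() or 0)
    entries = []
    if os.path.exists(jsonl_file):
        with open(jsonl_file) as f:
            f.seek(offset)
            for line in f:
                if line.strip():
                    entries.append(json.loads(line))
            offset = f.tell()
    return entries, offset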
@@ -8676,8 +8811,11 @@ def upload_telemetry() -> bool:
     # Read new network logs
     network_logs, network_pos = read_network_logs_from_position(TELEMETRY_NETWORK_POS_FILE)

+    # Read new sandbox operations
+    sandbox_ops, sandbox_ops_pos = read_sandbox_ops_from_position(TELEMETRY_SANDBOX_OPS_POS_FILE)
+
     # Skip if nothing new
-    if not system_log and not metrics and not network_logs:
+    if not system_log and not metrics and not network_logs and not sandbox_ops:
         log_debug("No new telemetry data to upload")
         return True

@@ -8691,10 +8829,11 @@ def upload_telemetry() -> bool:
         "runId": RUN_ID,
         "systemLog": masked_system_log,
         "metrics": metrics, # Metrics don't contain secrets (just numbers)
-        "networkLogs": masked_network_logs
+        "networkLogs": masked_network_logs,
+        "sandboxOperations": sandbox_ops # Sandbox ops don't contain secrets (just timing data)
     }

-    log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs")
+    log_debug(f"Uploading telemetry: {len(system_log)} bytes log, {len(metrics)} metrics, {len(network_logs)} network logs, {len(sandbox_ops)} sandbox ops")

     result = http_post_json(TELEMETRY_URL, payload, max_retries=1)

@@ -8703,6 +8842,7 @@ def upload_telemetry() -> bool:
         save_position(TELEMETRY_LOG_POS_FILE, log_pos)
         save_position(TELEMETRY_METRICS_POS_FILE, metrics_pos)
         save_position(TELEMETRY_NETWORK_POS_FILE, network_pos)
+        save_position(TELEMETRY_SANDBOX_OPS_POS_FILE, sandbox_ops_pos)
         log_debug(f"Telemetry uploaded successfully: {result.get('id', 'unknown')}")
         return True
     else:
@@ -8940,7 +9080,7 @@ sys.path.insert(0, "/usr/local/bin/vm0-agent/lib")
 from common import (
     WORKING_DIR, PROMPT, RESUME_SESSION_ID, COMPLETE_URL, RUN_ID,
     EVENT_ERROR_FLAG, HEARTBEAT_URL, HEARTBEAT_INTERVAL, AGENT_LOG_FILE,
-    CLI_AGENT_TYPE, OPENAI_MODEL, validate_config
+    CLI_AGENT_TYPE, OPENAI_MODEL, validate_config, record_sandbox_op
 )
 from log import log_info, log_error, log_warn
 from events import send_event
@@ -8976,10 +9116,14 @@ def _cleanup(exit_code: int, error_message: str):

     # Perform final telemetry upload before completion
     # This ensures all remaining data is captured
+    telemetry_start = time.time()
+    telemetry_success = True
     try:
         final_telemetry_upload()
     except Exception as e:
+        telemetry_success = False
         log_error(f"Final telemetry upload failed: {e}")
+    record_sandbox_op("final_telemetry_upload", int((time.time() - telemetry_start) * 1000), telemetry_success)

     # Always call complete API at the end
     # This sends vm0_result (on success) or vm0_error (on failure) and kills the sandbox
@@ -8992,13 +9136,17 @@ def _cleanup(exit_code: int, error_message: str):
     if error_message:
         complete_payload["error"] = error_message

+    complete_start = time.time()
+    complete_success = False
     try:
         if http_post_json(COMPLETE_URL, complete_payload):
             log_info("Complete API called successfully")
+            complete_success = True
         else:
             log_error("Failed to call complete API (sandbox may not be cleaned up)")
     except Exception as e:
         log_error(f"Complete API call failed: {e}")
+    record_sandbox_op("complete_api_call", int((time.time() - complete_start) * 1000), complete_success)

     # Stop heartbeat thread
     shutdown_event.set()
@@ -9030,25 +9178,36 @@ def _run() -> tuple[int, str]:
     log_info(f"Working directory: {WORKING_DIR}")

     # Start heartbeat thread
+    heartbeat_start = time.time()
     heartbeat_thread = threading.Thread(target=heartbeat_loop, daemon=True)
     heartbeat_thread.start()
     log_info("Heartbeat thread started")
+    record_sandbox_op("heartbeat_start", int((time.time() - heartbeat_start) * 1000), True)

     # Start metrics collector thread
+    metrics_start = time.time()
     start_metrics_collector(shutdown_event)
     log_info("Metrics collector thread started")
+    record_sandbox_op("metrics_collector_start", int((time.time() - metrics_start) * 1000), True)

     # Start telemetry upload thread
+    telemetry_start = time.time()
     start_telemetry_upload(shutdown_event)
     log_info("Telemetry upload thread started")
+    record_sandbox_op("telemetry_upload_start", int((time.time() - telemetry_start) * 1000), True)

     # Create and change to working directory - raises RuntimeError if fails
     # Directory may not exist if no artifact/storage was downloaded (e.g., first run)
+    working_dir_start = time.time()
+    working_dir_success = True
     try:
         os.makedirs(WORKING_DIR, exist_ok=True)
         os.chdir(WORKING_DIR)
     except OSError as e:
+        working_dir_success = False
+        record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), False, str(e))
         raise RuntimeError(f"Failed to create/change to working directory: {WORKING_DIR} - {e}") from e
+    record_sandbox_op("working_dir_setup", int((time.time() - working_dir_start) * 1000), working_dir_success)

     # Set up Codex configuration if using Codex CLI
     # Claude Code uses ~/.claude by default (no configuration needed)
@@ -9061,6 +9220,8 @@ def _run() -> tuple[int, str]:
         log_info(f"Codex home directory: {codex_home}")

         # Login with API key via stdin (recommended method)
+        codex_login_start = time.time()
+        codex_login_success = False
         api_key = os.environ.get("OPENAI_API_KEY", "")
         if api_key:
             result = subprocess.run(
@@ -9071,13 +9232,16 @@ def _run() -> tuple[int, str]:
             )
             if result.returncode == 0:
                 log_info("Codex authenticated with API key")
+                codex_login_success = True
             else:
                 log_error(f"Codex login failed: {result.stderr}")
         else:
             log_error("OPENAI_API_KEY not set")
+        record_sandbox_op("codex_login", int((time.time() - codex_login_start) * 1000), codex_login_success)

-
-
+    init_duration_ms = int((time.time() - init_start_time) * 1000)
+    record_sandbox_op("init_total", init_duration_ms, True)
+    log_info(f"\u2713 Initialization complete ({init_duration_ms // 1000}s)")

     # Lifecycle: Execution
     log_info("\u25B7 Execution")
@@ -9241,12 +9405,13 @@ def _run() -> tuple[int, str]:
         final_exit_code = 1
         error_message = "Some events failed to send"

-    # Log execution result
-
+    # Log execution result and record metric
+    exec_duration_ms = int((time.time() - exec_start_time) * 1000)
+    record_sandbox_op("cli_execution", exec_duration_ms, agent_exit_code == 0)
     if agent_exit_code == 0 and final_exit_code == 0:
-        log_info(f"\u2713 Execution complete ({
+        log_info(f"\u2713 Execution complete ({exec_duration_ms // 1000}s)")
     else:
-        log_info(f"\u2717 Execution failed ({
+        log_info(f"\u2717 Execution failed ({exec_duration_ms // 1000}s)")

     # Handle completion
     if agent_exit_code == 0 and final_exit_code == 0:
@@ -10230,7 +10395,7 @@ function initMetrics(config) {
     url: "https://api.axiom.co/v1/metrics",
     headers: {
       Authorization: `Bearer ${config.axiomToken}`,
-      "X-Axiom-Dataset": `
+      "X-Axiom-Dataset": `vm0-sandbox-op-log-${env}`
     }
   });
   meterProvider = new MeterProvider({
@@ -10700,6 +10865,42 @@ async function executeJob(context, config, options = {}) {
         completed = true;
         break;
       }
+      if (!options.benchmarkMode) {
+        const processCheck = await ssh.exec(
+          `pgrep -f "env-loader.py" > /dev/null 2>&1 && echo "RUNNING" || echo "DEAD"`
+        );
+        if (processCheck.stdout.trim() === "DEAD") {
+          log(
+            `[Executor] Agent process died unexpectedly without writing exit code`
+          );
+          const logContent = await ssh.exec(
+            `tail -50 ${systemLogFile} 2>/dev/null`
+          );
+          const dmesgCheck = await ssh.exec(
+            `dmesg | tail -20 | grep -iE "killed|oom" 2>/dev/null`
+          );
+          let errorMsg = "Agent process terminated unexpectedly";
+          if (dmesgCheck.stdout.toLowerCase().includes("oom") || dmesgCheck.stdout.toLowerCase().includes("killed")) {
+            errorMsg = "Agent process killed by OOM killer";
+            log(`[Executor] OOM detected: ${dmesgCheck.stdout}`);
+          }
+          if (logContent.stdout) {
+            log(
+              `[Executor] Last log output: ${logContent.stdout.substring(0, 500)}`
+            );
+          }
+          const durationMs2 = Date.now() - startTime;
+          recordRunnerOperation({
+            actionType: "agent_execute",
+            durationMs: durationMs2,
+            success: false
+          });
+          return {
+            exitCode: 1,
+            error: errorMsg
+          };
+        }
+      }
     }
   }
   const durationMs = Date.now() - startTime;
   const duration = Math.round(durationMs / 1e3);
@@ -11099,7 +11300,7 @@ var benchmarkCommand = new Command3("benchmark").description(
 });

 // src/index.ts
-var version = true ? "2.8.
+var version = true ? "2.8.3" : "0.1.0";
 program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
 program.addCommand(startCommand);
 program.addCommand(statusCommand);
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@vm0/runner",
-  "version": "2.8.
+  "version": "2.8.3",
   "description": "Self-hosted runner for VM0 agents",
   "repository": {
     "type": "git",
@@ -22,6 +22,6 @@
     "@opentelemetry/semantic-conventions": "^1.25.0",
     "commander": "^14.0.0",
     "yaml": "^2.3.4",
-    "zod": "^
+    "zod": "^4.1.12"
   }
 }