flywheel-bootstrap-staging 0.1.9.202601271835__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bootstrap/__init__.py +3 -0
- bootstrap/__main__.py +48 -0
- bootstrap/artifacts.py +101 -0
- bootstrap/config_loader.py +122 -0
- bootstrap/constants.py +20 -0
- bootstrap/git_ops.py +324 -0
- bootstrap/install.py +129 -0
- bootstrap/orchestrator.py +797 -0
- bootstrap/payload.py +119 -0
- bootstrap/prompts.py +79 -0
- bootstrap/py.typed +1 -0
- bootstrap/runner.py +145 -0
- bootstrap/telemetry.py +147 -0
- flywheel_bootstrap_staging-0.1.9.202601271835.dist-info/METADATA +94 -0
- flywheel_bootstrap_staging-0.1.9.202601271835.dist-info/RECORD +17 -0
- flywheel_bootstrap_staging-0.1.9.202601271835.dist-info/WHEEL +4 -0
- flywheel_bootstrap_staging-0.1.9.202601271835.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
"""Top-down orchestration for the bootstrap flow.
|
|
2
|
+
|
|
3
|
+
Implementation is intentionally skeletal; individual steps will be filled in once
|
|
4
|
+
design details are finalized.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
import threading
|
|
15
|
+
import json
|
|
16
|
+
import shutil
|
|
17
|
+
|
|
18
|
+
from bootstrap.constants import (
|
|
19
|
+
DEFAULT_ARTIFACT_MANIFEST,
|
|
20
|
+
DEFAULT_RUN_ROOT,
|
|
21
|
+
DEFAULT_SERVER_URL,
|
|
22
|
+
ENV_RUN_ID,
|
|
23
|
+
ENV_RUN_TOKEN,
|
|
24
|
+
ENV_SERVER_URL,
|
|
25
|
+
HEARTBEAT_INTERVAL_SECONDS,
|
|
26
|
+
MAX_ARTIFACT_RETRIES,
|
|
27
|
+
)
|
|
28
|
+
from bootstrap.config_loader import UserConfig, load_codex_config
|
|
29
|
+
from bootstrap.git_ops import GitConfig, initialize_repo, finalize_repo
|
|
30
|
+
from bootstrap.install import codex_login_status_ok, codex_on_path, ensure_codex
|
|
31
|
+
from bootstrap.payload import BootstrapPayload, fetch_bootstrap_payload
|
|
32
|
+
from bootstrap.prompts import build_prompt_text
|
|
33
|
+
from bootstrap.runner import (
|
|
34
|
+
CodexEvent,
|
|
35
|
+
build_invocation,
|
|
36
|
+
run_and_stream,
|
|
37
|
+
)
|
|
38
|
+
from bootstrap.telemetry import (
|
|
39
|
+
post_artifacts,
|
|
40
|
+
post_completion,
|
|
41
|
+
post_error,
|
|
42
|
+
post_heartbeat,
|
|
43
|
+
post_log,
|
|
44
|
+
)
|
|
45
|
+
from bootstrap.artifacts import ManifestResult, ManifestStatus, read_manifest
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class BootstrapConfig:
    """User-supplied CLI arguments plus derived defaults."""

    # Backend identifier for this run (required; validated in _ensure_prerequisites).
    run_id: str
    # Short-lived token used to authenticate all telemetry/artifact posts.
    capability_token: str
    # Path to the user's Codex config.toml; must exist on disk.
    config_path: Path
    # Base URL of the flywheel backend (overridable via CLI/env).
    server_url: str = DEFAULT_SERVER_URL
    # Parent directory under which per-run workspaces are created.
    run_root: Path = DEFAULT_RUN_ROOT
    # Relative filename of the artifact manifest inside the workspace.
    artifact_manifest: str = DEFAULT_ARTIFACT_MANIFEST
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class BootstrapOrchestrator:
|
|
61
|
+
"""Coordinates install, payload fetch, Codex launch, telemetry, and artifacts."""
|
|
62
|
+
|
|
63
|
+
    def __init__(self, config: BootstrapConfig) -> None:
        """Store the run configuration and initialize mutable run state.

        Args:
            config: Parsed CLI/environment configuration for this run.
        """
        self.config = config
        # BOOTSTRAP_MOCK_CODEX short-circuits the real codex launch (test hook).
        self._mock_codex = bool(os.environ.get("BOOTSTRAP_MOCK_CODEX"))
        # Populated by _load_user_config.
        self.user_config: UserConfig | None = None
        # Populated by _fetch_bootstrap_payload.
        self.bootstrap_payload: BootstrapPayload | None = None
        # Populated by _resolve_workspace.
        self.workspace: Path | None = None
        # Populated by _ensure_codex_available (None when codex is on PATH only).
        self.codex_executable: Path | None = None
        # Run id reported by codex itself via streamed events, if any.
        self.codex_run_id: str | None = None
        self.heartbeat_thread: threading.Thread | None = None
        self._stop_heartbeats = threading.Event()
        self.last_stderr: str = ""  # Captured stderr for error reporting
        self.git_config: GitConfig | None = None  # Git config for code persistence
|
|
75
|
+
|
|
76
|
+
    def run(self) -> int:
        """Execute the bootstrap flow.

        Returns:
            Process exit code (0 for success, non-zero for failure).
        """

        try:
            self._ensure_prerequisites()
            self._load_user_config()
            self._resolve_workspace()
            self._ensure_codex_available()
            self._fetch_bootstrap_payload()
            self._initialize_git_repo()  # Clone repo if code persistence enabled
            exit_code = self._launch_codex_and_stream()
            self._finalize_git_repo(
                exit_code
            )  # Commit and push if code persistence enabled
            self._collect_and_post_artifacts(exit_code)
            # NOTE(review): a non-zero codex exit code is reported to the
            # backend via _collect_and_post_artifacts/post_error, but run()
            # still returns 0 here — confirm callers expect that.
            return 0
        except SystemExit:
            # Deliberate fail-fast exits propagate unchanged.
            raise
        except Exception as exc:  # pragma: no cover - defensive
            # Best-effort: report unexpected failures to the backend before exiting.
            post_error(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                reason=repr(exc),
                summary="bootstrap failure",
            )
            print(f"bootstrap failed: {exc}", file=sys.stderr)
            return 1
        finally:
            # Always stop the heartbeat thread, even on SystemExit.
            self._stop_heartbeats.set()
            if self.heartbeat_thread and self.heartbeat_thread.is_alive():
                self.heartbeat_thread.join(timeout=2)
|
|
112
|
+
|
|
113
|
+
# --- individual steps (to be implemented) ---
|
|
114
|
+
|
|
115
|
+
def _ensure_prerequisites(self) -> None:
|
|
116
|
+
"""Validate required binaries/env vars and fail fast if missing."""
|
|
117
|
+
if not self.config.run_id:
|
|
118
|
+
raise SystemExit("missing run id")
|
|
119
|
+
if not self.config.capability_token:
|
|
120
|
+
raise SystemExit("missing capability token")
|
|
121
|
+
if not self.config.config_path.exists():
|
|
122
|
+
raise SystemExit(f"config file not found: {self.config.config_path}")
|
|
123
|
+
|
|
124
|
+
def _load_user_config(self) -> None:
|
|
125
|
+
"""Read the user's Codex config.toml for sandbox/workspace settings."""
|
|
126
|
+
self.user_config = load_codex_config(self.config.config_path)
|
|
127
|
+
for warning in self.user_config.warnings:
|
|
128
|
+
print(f"bootstrap warning: {warning}", file=sys.stderr)
|
|
129
|
+
|
|
130
|
+
def _resolve_workspace(self) -> None:
|
|
131
|
+
"""Decide which working directory to hand to Codex (respect user config if set)."""
|
|
132
|
+
assert self.user_config is not None
|
|
133
|
+
if self.user_config.working_dir:
|
|
134
|
+
# Support the documented "<run_id>" placeholder in config paths.
|
|
135
|
+
raw = str(self.user_config.working_dir)
|
|
136
|
+
if "<run_id>" in raw:
|
|
137
|
+
raw = raw.replace("<run_id>", self.config.run_id)
|
|
138
|
+
workdir = Path(raw).expanduser().resolve()
|
|
139
|
+
else:
|
|
140
|
+
workdir = self.config.run_root / self.config.run_id
|
|
141
|
+
workdir.mkdir(parents=True, exist_ok=True)
|
|
142
|
+
|
|
143
|
+
# Auto-create writable_roots directories if they don't exist
|
|
144
|
+
for root in self.user_config.writable_roots:
|
|
145
|
+
root.mkdir(parents=True, exist_ok=True)
|
|
146
|
+
|
|
147
|
+
manifest_path = workdir / self.config.artifact_manifest
|
|
148
|
+
if self.user_config.writable_roots:
|
|
149
|
+
ok = False
|
|
150
|
+
for root in self.user_config.writable_roots:
|
|
151
|
+
try:
|
|
152
|
+
manifest_path.relative_to(root)
|
|
153
|
+
ok = True
|
|
154
|
+
break
|
|
155
|
+
except ValueError:
|
|
156
|
+
continue
|
|
157
|
+
if not ok:
|
|
158
|
+
roots = ", ".join(str(r) for r in self.user_config.writable_roots)
|
|
159
|
+
raise SystemExit(
|
|
160
|
+
f"manifest path {manifest_path} not in sandbox writable_roots ({roots}); "
|
|
161
|
+
"please add a writable root or adjust config"
|
|
162
|
+
)
|
|
163
|
+
self.workspace = workdir
|
|
164
|
+
|
|
165
|
+
def _ensure_codex_available(self) -> None:
|
|
166
|
+
"""Skip install if present; otherwise download tarball and prepend to PATH."""
|
|
167
|
+
if self._mock_codex:
|
|
168
|
+
return
|
|
169
|
+
codex_path: Path | None = None
|
|
170
|
+
if codex_on_path():
|
|
171
|
+
# On Windows, explicitly look for codex.exe to avoid conflicts
|
|
172
|
+
codex_name = "codex.exe" if sys.platform == "win32" else "codex"
|
|
173
|
+
found = shutil.which(codex_name)
|
|
174
|
+
print(
|
|
175
|
+
f"[bootstrap] shutil.which({codex_name!r}) = {found}", file=sys.stderr
|
|
176
|
+
)
|
|
177
|
+
if found:
|
|
178
|
+
codex_path = Path(found)
|
|
179
|
+
self.codex_executable = codex_path
|
|
180
|
+
else:
|
|
181
|
+
download_dir = self.workspace or self.config.run_root
|
|
182
|
+
self.codex_executable = ensure_codex(download_dir=download_dir)
|
|
183
|
+
codex_path = self.codex_executable
|
|
184
|
+
|
|
185
|
+
if codex_path is None:
|
|
186
|
+
codex_path = Path("codex")
|
|
187
|
+
print(f"[bootstrap] using codex at: {codex_path}", file=sys.stderr)
|
|
188
|
+
self._ensure_codex_authenticated(codex_path)
|
|
189
|
+
|
|
190
|
+
def _fetch_bootstrap_payload(self) -> None:
|
|
191
|
+
"""Call backend /runs/{id}/bootstrap to get the task prompt."""
|
|
192
|
+
self.bootstrap_payload = fetch_bootstrap_payload(
|
|
193
|
+
self.config.server_url, self.config.run_id, self.config.capability_token
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
def _initialize_git_repo(self) -> None:
|
|
197
|
+
"""Initialize git repository if code persistence is enabled.
|
|
198
|
+
|
|
199
|
+
If the bootstrap payload contains a repo_context and github_token,
|
|
200
|
+
we clone the repository and set up the experiment branch.
|
|
201
|
+
"""
|
|
202
|
+
assert self.bootstrap_payload is not None
|
|
203
|
+
assert self.workspace is not None
|
|
204
|
+
|
|
205
|
+
repo_context = self.bootstrap_payload.repo_context
|
|
206
|
+
github_token = self.bootstrap_payload.github_token
|
|
207
|
+
|
|
208
|
+
if repo_context is None or github_token is None:
|
|
209
|
+
self._log("Git: Code persistence not configured, skipping repo setup")
|
|
210
|
+
return
|
|
211
|
+
|
|
212
|
+
self._log(
|
|
213
|
+
f"Git: Initializing code persistence for {repo_context.repo_owner}/{repo_context.repo_name}"
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
# Create git config with telemetry logging
|
|
217
|
+
def log_fn(level: str, message: str) -> None:
|
|
218
|
+
self._log(f"Git: {message}", level=level)
|
|
219
|
+
|
|
220
|
+
self.git_config = GitConfig(
|
|
221
|
+
workspace=self.workspace,
|
|
222
|
+
repo_context=repo_context,
|
|
223
|
+
github_token=github_token,
|
|
224
|
+
log_fn=log_fn,
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# Initialize the repository (clone, credentials, branch)
|
|
228
|
+
if not initialize_repo(self.git_config):
|
|
229
|
+
self._log(
|
|
230
|
+
"Git: Failed to initialize repository, continuing without code persistence",
|
|
231
|
+
level="warning",
|
|
232
|
+
)
|
|
233
|
+
self.git_config = None
|
|
234
|
+
return
|
|
235
|
+
|
|
236
|
+
self._log(
|
|
237
|
+
f"Git: Repository initialized, working on branch {repo_context.branch_name}"
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
def _finalize_git_repo(self, exit_code: int) -> None:
|
|
241
|
+
"""Finalize git repository after codex completes.
|
|
242
|
+
|
|
243
|
+
Commits any changes and pushes them to the remote.
|
|
244
|
+
Only runs if code persistence was successfully initialized.
|
|
245
|
+
"""
|
|
246
|
+
if self.git_config is None:
|
|
247
|
+
return
|
|
248
|
+
|
|
249
|
+
if exit_code != 0:
|
|
250
|
+
self._log(
|
|
251
|
+
f"Git: Codex exited with code {exit_code}, skipping push",
|
|
252
|
+
level="warning",
|
|
253
|
+
)
|
|
254
|
+
# Still commit changes so they're not lost
|
|
255
|
+
from bootstrap.git_ops import commit_changes
|
|
256
|
+
|
|
257
|
+
commit_changes(
|
|
258
|
+
self.git_config,
|
|
259
|
+
f"[WIP] Flywheel experiment run (failed): {self.config.run_id}",
|
|
260
|
+
)
|
|
261
|
+
return
|
|
262
|
+
|
|
263
|
+
self._log("Git: Finalizing repository, committing and pushing changes")
|
|
264
|
+
|
|
265
|
+
if finalize_repo(self.git_config, self.config.run_id):
|
|
266
|
+
self._log("Git: Changes pushed successfully")
|
|
267
|
+
else:
|
|
268
|
+
self._log("Git: Failed to push changes", level="error")
|
|
269
|
+
|
|
270
|
+
def _ensure_codex_authenticated(self, codex_path: Path) -> None:
|
|
271
|
+
"""Fail fast if codex is present but not logged in."""
|
|
272
|
+
if codex_login_status_ok(codex_path):
|
|
273
|
+
return
|
|
274
|
+
raise SystemExit(
|
|
275
|
+
"Codex isn't authenticated. Run `codex login` (browser/device flow) or "
|
|
276
|
+
"`printenv OPENAI_API_KEY | codex login --with-api-key` then rerun the bootstrap."
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
    def _launch_codex_and_stream(self) -> int:
        """Run codex exec --json, forward logs/heartbeats, and capture exit status."""
        assert self.workspace is not None
        assert self.bootstrap_payload is not None
        assert self.user_config is not None

        # Combine the server task prompt with user workspace instructions and
        # the manifest filename Codex must write at the end.
        prompt_text = build_prompt_text(
            server_prompt=self.bootstrap_payload.prompt,
            workspace_instructions=self.user_config.workspace_instructions,
            artifact_manifest=self.config.artifact_manifest,
        )

        if self._mock_codex:
            # Fast-path: emit one heartbeat, a couple logs, a run id, and exit 0.
            post_heartbeat(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                summary="alive (mock)",
            )
            for event in self._mock_codex_events():
                self._handle_event(event)
            self._write_mock_manifest()
            return 0

        codex_path = self.codex_executable or Path("codex")
        env = os.environ.copy()
        env.update(
            {
                "FLYWHEEL_RUN_ID": self.config.run_id,
                "FLYWHEEL_RUN_TOKEN": self.config.capability_token,
                "FLYWHEEL_SERVER": self.config.server_url,
                "FLYWHEEL_WORKSPACE": str(self.workspace.resolve()),
            }
        )

        # Debug: show API key status
        api_key = env.get("OPENAI_API_KEY", "")
        if api_key:
            print(
                f"[bootstrap] OPENAI_API_KEY is set (starts with: {api_key[:10]}...)",
                file=sys.stderr,
            )
        else:
            print(
                "[bootstrap] WARNING: OPENAI_API_KEY is NOT set in environment",
                file=sys.stderr,
            )

        # If using LM Studio, set the API base URL for Codex
        if self.user_config.oss_provider == "lmstudio":
            env["OPENAI_API_BASE"] = "http://localhost:1234/v1"
            env["OPENAI_BASE_URL"] = "http://localhost:1234/v1"
            # LM Studio doesn't validate API keys, but some tools require one to be set
            if "OPENAI_API_KEY" not in env:
                env["OPENAI_API_KEY"] = "lm-studio"
            print(
                "[bootstrap] Using LM Studio at http://localhost:1234/v1",
                file=sys.stderr,
            )

        # Ensure Codex actually sees the same config file bootstrap parsed.
        # Codex reads config from CODEX_HOME/config.toml; point it at a per-run copy.
        try:
            codex_home = self.workspace / ".codex_home"
            codex_home.mkdir(parents=True, exist_ok=True)

            # Copy config but expand ~ paths so codex sees absolute paths
            config_text = self.config.config_path.read_text(encoding="utf-8")
            # Expand common tilde patterns in the config
            # Use forward slashes for cross-platform compatibility (works on Windows too)
            # and avoids TOML interpreting backslashes as escape sequences
            home_dir = str(Path.home()).replace("\\", "/")
            config_text = config_text.replace('"~/', f'"{home_dir}/')
            config_text = config_text.replace("'~/", f"'{home_dir}/")
            (codex_home / "config.toml").write_text(config_text, encoding="utf-8")

            # Also copy auth credentials from user's default codex home so the
            # spawned codex process stays authenticated.
            # TODO: more sophisticated auth
            user_codex_home = Path.home() / ".codex"
            user_auth = user_codex_home / "auth.json"
            if user_auth.exists():
                shutil.copyfile(user_auth, codex_home / "auth.json")
                print(f"[bootstrap] Copied auth from {user_auth}", file=sys.stderr)
            else:
                print(
                    f"[bootstrap] WARNING: No auth.json found at {user_auth}",
                    file=sys.stderr,
                )

            env["CODEX_HOME"] = str(codex_home)
            print(f"[bootstrap] CODEX_HOME set to: {codex_home}", file=sys.stderr)
        except Exception as exc:
            # Best-effort: codex may still work with its default CODEX_HOME.
            post_log(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                level="warning",
                message="failed to prepare CODEX_HOME config override; continuing",
                extra={"error": repr(exc)},
            )

        # Pass sandbox settings as proper CLI flags
        # For provisioned instances, we use --yolo to completely bypass sandbox and approvals
        extra_flags: list[str] = []
        if self.user_config.sandbox_mode:
            if self.user_config.sandbox_mode == "danger-full-access":
                # Use --yolo (--dangerously-bypass-approvals-and-sandbox) for unrestricted access
                # This is safe on provisioned instances since they're isolated VMs
                extra_flags.append("--yolo")
            else:
                extra_flags.extend(["--sandbox", self.user_config.sandbox_mode])

        invocation = build_invocation(
            codex_executable=Path(codex_path),
            prompt=prompt_text,
            workdir=self.workspace,
            env=env,
            extra_flags=tuple(extra_flags),
        )

        # Start heartbeat thread
        self._stop_heartbeats.clear()
        self.heartbeat_thread = threading.Thread(
            target=self._heartbeat_loop, daemon=True
        )
        self.heartbeat_thread.start()

        # Default to failure if the runner never reports an exit code.
        exit_code: int = 1
        for event in run_and_stream(invocation):
            self._handle_event(event)
        if invocation.exit_code is not None:
            exit_code = invocation.exit_code
        # Capture stderr for error reporting
        if invocation.stderr_output:
            self.last_stderr = invocation.stderr_output
        return exit_code
|
|
417
|
+
|
|
418
|
+
    def _collect_and_post_artifacts(self, exit_code: int) -> None:
        """Read manifest (and optional resume attempts) then POST /artifacts/complete/error."""
        assert self.workspace is not None
        manifest_path = self.workspace / self.config.artifact_manifest
        manifest_result, artifacts = self._load_artifacts_with_content(manifest_path)

        # Auto-resume up to MAX_ARTIFACT_RETRIES times if artifacts are
        # missing or the manifest was malformed.
        # Retries require a codex_run_id (i.e. codex actually produced events).
        retries = 0
        while not artifacts and self.codex_run_id and retries < MAX_ARTIFACT_RETRIES:
            retries += 1
            self._attempt_artifact_retry(manifest_path, manifest_result)
            manifest_result, artifacts = self._load_artifacts_with_content(
                manifest_path
            )

        if artifacts:
            post_artifacts(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                artifacts,
            )

        # Report final run status: completion on exit 0, error otherwise.
        if exit_code == 0:
            summary = "codex run completed"
            post_completion(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                summary,
            )
        else:
            # Include stderr in error reason for debugging
            reason = f"codex exit code {exit_code}"
            if self.last_stderr:
                # Truncate stderr to avoid overly long error messages
                stderr_preview = self.last_stderr[:2000]
                if len(self.last_stderr) > 2000:
                    stderr_preview += "... (truncated)"
                reason = f"{reason}\nstderr: {stderr_preview}"
            post_error(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                reason=reason,
                summary="codex failed",
            )
|
|
466
|
+
|
|
467
|
+
def _load_artifacts_with_content(
|
|
468
|
+
self, manifest_path: Path
|
|
469
|
+
) -> tuple[ManifestResult, list[dict[str, object]]]:
|
|
470
|
+
"""Load artifacts and inline content when a path is provided.
|
|
471
|
+
|
|
472
|
+
For text/html artifacts, if payload includes a "path" (or "file") inside the workspace,
|
|
473
|
+
read the file as UTF-8 and attach it as payload["content"].
|
|
474
|
+
For image artifacts, read as binary and create a base64 data URL.
|
|
475
|
+
This keeps artifacts self-contained for server-side rendering.
|
|
476
|
+
Best-effort; failures are logged and skipped.
|
|
477
|
+
|
|
478
|
+
Size limit: 2MB per artifact to prevent huge payloads.
|
|
479
|
+
|
|
480
|
+
Returns a tuple of (ManifestResult, enriched artifacts list).
|
|
481
|
+
"""
|
|
482
|
+
import base64
|
|
483
|
+
import mimetypes
|
|
484
|
+
|
|
485
|
+
MAX_ARTIFACT_SIZE = 25 * 1024 * 1024 # 25MB
|
|
486
|
+
|
|
487
|
+
assert self.workspace is not None
|
|
488
|
+
manifest_result = read_manifest(manifest_path)
|
|
489
|
+
artifacts = manifest_result.artifacts
|
|
490
|
+
enriched: list[dict[str, object]] = []
|
|
491
|
+
|
|
492
|
+
# Checkpoint file extensions (model weights, etc.)
|
|
493
|
+
checkpoint_extensions = {
|
|
494
|
+
".pt",
|
|
495
|
+
".pth",
|
|
496
|
+
".ckpt",
|
|
497
|
+
".safetensors",
|
|
498
|
+
".bin",
|
|
499
|
+
".h5",
|
|
500
|
+
".hdf5",
|
|
501
|
+
".pkl",
|
|
502
|
+
".pickle",
|
|
503
|
+
".joblib",
|
|
504
|
+
".npy",
|
|
505
|
+
".npz",
|
|
506
|
+
".onnx",
|
|
507
|
+
".pb",
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
for artifact in artifacts:
|
|
511
|
+
try:
|
|
512
|
+
artifact_type = str(artifact.get("artifact_type", "")).lower()
|
|
513
|
+
payload = artifact.get("payload", {})
|
|
514
|
+
|
|
515
|
+
# Detect checkpoint files and convert to 'checkpoint' type
|
|
516
|
+
if isinstance(payload, dict):
|
|
517
|
+
path_str = payload.get("path") or payload.get("file")
|
|
518
|
+
if isinstance(path_str, str) and path_str:
|
|
519
|
+
path_lower = path_str.lower()
|
|
520
|
+
if any(
|
|
521
|
+
path_lower.endswith(ext) for ext in checkpoint_extensions
|
|
522
|
+
):
|
|
523
|
+
# Convert to checkpoint type
|
|
524
|
+
artifact = dict(artifact)
|
|
525
|
+
artifact["artifact_type"] = "checkpoint"
|
|
526
|
+
artifact_type = "checkpoint"
|
|
527
|
+
# Add file size if we can resolve the path
|
|
528
|
+
resolved = self._resolve_artifact_path(path_str)
|
|
529
|
+
if resolved and resolved.is_file():
|
|
530
|
+
payload = dict(payload)
|
|
531
|
+
payload["size_bytes"] = resolved.stat().st_size
|
|
532
|
+
artifact["payload"] = payload
|
|
533
|
+
|
|
534
|
+
# Handle text and html artifacts - inline as content
|
|
535
|
+
if artifact_type in ("text", "html") and isinstance(payload, dict):
|
|
536
|
+
path_str = payload.get("path") or payload.get("file")
|
|
537
|
+
if isinstance(path_str, str) and path_str:
|
|
538
|
+
resolved = self._resolve_artifact_path(path_str)
|
|
539
|
+
if resolved and resolved.is_file():
|
|
540
|
+
file_size = resolved.stat().st_size
|
|
541
|
+
if file_size > MAX_ARTIFACT_SIZE:
|
|
542
|
+
payload = dict(payload)
|
|
543
|
+
payload["rendering_error"] = (
|
|
544
|
+
f"File too large ({file_size} bytes, max {MAX_ARTIFACT_SIZE})"
|
|
545
|
+
)
|
|
546
|
+
else:
|
|
547
|
+
try:
|
|
548
|
+
payload = dict(payload)
|
|
549
|
+
payload["content"] = resolved.read_text(
|
|
550
|
+
encoding="utf-8"
|
|
551
|
+
)
|
|
552
|
+
except UnicodeDecodeError:
|
|
553
|
+
self._log(
|
|
554
|
+
f"failed to read {artifact_type} artifact at {resolved} (encoding)",
|
|
555
|
+
level="warning",
|
|
556
|
+
)
|
|
557
|
+
artifact = dict(artifact)
|
|
558
|
+
artifact["payload"] = payload
|
|
559
|
+
|
|
560
|
+
# Handle image artifacts - inline as data_url
|
|
561
|
+
elif artifact_type == "image" and isinstance(payload, dict):
|
|
562
|
+
path_str = payload.get("path") or payload.get("file")
|
|
563
|
+
if isinstance(path_str, str) and path_str:
|
|
564
|
+
resolved = self._resolve_artifact_path(path_str)
|
|
565
|
+
if resolved and resolved.is_file():
|
|
566
|
+
file_size = resolved.stat().st_size
|
|
567
|
+
if file_size > MAX_ARTIFACT_SIZE:
|
|
568
|
+
payload = dict(payload)
|
|
569
|
+
payload["rendering_error"] = (
|
|
570
|
+
f"File too large ({file_size} bytes, max {MAX_ARTIFACT_SIZE})"
|
|
571
|
+
)
|
|
572
|
+
else:
|
|
573
|
+
try:
|
|
574
|
+
image_data = resolved.read_bytes()
|
|
575
|
+
mime_type, _ = mimetypes.guess_type(str(resolved))
|
|
576
|
+
if not mime_type:
|
|
577
|
+
mime_type = "image/png" # default fallback
|
|
578
|
+
b64 = base64.b64encode(image_data).decode("ascii")
|
|
579
|
+
payload = dict(payload)
|
|
580
|
+
payload["data_url"] = (
|
|
581
|
+
f"data:{mime_type};base64,{b64}"
|
|
582
|
+
)
|
|
583
|
+
except Exception as exc:
|
|
584
|
+
self._log(
|
|
585
|
+
f"failed to read image artifact at {resolved}: {exc}",
|
|
586
|
+
level="warning",
|
|
587
|
+
)
|
|
588
|
+
artifact = dict(artifact)
|
|
589
|
+
artifact["payload"] = payload
|
|
590
|
+
|
|
591
|
+
# Handle table artifacts - inline as content
|
|
592
|
+
elif artifact_type == "table" and isinstance(payload, dict):
|
|
593
|
+
path_str = payload.get("path") or payload.get("file")
|
|
594
|
+
if isinstance(path_str, str) and path_str:
|
|
595
|
+
resolved = self._resolve_artifact_path(path_str)
|
|
596
|
+
if resolved and resolved.is_file():
|
|
597
|
+
file_size = resolved.stat().st_size
|
|
598
|
+
if file_size > MAX_ARTIFACT_SIZE:
|
|
599
|
+
payload = dict(payload)
|
|
600
|
+
payload["rendering_error"] = (
|
|
601
|
+
f"File too large ({file_size} bytes, max {MAX_ARTIFACT_SIZE})"
|
|
602
|
+
)
|
|
603
|
+
else:
|
|
604
|
+
try:
|
|
605
|
+
payload = dict(payload)
|
|
606
|
+
payload["content"] = resolved.read_text(
|
|
607
|
+
encoding="utf-8"
|
|
608
|
+
)
|
|
609
|
+
except UnicodeDecodeError:
|
|
610
|
+
self._log(
|
|
611
|
+
f"failed to read table artifact at {resolved} (encoding)",
|
|
612
|
+
level="warning",
|
|
613
|
+
)
|
|
614
|
+
artifact = dict(artifact)
|
|
615
|
+
artifact["payload"] = payload
|
|
616
|
+
|
|
617
|
+
except Exception as exc: # pragma: no cover - defensive
|
|
618
|
+
self._log(f"artifact enrichment error: {exc}", level="warning")
|
|
619
|
+
enriched.append(dict(artifact))
|
|
620
|
+
return manifest_result, enriched
|
|
621
|
+
|
|
622
|
+
def _resolve_artifact_path(self, path_str: str) -> Path | None:
|
|
623
|
+
"""Resolve artifact path within workspace, returning None if invalid."""
|
|
624
|
+
assert self.workspace is not None
|
|
625
|
+
path = Path(path_str)
|
|
626
|
+
resolved = (self.workspace / path).resolve()
|
|
627
|
+
workspace_root = self.workspace.resolve()
|
|
628
|
+
if resolved == workspace_root or workspace_root in resolved.parents:
|
|
629
|
+
return resolved
|
|
630
|
+
self._log(f"skipping artifact outside workspace: {resolved}", level="warning")
|
|
631
|
+
return None
|
|
632
|
+
|
|
633
|
+
def _heartbeat_loop(self) -> None:
|
|
634
|
+
while not self._stop_heartbeats.is_set():
|
|
635
|
+
try:
|
|
636
|
+
post_heartbeat(
|
|
637
|
+
self.config.server_url,
|
|
638
|
+
self.config.run_id,
|
|
639
|
+
self.config.capability_token,
|
|
640
|
+
summary="alive",
|
|
641
|
+
)
|
|
642
|
+
except Exception as exc: # pragma: no cover - best effort
|
|
643
|
+
print(f"heartbeat failed: {exc}", file=sys.stderr)
|
|
644
|
+
self._stop_heartbeats.wait(HEARTBEAT_INTERVAL_SECONDS)
|
|
645
|
+
|
|
646
|
+
def _handle_event(self, event: CodexEvent) -> None:
|
|
647
|
+
post_log(
|
|
648
|
+
self.config.server_url,
|
|
649
|
+
self.config.run_id,
|
|
650
|
+
self.config.capability_token,
|
|
651
|
+
level="info",
|
|
652
|
+
message=str(event.raw),
|
|
653
|
+
extra={},
|
|
654
|
+
)
|
|
655
|
+
if isinstance(event.raw, dict):
|
|
656
|
+
run_id = event.raw.get("run_id")
|
|
657
|
+
if isinstance(run_id, str):
|
|
658
|
+
self.codex_run_id = run_id
|
|
659
|
+
|
|
660
|
+
    def _attempt_artifact_retry(
        self, manifest_path: Path, manifest_result: ManifestResult
    ) -> None:
        """Retry artifact collection via ``codex exec`` with a feedback prompt.

        Both MISSING and MALFORMED manifests are handled by launching a new
        Codex exec with a targeted prompt describing the problem and telling
        Codex exactly what to do. This is preferable to ``codex resume``
        which cannot accept additional instructions.
        """
        # Only retry when codex actually ran (it reported a run id via events).
        if not self.codex_run_id:
            return

        manifest_name = self.config.artifact_manifest

        if manifest_result.status == ManifestStatus.MALFORMED:
            error_detail = manifest_result.error or "unknown error"
            raw_content = ""
            if manifest_path.exists():
                try:
                    # Show codex the broken content (first 2000 chars) so it can fix it.
                    raw_content = manifest_path.read_text(encoding="utf-8")[:2000]
                except Exception:
                    raw_content = "<could not read file>"

            fix_prompt = (
                "The artifact manifest file at "
                f"$FLYWHEEL_WORKSPACE/{manifest_name} is malformed.\n\n"
                f"Error: {error_detail}\n\n"
                f"Current file contents:\n{raw_content}\n\n"
                "Please rewrite this file so it is a valid JSON list of "
                "artifact entries. Each entry must be an object with "
                '"artifact_type" and "payload" keys. The file must be a '
                "top-level JSON array, for example:\n"
                "[\n"
                ' {"artifact_type": "text", "payload": {"content": "..."}},\n'
                ' {"artifact_type": "image", "payload": {"path": "plot.png",'
                ' "format": "png"}}\n'
                "]\n\n"
                "Do NOT wrap the list in an object. The file must start with "
                "[ and end with ].\n"
                "Only fix the manifest format — do not change the actual "
                "artifact content or paths."
            )
            log_msg = "attempting codex exec to fix malformed artifact manifest"
        else:
            # MISSING — the file was never written.
            fix_prompt = (
                "The artifact manifest file was not found at "
                f"$FLYWHEEL_WORKSPACE/{manifest_name}.\n\n"
                "Your task already completed successfully, but the manifest "
                "file is missing. Please write the manifest now.\n\n"
                "The file must be a valid JSON list of artifact entries. "
                'Each entry must be an object with "artifact_type" and '
                '"payload" keys. The file must be a top-level JSON array, '
                "for example:\n"
                "[\n"
                ' {"artifact_type": "text", "payload": {"content": "..."}},\n'
                ' {"artifact_type": "image", "payload": {"path": "plot.png",'
                ' "format": "png"}}\n'
                "]\n\n"
                "Do NOT wrap the list in an object. The file must start with "
                "[ and end with ].\n"
                "Look at the files you produced in the workspace and create "
                "the manifest based on what you find."
            )
            log_msg = "attempting codex exec to write missing artifact manifest"

        self._log(
            log_msg,
            extra={
                "status": manifest_result.status.value,
                "error": manifest_result.error,
            },
        )

        codex_path = self.codex_executable or Path("codex")
        try:
            # Fresh invocation (not resume): the fix prompt carries all context.
            invocation = build_invocation(
                codex_executable=codex_path,
                prompt=fix_prompt,
                workdir=self.workspace or Path("."),
                env=os.environ.copy(),
            )
            for event in run_and_stream(invocation):
                self._handle_event(event)
        except Exception as exc:  # pragma: no cover
            self._log(
                "codex artifact retry failed",
                level="error",
                extra={"error": repr(exc)},
            )
|
|
751
|
+
|
|
752
|
+
def _log(
|
|
753
|
+
self, message: str, level: str = "info", extra: dict[str, object] | None = None
|
|
754
|
+
) -> None:
|
|
755
|
+
"""Lightweight logger that routes to telemetry."""
|
|
756
|
+
post_log(
|
|
757
|
+
self.config.server_url,
|
|
758
|
+
self.config.run_id,
|
|
759
|
+
self.config.capability_token,
|
|
760
|
+
level=level,
|
|
761
|
+
message=message,
|
|
762
|
+
extra=extra or {},
|
|
763
|
+
)
|
|
764
|
+
|
|
765
|
+
# --- mock codex helpers (used in tests via BOOTSTRAP_MOCK_CODEX=1) ---
|
|
766
|
+
|
|
767
|
+
def _mock_codex_events(self):
|
|
768
|
+
yield CodexEvent(raw={"run_id": "mock-codex-run"})
|
|
769
|
+
yield CodexEvent(raw={"message": "mock: starting work"})
|
|
770
|
+
yield CodexEvent(raw={"message": "mock: finished"})
|
|
771
|
+
self.codex_run_id = "mock-codex-run"
|
|
772
|
+
|
|
773
|
+
def _write_mock_manifest(self) -> None:
|
|
774
|
+
assert self.workspace is not None
|
|
775
|
+
manifest_path = self.workspace / self.config.artifact_manifest
|
|
776
|
+
manifest = [{"artifact_type": "text", "payload": {"content": "mock artifact"}}]
|
|
777
|
+
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
def build_config(args: Any) -> BootstrapConfig:
    """Construct BootstrapConfig from CLI args and environment.

    CLI flags win; environment variables are the fallback, and missing
    required values abort via SystemExit.
    """
    resolved_server = args.server or os.environ.get(ENV_SERVER_URL, DEFAULT_SERVER_URL)
    resolved_run_id = args.run_id or _env_or_throw(ENV_RUN_ID, "run id")
    resolved_token = args.token or _env_or_throw(ENV_RUN_TOKEN, "capability token")
    return BootstrapConfig(
        run_id=resolved_run_id,
        capability_token=resolved_token,
        config_path=Path(args.config).expanduser().resolve(),
        server_url=resolved_server,
    )
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def _env_or_throw(var: str, label: str) -> str:
|
|
794
|
+
value = os.environ.get(var)
|
|
795
|
+
if not value:
|
|
796
|
+
raise SystemExit(f"missing {label} (pass flag or set {var})")
|
|
797
|
+
return value
|