flywheel-bootstrap-staging 0.1.9.202601271835__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,797 @@
1
+ """Top-down orchestration for the bootstrap flow.
2
+
3
+ Each step of the flow is implemented as a private method of
4
+ ``BootstrapOrchestrator`` and invoked in order by ``BootstrapOrchestrator.run``.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ from typing import Any
12
+ import os
13
+ import sys
14
+ import threading
15
+ import json
16
+ import shutil
17
+
18
+ from bootstrap.constants import (
19
+ DEFAULT_ARTIFACT_MANIFEST,
20
+ DEFAULT_RUN_ROOT,
21
+ DEFAULT_SERVER_URL,
22
+ ENV_RUN_ID,
23
+ ENV_RUN_TOKEN,
24
+ ENV_SERVER_URL,
25
+ HEARTBEAT_INTERVAL_SECONDS,
26
+ MAX_ARTIFACT_RETRIES,
27
+ )
28
+ from bootstrap.config_loader import UserConfig, load_codex_config
29
+ from bootstrap.git_ops import GitConfig, initialize_repo, finalize_repo
30
+ from bootstrap.install import codex_login_status_ok, codex_on_path, ensure_codex
31
+ from bootstrap.payload import BootstrapPayload, fetch_bootstrap_payload
32
+ from bootstrap.prompts import build_prompt_text
33
+ from bootstrap.runner import (
34
+ CodexEvent,
35
+ build_invocation,
36
+ run_and_stream,
37
+ )
38
+ from bootstrap.telemetry import (
39
+ post_artifacts,
40
+ post_completion,
41
+ post_error,
42
+ post_heartbeat,
43
+ post_log,
44
+ )
45
+ from bootstrap.artifacts import ManifestResult, ManifestStatus, read_manifest
46
+
47
+
48
@dataclass
class BootstrapConfig:
    """User-supplied CLI arguments plus derived defaults.

    Instances are built by ``build_config`` and consumed by
    ``BootstrapOrchestrator``.
    """

    # Run identifier; passed to every telemetry call and used as the default
    # workspace directory name under ``run_root``.
    run_id: str
    # Token passed alongside ``run_id`` on every call to the server.
    capability_token: str
    # Location of the user's Codex ``config.toml``.
    config_path: Path
    # Base URL of the backend that receives telemetry and serves the payload.
    server_url: str = DEFAULT_SERVER_URL
    # Parent directory of the per-run workspace when the user config does not
    # set a working directory.
    run_root: Path = DEFAULT_RUN_ROOT
    # Manifest file name, joined onto the workspace path.
    artifact_manifest: str = DEFAULT_ARTIFACT_MANIFEST
58
+
59
+
60
+ class BootstrapOrchestrator:
61
+ """Coordinates install, payload fetch, Codex launch, telemetry, and artifacts."""
62
+
63
def __init__(self, config: BootstrapConfig) -> None:
    """Store *config* and reset all per-run state to its empty value."""
    self.config = config

    # Any non-empty BOOTSTRAP_MOCK_CODEX value switches on the mock code path.
    mock_flag = os.environ.get("BOOTSTRAP_MOCK_CODEX")
    self._mock_codex = bool(mock_flag)

    # Filled in one by one as run() walks through its steps.
    self.user_config: UserConfig | None = None
    self.bootstrap_payload: BootstrapPayload | None = None
    self.workspace: Path | None = None
    self.codex_executable: Path | None = None
    self.codex_run_id: str | None = None

    # Heartbeat machinery: the thread is created on launch, the event stops it.
    self._stop_heartbeats = threading.Event()
    self.heartbeat_thread: threading.Thread | None = None

    # Captured stderr for error reporting.
    self.last_stderr: str = ""
    # Git config for code persistence; stays None when persistence is off.
    self.git_config: GitConfig | None = None
75
+
76
def run(self) -> int:
    """Execute the bootstrap flow end to end.

    The steps run in a fixed order: prerequisites, user config, workspace,
    Codex availability, payload fetch, git setup, Codex launch, git
    finalisation, artifact collection.

    Returns:
        The Codex exit code when the flow ran to completion (0 on success),
        or 1 when an unexpected exception aborted the flow.

    Raises:
        SystemExit: re-raised unchanged when a step fails fast.
    """

    try:
        self._ensure_prerequisites()
        self._load_user_config()
        self._resolve_workspace()
        self._ensure_codex_available()
        self._fetch_bootstrap_payload()
        self._initialize_git_repo()  # Clone repo if code persistence enabled
        exit_code = self._launch_codex_and_stream()
        # Commit and push if code persistence enabled
        self._finalize_git_repo(exit_code)
        self._collect_and_post_artifacts(exit_code)
        # Propagate a Codex failure to the caller instead of masking it as 0.
        return exit_code
    except SystemExit:
        raise
    except Exception as exc:  # pragma: no cover - defensive
        # Reporting is best effort: a telemetry failure must not replace the
        # original error or prevent the non-zero exit code.
        try:
            post_error(
                self.config.server_url,
                self.config.run_id,
                self.config.capability_token,
                reason=repr(exc),
                summary="bootstrap failure",
            )
        except Exception as report_exc:
            print(
                f"bootstrap failed to report error: {report_exc}",
                file=sys.stderr,
            )
        print(f"bootstrap failed: {exc}", file=sys.stderr)
        return 1
    finally:
        # Always stop the heartbeat thread, whichever way the flow ended.
        self._stop_heartbeats.set()
        if self.heartbeat_thread and self.heartbeat_thread.is_alive():
            self.heartbeat_thread.join(timeout=2)
112
+
113
+ # --- individual steps ---
114
+
115
def _ensure_prerequisites(self) -> None:
    """Fail fast when the run id, capability token, or config file is missing.

    Raises:
        SystemExit: with a message naming the first missing prerequisite.
    """
    cfg = self.config
    if not cfg.run_id:
        raise SystemExit("missing run id")
    if not cfg.capability_token:
        raise SystemExit("missing capability token")

    config_file = cfg.config_path
    if config_file.exists():
        return
    raise SystemExit(f"config file not found: {config_file}")
123
+
124
def _load_user_config(self) -> None:
    """Load the user's Codex config and echo its warnings to stderr."""
    loaded = load_codex_config(self.config.config_path)
    self.user_config = loaded
    for note in loaded.warnings:
        print(f"bootstrap warning: {note}", file=sys.stderr)
129
+
130
def _resolve_workspace(self) -> None:
    """Pick, create, and validate the working directory handed to Codex.

    The user config's ``working_dir`` wins when set (with the ``<run_id>``
    placeholder substituted); otherwise ``run_root / run_id`` is used. The
    chosen directory and every configured writable root are created if
    missing. When writable roots are configured, the manifest path must lie
    inside one of them.

    Raises:
        SystemExit: if writable roots are configured and none of them
            contains the manifest path.
    """
    assert self.user_config is not None
    configured = self.user_config.working_dir
    if configured:
        # Support the documented "<run_id>" placeholder in config paths.
        workdir = Path(str(configured).replace("<run_id>", self.config.run_id))
    else:
        workdir = Path(self.config.run_root) / self.config.run_id
    # Normalise in BOTH branches: the containment check below is a pure path
    # comparison, so "..", "~" or a relative run_root would otherwise make a
    # valid workspace look like it sits outside every writable root.
    workdir = workdir.expanduser().resolve()
    workdir.mkdir(parents=True, exist_ok=True)

    # Auto-create writable_roots directories if they don't exist
    writable_roots = [
        Path(root).expanduser().resolve()
        for root in self.user_config.writable_roots
    ]
    for root in writable_roots:
        root.mkdir(parents=True, exist_ok=True)

    manifest_path = workdir / self.config.artifact_manifest
    if writable_roots:
        contained = any(
            root == manifest_path or root in manifest_path.parents
            for root in writable_roots
        )
        if not contained:
            roots = ", ".join(str(r) for r in writable_roots)
            raise SystemExit(
                f"manifest path {manifest_path} not in sandbox writable_roots ({roots}); "
                "please add a writable root or adjust config"
            )
    self.workspace = workdir
164
+
165
def _ensure_codex_available(self) -> None:
    """Locate Codex (or install it) and verify it is authenticated.

    Does nothing in mock mode. When Codex is already on PATH its location is
    looked up with ``shutil.which``; otherwise ``ensure_codex`` is asked to
    provide it, using the workspace (or the run root) as download directory.
    """
    if self._mock_codex:
        return

    if codex_on_path():
        # On Windows, explicitly look for codex.exe to avoid conflicts
        binary_name = "codex.exe" if sys.platform == "win32" else "codex"
        located = shutil.which(binary_name)
        print(
            f"[bootstrap] shutil.which({binary_name!r}) = {located}", file=sys.stderr
        )
        resolved: Path | None = Path(located) if located else None
        self.codex_executable = resolved
    else:
        target_dir = self.workspace or self.config.run_root
        self.codex_executable = ensure_codex(download_dir=target_dir)
        resolved = self.codex_executable

    # Fall back to the bare command name when no concrete path is known.
    if resolved is None:
        resolved = Path("codex")
    print(f"[bootstrap] using codex at: {resolved}", file=sys.stderr)
    self._ensure_codex_authenticated(resolved)
189
+
190
def _fetch_bootstrap_payload(self) -> None:
    """Fetch the bootstrap payload for this run and keep it on the instance."""
    cfg = self.config
    payload = fetch_bootstrap_payload(
        cfg.server_url, cfg.run_id, cfg.capability_token
    )
    self.bootstrap_payload = payload
195
+
196
def _initialize_git_repo(self) -> None:
    """Set up the git repository when code persistence is configured.

    Persistence is skipped (with a log line) unless the payload carries both
    a ``repo_context`` and a ``github_token``. When ``initialize_repo``
    reports failure, ``self.git_config`` is reset to ``None`` and the run
    continues without persistence.
    """
    assert self.bootstrap_payload is not None
    assert self.workspace is not None

    payload = self.bootstrap_payload
    repo = payload.repo_context
    token = payload.github_token

    if not (repo is not None and token is not None):
        self._log("Git: Code persistence not configured, skipping repo setup")
        return

    self._log(
        f"Git: Initializing code persistence for {repo.repo_owner}/{repo.repo_name}"
    )

    # Route git-layer log lines through telemetry with a "Git: " prefix.
    def log_fn(level: str, message: str) -> None:
        self._log(f"Git: {message}", level=level)

    git_config = GitConfig(
        workspace=self.workspace,
        repo_context=repo,
        github_token=token,
        log_fn=log_fn,
    )
    self.git_config = git_config

    # Initialize the repository (clone, credentials, branch)
    if initialize_repo(git_config):
        self._log(
            f"Git: Repository initialized, working on branch {repo.branch_name}"
        )
        return

    self._log(
        "Git: Failed to initialize repository, continuing without code persistence",
        level="warning",
    )
    self.git_config = None
239
+
240
def _finalize_git_repo(self, exit_code: int) -> None:
    """Persist workspace changes to git once Codex has finished.

    No-op unless code persistence was initialised. On a zero exit code the
    changes are committed and pushed via ``finalize_repo``; on a non-zero
    exit code they are only committed (as a WIP commit) and the push is
    skipped.
    """
    git_config = self.git_config
    if git_config is None:
        return

    if exit_code == 0:
        self._log("Git: Finalizing repository, committing and pushing changes")
        pushed = finalize_repo(git_config, self.config.run_id)
        if pushed:
            self._log("Git: Changes pushed successfully")
        else:
            self._log("Git: Failed to push changes", level="error")
        return

    self._log(
        f"Git: Codex exited with code {exit_code}, skipping push",
        level="warning",
    )
    # Still commit changes so they're not lost
    from bootstrap.git_ops import commit_changes

    wip_message = f"[WIP] Flywheel experiment run (failed): {self.config.run_id}"
    commit_changes(git_config, wip_message)
269
+
270
def _ensure_codex_authenticated(self, codex_path: Path) -> None:
    """Abort the bootstrap when the Codex login status check fails.

    Raises:
        SystemExit: with login instructions when ``codex_login_status_ok``
            returns a falsy value for *codex_path*.
    """
    if not codex_login_status_ok(codex_path):
        raise SystemExit(
            "Codex isn't authenticated. Run `codex login` (browser/device flow) or "
            "`printenv OPENAI_API_KEY | codex login --with-api-key` then rerun the bootstrap."
        )
278
+
279
def _launch_codex_and_stream(self) -> int:
    """Run ``codex exec``, forward its events to telemetry, and return its exit code.

    In mock mode no process is spawned: one heartbeat is posted, the scripted
    mock events are handled, a mock manifest is written, and 0 is returned.

    Otherwise the child environment is the current environment plus the
    ``FLYWHEEL_*`` variables, optional LM Studio endpoints, and a per-run
    ``CODEX_HOME`` holding a copy of the user's config (and ``auth.json``
    when present). A daemon heartbeat thread is started before streaming
    begins; it is stopped by ``run()``.

    Returns:
        The exit code recorded on the invocation, or 1 if none was recorded.
    """
    assert self.workspace is not None
    assert self.bootstrap_payload is not None
    assert self.user_config is not None

    prompt_text = build_prompt_text(
        server_prompt=self.bootstrap_payload.prompt,
        workspace_instructions=self.user_config.workspace_instructions,
        artifact_manifest=self.config.artifact_manifest,
    )

    if self._mock_codex:
        # Fast-path: emit one heartbeat, a couple logs, a run id, and exit 0.
        post_heartbeat(
            self.config.server_url,
            self.config.run_id,
            self.config.capability_token,
            summary="alive (mock)",
        )
        for event in self._mock_codex_events():
            self._handle_event(event)
        self._write_mock_manifest()
        return 0

    codex_path = self.codex_executable or Path("codex")
    env = os.environ.copy()
    env.update(
        {
            "FLYWHEEL_RUN_ID": self.config.run_id,
            "FLYWHEEL_RUN_TOKEN": self.config.capability_token,
            "FLYWHEEL_SERVER": self.config.server_url,
            "FLYWHEEL_WORKSPACE": str(self.workspace.resolve()),
        }
    )

    # Debug: report only WHETHER a key is present. Key material (even a
    # prefix) must never be written to logs.
    if env.get("OPENAI_API_KEY"):
        print("[bootstrap] OPENAI_API_KEY is set", file=sys.stderr)
    else:
        print(
            "[bootstrap] WARNING: OPENAI_API_KEY is NOT set in environment",
            file=sys.stderr,
        )

    # If using LM Studio, set the API base URL for Codex
    if self.user_config.oss_provider == "lmstudio":
        env["OPENAI_API_BASE"] = "http://localhost:1234/v1"
        env["OPENAI_BASE_URL"] = "http://localhost:1234/v1"
        # LM Studio doesn't validate API keys, but some tools require one to
        # be set; an empty value counts as unset.
        if not env.get("OPENAI_API_KEY"):
            env["OPENAI_API_KEY"] = "lm-studio"
        print(
            "[bootstrap] Using LM Studio at http://localhost:1234/v1",
            file=sys.stderr,
        )

    # Ensure Codex actually sees the same config file bootstrap parsed.
    # Codex reads config from CODEX_HOME/config.toml; point it at a per-run copy.
    try:
        codex_home = self.workspace / ".codex_home"
        codex_home.mkdir(parents=True, exist_ok=True)

        # Copy config but expand ~ paths so codex sees absolute paths.
        config_text = self.config.config_path.read_text(encoding="utf-8")
        # Use forward slashes for cross-platform compatibility (works on
        # Windows too); this also avoids TOML interpreting backslashes as
        # escape sequences.
        home_dir = str(Path.home()).replace("\\", "/")
        config_text = config_text.replace('"~/', f'"{home_dir}/')
        config_text = config_text.replace("'~/", f"'{home_dir}/")
        (codex_home / "config.toml").write_text(config_text, encoding="utf-8")

        # Also copy auth credentials from user's default codex home so the
        # spawned codex process stays authenticated.
        # TODO: more sophisticated auth
        user_auth = Path.home() / ".codex" / "auth.json"
        if user_auth.exists():
            run_auth = codex_home / "auth.json"
            shutil.copyfile(user_auth, run_auth)
            # copyfile copies contents only, so the credential copy would
            # otherwise get default (umask) permissions; keep it owner-only.
            try:
                os.chmod(run_auth, 0o600)
            except OSError as chmod_exc:
                print(
                    f"[bootstrap] WARNING: could not restrict permissions on {run_auth}: {chmod_exc}",
                    file=sys.stderr,
                )
            print(f"[bootstrap] Copied auth from {user_auth}", file=sys.stderr)
        else:
            print(
                f"[bootstrap] WARNING: No auth.json found at {user_auth}",
                file=sys.stderr,
            )

        env["CODEX_HOME"] = str(codex_home)
        print(f"[bootstrap] CODEX_HOME set to: {codex_home}", file=sys.stderr)
    except Exception as exc:
        # Best effort: without the override Codex falls back to its default home.
        post_log(
            self.config.server_url,
            self.config.run_id,
            self.config.capability_token,
            level="warning",
            message="failed to prepare CODEX_HOME config override; continuing",
            extra={"error": repr(exc)},
        )

    # Pass sandbox settings as proper CLI flags
    # For provisioned instances, we use --yolo to completely bypass sandbox and approvals
    extra_flags: list[str] = []
    sandbox_mode = self.user_config.sandbox_mode
    if sandbox_mode:
        if sandbox_mode == "danger-full-access":
            # Use --yolo (--dangerously-bypass-approvals-and-sandbox) for unrestricted access
            # This is safe on provisioned instances since they're isolated VMs
            extra_flags.append("--yolo")
        else:
            extra_flags.extend(["--sandbox", sandbox_mode])

    invocation = build_invocation(
        codex_executable=Path(codex_path),
        prompt=prompt_text,
        workdir=self.workspace,
        env=env,
        extra_flags=tuple(extra_flags),
    )

    # Start heartbeat thread
    self._stop_heartbeats.clear()
    self.heartbeat_thread = threading.Thread(
        target=self._heartbeat_loop, daemon=True
    )
    self.heartbeat_thread.start()

    exit_code: int = 1
    for event in run_and_stream(invocation):
        self._handle_event(event)
    if invocation.exit_code is not None:
        exit_code = invocation.exit_code
    # Capture stderr for error reporting
    if invocation.stderr_output:
        self.last_stderr = invocation.stderr_output
    return exit_code
417
+
418
def _collect_and_post_artifacts(self, exit_code: int) -> None:
    """Upload artifacts from the manifest, then report completion or failure.

    While no artifacts were loaded and a Codex run id is known, a retry is
    attempted up to ``MAX_ARTIFACT_RETRIES`` times. Afterwards any artifacts
    are posted, followed by a completion (exit code 0) or an error report
    that includes up to 2000 characters of captured stderr.
    """
    assert self.workspace is not None
    cfg = self.config
    manifest_path = self.workspace / cfg.artifact_manifest
    manifest_result, artifacts = self._load_artifacts_with_content(manifest_path)

    # Auto-resume up to MAX_ARTIFACT_RETRIES times if artifacts are
    # missing or the manifest was malformed.
    attempts = 0
    while True:
        if artifacts or not self.codex_run_id or attempts >= MAX_ARTIFACT_RETRIES:
            break
        attempts += 1
        self._attempt_artifact_retry(manifest_path, manifest_result)
        manifest_result, artifacts = self._load_artifacts_with_content(
            manifest_path
        )

    if artifacts:
        post_artifacts(cfg.server_url, cfg.run_id, cfg.capability_token, artifacts)

    if exit_code != 0:
        # Include stderr in error reason for debugging
        reason = f"codex exit code {exit_code}"
        captured = self.last_stderr
        if captured:
            # Truncate stderr to avoid overly long error messages
            preview = captured[:2000]
            if len(captured) > 2000:
                preview += "... (truncated)"
            reason = f"{reason}\nstderr: {preview}"
        post_error(
            cfg.server_url,
            cfg.run_id,
            cfg.capability_token,
            reason=reason,
            summary="codex failed",
        )
        return

    post_completion(
        cfg.server_url,
        cfg.run_id,
        cfg.capability_token,
        "codex run completed",
    )
466
+
467
def _load_artifacts_with_content(
    self, manifest_path: Path
) -> tuple[ManifestResult, list[dict[str, object]]]:
    """Load artifacts and inline content when a path is provided.

    For each manifest entry whose payload has a "path" (or "file") inside the
    workspace:

    * a path ending in a checkpoint extension turns the entry into a
      ``checkpoint`` artifact and records ``size_bytes`` (no content is
      inlined);
    * text/html/table artifacts get the file read as UTF-8 into
      ``payload["content"]``;
    * image artifacts get the file read as binary into a base64
      ``payload["data_url"]``.

    This keeps artifacts self-contained for server-side rendering.
    Best-effort; failures are logged and the entry is passed through.

    Size limit: 25MB per inlined file; larger files get a
    ``payload["rendering_error"]`` instead of content.

    Returns a tuple of (ManifestResult, enriched artifacts list).
    """
    import base64
    import mimetypes

    MAX_ARTIFACT_SIZE = 25 * 1024 * 1024  # 25MB

    assert self.workspace is not None
    manifest_result = read_manifest(manifest_path)
    artifacts = manifest_result.artifacts
    enriched: list[dict[str, object]] = []

    # Checkpoint file extensions (model weights, etc.); a tuple so it can be
    # handed straight to str.endswith.
    checkpoint_extensions = (
        ".pt",
        ".pth",
        ".ckpt",
        ".safetensors",
        ".bin",
        ".h5",
        ".hdf5",
        ".pkl",
        ".pickle",
        ".joblib",
        ".npy",
        ".npz",
        ".onnx",
        ".pb",
    )
    # Artifact types whose file is inlined as UTF-8 text.
    text_types = ("text", "html", "table")

    for artifact in artifacts:
        try:
            artifact_type = str(artifact.get("artifact_type", "")).lower()
            payload = artifact.get("payload", {})

            # A usable path is a non-empty string under "path" or "file".
            path_str = None
            if isinstance(payload, dict):
                candidate = payload.get("path") or payload.get("file")
                if isinstance(candidate, str) and candidate:
                    path_str = candidate

            if path_str is not None and path_str.lower().endswith(
                checkpoint_extensions
            ):
                # Detect checkpoint files and convert to 'checkpoint' type;
                # converted entries are never inlined.
                artifact = dict(artifact)
                artifact["artifact_type"] = "checkpoint"
                artifact_type = "checkpoint"
                # Add file size if we can resolve the path
                resolved = self._resolve_artifact_path(path_str)
                if resolved and resolved.is_file():
                    payload = dict(payload)
                    payload["size_bytes"] = resolved.stat().st_size
                    artifact["payload"] = payload

            elif path_str is not None and (
                artifact_type in text_types or artifact_type == "image"
            ):
                resolved = self._resolve_artifact_path(path_str)
                if resolved and resolved.is_file():
                    file_size = resolved.stat().st_size
                    payload = dict(payload)
                    if file_size > MAX_ARTIFACT_SIZE:
                        payload["rendering_error"] = (
                            f"File too large ({file_size} bytes, max {MAX_ARTIFACT_SIZE})"
                        )
                    elif artifact_type == "image":
                        # Inline images as a base64 data URL.
                        try:
                            image_data = resolved.read_bytes()
                            mime_type, _ = mimetypes.guess_type(str(resolved))
                            if not mime_type:
                                mime_type = "image/png"  # default fallback
                            b64 = base64.b64encode(image_data).decode("ascii")
                            payload["data_url"] = f"data:{mime_type};base64,{b64}"
                        except Exception as exc:
                            self._log(
                                f"failed to read image artifact at {resolved}: {exc}",
                                level="warning",
                            )
                    else:
                        # Inline text, html and table files as UTF-8 content.
                        try:
                            payload["content"] = resolved.read_text(
                                encoding="utf-8"
                            )
                        except UnicodeDecodeError:
                            self._log(
                                f"failed to read {artifact_type} artifact at {resolved} (encoding)",
                                level="warning",
                            )
                    artifact = dict(artifact)
                    artifact["payload"] = payload

        except Exception as exc:  # pragma: no cover - defensive
            self._log(f"artifact enrichment error: {exc}", level="warning")

        # A manifest entry that is not a mapping cannot be forwarded; skip it
        # instead of letting dict() abort the whole collection.
        try:
            enriched.append(dict(artifact))
        except (TypeError, ValueError):
            self._log(
                f"skipping artifact entry that is not an object: {artifact!r}",
                level="warning",
            )
    return manifest_result, enriched
621
+
622
def _resolve_artifact_path(self, path_str: str) -> Path | None:
    """Resolve *path_str* against the workspace.

    Returns the resolved path when it is the workspace itself or lies below
    it; otherwise logs a warning and returns ``None``.
    """
    assert self.workspace is not None
    root = self.workspace.resolve()
    candidate = (self.workspace / Path(path_str)).resolve()

    inside = candidate == root or root in candidate.parents
    if not inside:
        self._log(f"skipping artifact outside workspace: {candidate}", level="warning")
        return None
    return candidate
632
+
633
def _heartbeat_loop(self) -> None:
    """Post "alive" heartbeats until the stop event is set.

    A failed heartbeat is reported on stderr and does not end the loop.
    Between heartbeats the loop waits on the stop event for
    ``HEARTBEAT_INTERVAL_SECONDS``.
    """
    stop_flag = self._stop_heartbeats
    while True:
        if stop_flag.is_set():
            break
        try:
            cfg = self.config
            post_heartbeat(
                cfg.server_url,
                cfg.run_id,
                cfg.capability_token,
                summary="alive",
            )
        except Exception as err:  # pragma: no cover - best effort
            print(f"heartbeat failed: {err}", file=sys.stderr)
        stop_flag.wait(HEARTBEAT_INTERVAL_SECONDS)
645
+
646
def _handle_event(self, event: CodexEvent) -> None:
    """Forward one Codex event to telemetry and remember its run id.

    The event's raw value is logged as an info-level message. When the raw
    value is a dict carrying a string ``run_id``, that id is stored on
    ``self.codex_run_id``.
    """
    raw = event.raw
    post_log(
        self.config.server_url,
        self.config.run_id,
        self.config.capability_token,
        level="info",
        message=str(raw),
        extra={},
    )
    if not isinstance(raw, dict):
        return
    candidate = raw.get("run_id")
    if isinstance(candidate, str):
        self.codex_run_id = candidate
659
+
660
def _attempt_artifact_retry(
    self, manifest_path: Path, manifest_result: ManifestResult
) -> None:
    """Retry artifact collection via ``codex exec`` with a feedback prompt.

    A MALFORMED manifest gets a prompt quoting the error and up to 2000
    characters of the current file; any other status is treated as a MISSING
    manifest. In both cases a new Codex exec is launched with a targeted
    prompt telling Codex exactly what to do. This is preferable to
    ``codex resume`` which cannot accept additional instructions.

    Does nothing when no Codex run id is known. Failures while launching or
    streaming the retry are logged, not raised.
    """
    if not self.codex_run_id:
        return

    manifest_name = self.config.artifact_manifest

    # Format description shared by both prompts.
    example_block = (
        "[\n"
        ' {"artifact_type": "text", "payload": {"content": "..."}},\n'
        ' {"artifact_type": "image", "payload": {"path": "plot.png",'
        ' "format": "png"}}\n'
        "]\n\n"
        "Do NOT wrap the list in an object. The file must start with "
        "[ and end with ].\n"
    )

    if manifest_result.status == ManifestStatus.MALFORMED:
        error_detail = manifest_result.error or "unknown error"
        raw_content = ""
        if manifest_path.exists():
            try:
                raw_content = manifest_path.read_text(encoding="utf-8")[:2000]
            except Exception:
                raw_content = "<could not read file>"

        fix_prompt = (
            "The artifact manifest file at "
            f"$FLYWHEEL_WORKSPACE/{manifest_name} is malformed.\n\n"
            f"Error: {error_detail}\n\n"
            f"Current file contents:\n{raw_content}\n\n"
            "Please rewrite this file so it is a valid JSON list of "
            "artifact entries. Each entry must be an object with "
            '"artifact_type" and "payload" keys. The file must be a '
            "top-level JSON array, for example:\n"
            f"{example_block}"
            "Only fix the manifest format — do not change the actual "
            "artifact content or paths."
        )
        log_msg = "attempting codex exec to fix malformed artifact manifest"
    else:
        # MISSING — the file was never written.
        fix_prompt = (
            "The artifact manifest file was not found at "
            f"$FLYWHEEL_WORKSPACE/{manifest_name}.\n\n"
            "Your task already completed successfully, but the manifest "
            "file is missing. Please write the manifest now.\n\n"
            "The file must be a valid JSON list of artifact entries. "
            'Each entry must be an object with "artifact_type" and '
            '"payload" keys. The file must be a top-level JSON array, '
            "for example:\n"
            f"{example_block}"
            "Look at the files you produced in the workspace and create "
            "the manifest based on what you find."
        )
        log_msg = "attempting codex exec to write missing artifact manifest"

    self._log(
        log_msg,
        extra={
            "status": manifest_result.status.value,
            "error": manifest_result.error,
        },
    )

    codex_path = self.codex_executable or Path("codex")
    try:
        workdir = self.workspace or Path(".")
        # The prompt refers to $FLYWHEEL_WORKSPACE, so the retry needs the
        # same FLYWHEEL_* variables the main launch exports.
        env = os.environ.copy()
        env.update(
            {
                "FLYWHEEL_RUN_ID": self.config.run_id,
                "FLYWHEEL_RUN_TOKEN": self.config.capability_token,
                "FLYWHEEL_SERVER": self.config.server_url,
                "FLYWHEEL_WORKSPACE": str(workdir.resolve()),
            }
        )
        # Reuse the per-run Codex home when the main launch prepared one, so
        # the retry reads the same config copy.
        codex_home = workdir / ".codex_home"
        if (codex_home / "config.toml").is_file():
            env["CODEX_HOME"] = str(codex_home)

        invocation = build_invocation(
            codex_executable=codex_path,
            prompt=fix_prompt,
            workdir=workdir,
            env=env,
        )
        for event in run_and_stream(invocation):
            self._handle_event(event)
    except Exception as exc:  # pragma: no cover
        self._log(
            "codex artifact retry failed",
            level="error",
            extra={"error": repr(exc)},
        )
751
+
752
def _log(
    self, message: str, level: str = "info", extra: dict[str, object] | None = None
) -> None:
    """Send *message* to telemetry via ``post_log``.

    A missing or empty *extra* is sent as an empty dict.
    """
    cfg = self.config
    details = extra if extra else {}
    post_log(
        cfg.server_url,
        cfg.run_id,
        cfg.capability_token,
        level=level,
        message=message,
        extra=details,
    )
764
+
765
+ # --- mock codex helpers (used in tests via BOOTSTRAP_MOCK_CODEX=1) ---
766
+
767
def _mock_codex_events(self):
    """Yield the scripted mock events, then record the mock run id.

    The run id is assigned only once the generator has been exhausted.
    """
    scripted = (
        {"run_id": "mock-codex-run"},
        {"message": "mock: starting work"},
        {"message": "mock: finished"},
    )
    for raw in scripted:
        yield CodexEvent(raw=raw)
    self.codex_run_id = "mock-codex-run"
772
+
773
def _write_mock_manifest(self) -> None:
    """Write a one-entry text manifest into the workspace for mock runs."""
    assert self.workspace is not None
    entry = {"artifact_type": "text", "payload": {"content": "mock artifact"}}
    serialized = json.dumps([entry])
    target = self.workspace / self.config.artifact_manifest
    target.write_text(serialized, encoding="utf-8")
778
+
779
+
780
def build_config(args: Any) -> BootstrapConfig:
    """Construct BootstrapConfig from CLI args and environment.

    CLI values take precedence over environment variables. An empty value is
    treated as missing at every level, so an empty server-URL variable falls
    back to ``DEFAULT_SERVER_URL`` instead of producing an empty URL.

    Raises:
        SystemExit: when neither the CLI nor the environment provides a run
            id or capability token.
    """

    server_url = args.server or os.environ.get(ENV_SERVER_URL) or DEFAULT_SERVER_URL
    config_path = Path(args.config).expanduser().resolve()
    return BootstrapConfig(
        run_id=args.run_id or _env_or_throw(ENV_RUN_ID, "run id"),
        capability_token=args.token or _env_or_throw(ENV_RUN_TOKEN, "capability token"),
        config_path=config_path,
        server_url=server_url,
    )
791
+
792
+
793
def _env_or_throw(var: str, label: str) -> str:
    """Return the value of environment variable *var*.

    Raises:
        SystemExit: when the variable is unset or empty; the message names
            *label* and *var*.
    """
    value = os.environ.get(var)
    if value:
        return value
    raise SystemExit(f"missing {label} (pass flag or set {var})")