fc-data 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. datasmith/__init__.py +330 -0
  2. datasmith/__init__.pyi +194 -0
  3. datasmith/agents/__init__.py +31 -0
  4. datasmith/agents/classifiers.py +272 -0
  5. datasmith/agents/codex.py +25 -0
  6. datasmith/agents/config.py +108 -0
  7. datasmith/agents/extractors.py +197 -0
  8. datasmith/agents/installed/README.md +52 -0
  9. datasmith/agents/installed/__init__.py +22 -0
  10. datasmith/agents/installed/base.py +240 -0
  11. datasmith/agents/installed/claude.py +134 -0
  12. datasmith/agents/installed/codex.py +91 -0
  13. datasmith/agents/installed/gemini.py +118 -0
  14. datasmith/agents/installed/none.py +27 -0
  15. datasmith/agents/sandbox.py +547 -0
  16. datasmith/agents/synthesizer.py +439 -0
  17. datasmith/agents/templates/AGENTS.md.j2 +150 -0
  18. datasmith/agents/templates/sandbox_verify.py +428 -0
  19. datasmith/docker/__init__.py +31 -0
  20. datasmith/docker/context.py +112 -0
  21. datasmith/docker/images.py +158 -0
  22. datasmith/docker/publish.py +56 -0
  23. datasmith/docker/templates/Dockerfile.base +26 -0
  24. datasmith/docker/templates/Dockerfile.pr +42 -0
  25. datasmith/docker/templates/Dockerfile.repo +11 -0
  26. datasmith/docker/templates/docker_build_base.sh +780 -0
  27. datasmith/docker/templates/docker_build_env.sh +309 -0
  28. datasmith/docker/templates/docker_build_final.sh +106 -0
  29. datasmith/docker/templates/docker_build_pkg.sh +99 -0
  30. datasmith/docker/templates/docker_build_run.sh +124 -0
  31. datasmith/docker/templates/entrypoint.sh +62 -0
  32. datasmith/docker/templates/parser.py +1405 -0
  33. datasmith/docker/templates/profile.sh +199 -0
  34. datasmith/docker/templates/pytest_runner.py +692 -0
  35. datasmith/docker/templates/run-tests.sh +197 -0
  36. datasmith/docker/verifiers.py +131 -0
  37. datasmith/filters.py +154 -0
  38. datasmith/github/__init__.py +22 -0
  39. datasmith/github/client.py +333 -0
  40. datasmith/github/hooks.py +50 -0
  41. datasmith/github/links.py +110 -0
  42. datasmith/github/models.py +206 -0
  43. datasmith/github/render.py +173 -0
  44. datasmith/github/search.py +66 -0
  45. datasmith/github/templates/comment.md.j2 +5 -0
  46. datasmith/github/templates/final.md.j2 +66 -0
  47. datasmith/github/templates/issues.md.j2 +21 -0
  48. datasmith/github/templates/repo.md.j2 +1 -0
  49. datasmith/preflight.py +162 -0
  50. datasmith/publish/__init__.py +13 -0
  51. datasmith/publish/huggingface.py +104 -0
  52. datasmith/publish/pipeline.py +60 -0
  53. datasmith/publish/records.py +91 -0
  54. datasmith/py.typed +1 -0
  55. datasmith/resolution/__init__.py +14 -0
  56. datasmith/resolution/blocklist.py +145 -0
  57. datasmith/resolution/cache.py +120 -0
  58. datasmith/resolution/constants.py +277 -0
  59. datasmith/resolution/dependency_resolver.py +174 -0
  60. datasmith/resolution/git_utils.py +378 -0
  61. datasmith/resolution/import_analyzer.py +66 -0
  62. datasmith/resolution/metadata_parser.py +412 -0
  63. datasmith/resolution/models.py +41 -0
  64. datasmith/resolution/orchestrator.py +522 -0
  65. datasmith/resolution/package_filters.py +312 -0
  66. datasmith/resolution/python_manager.py +110 -0
  67. datasmith/runners/__init__.py +15 -0
  68. datasmith/runners/base.py +112 -0
  69. datasmith/runners/classify_prs.py +48 -0
  70. datasmith/runners/render_problems.py +113 -0
  71. datasmith/runners/resolve_packages.py +66 -0
  72. datasmith/runners/scrape_commits.py +166 -0
  73. datasmith/runners/scrape_repos.py +44 -0
  74. datasmith/runners/synthesize_images.py +310 -0
  75. datasmith/update/__init__.py +5 -0
  76. datasmith/update/cli.py +169 -0
  77. datasmith/update/offline.py +173 -0
  78. datasmith/update/pipeline.py +497 -0
  79. datasmith/utils/__init__.py +18 -0
  80. datasmith/utils/core.py +67 -0
  81. datasmith/utils/db.py +156 -0
  82. datasmith/utils/tokens.py +65 -0
  83. fc_data-0.2.0.dist-info/METADATA +441 -0
  84. fc_data-0.2.0.dist-info/RECORD +87 -0
  85. fc_data-0.2.0.dist-info/WHEEL +4 -0
  86. fc_data-0.2.0.dist-info/entry_points.txt +2 -0
  87. fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,547 @@
1
+ """Sandboxed synthesis via an installed CLI agent.
2
+
3
+ Prepares a temporary workspace with Docker build context files, an AGENTS.md
4
+ guide, and a simplified verify script, then launches the first available
5
+ installed agent (Claude Code, Codex, or Gemini CLI). The agent iterates
6
+ internally — reading failure.json, editing build scripts, re-running
7
+ verification — until it succeeds or the session times out.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ import shutil
15
+ import subprocess
16
+ import sys
17
+ import tempfile
18
+ import time
19
+ from dataclasses import dataclass, field
20
+ from pathlib import Path
21
+
22
+ from jinja2 import Environment, FileSystemLoader
23
+
24
+ from datasmith.agents.installed import AgentResult, get_agent
25
+ from datasmith.docker.context import DockerContext
26
+ from datasmith.utils import get_logger
27
+
28
# Module-level logger used by every helper in this file.
logger = get_logger("agents.sandbox")

# Directory next to this module that holds AGENTS.md.j2 and sandbox_verify.py.
_TEMPLATES_DIR = Path(__file__).with_name("templates")
31
+
32
# Files the agent must NOT modify.  Their hashes are recorded at workspace
# setup and re-checked after the agent exits (SandboxRunner._extract_results);
# the same digests are written to .immutable_hashes.json so that
# sandbox_verify.py can give the agent early feedback.
_IMMUTABLE_FILES = (
    "Dockerfile.pr",
    "docker_build_base.sh",
    "docker_build_env.sh",
    "docker_build_final.sh",
    "profile.sh",
    "run-tests.sh",
    "entrypoint.sh",
    "task.txt",
)


def _compute_immutable_hashes(task_dir: Path) -> dict[str, str]:
    """Return ``{file name: MD5 hex digest}`` for the immutable files in *task_dir*.

    Only names from ``_IMMUTABLE_FILES`` that currently resolve to a regular
    file are included.  A name that is missing, or that has been replaced by
    a directory or a dangling symlink, is simply left out of the mapping, so a
    caller comparing against previously recorded digests sees it as changed
    instead of hitting an ``IsADirectoryError`` from ``read_bytes()``.

    Args:
        task_dir: Directory expected to contain the Docker build context.

    Returns:
        Mapping of file name to hexadecimal MD5 digest of the file's bytes.
    """
    hashes: dict[str, str] = {}
    for fname in _IMMUTABLE_FILES:
        candidate = task_dir / fname
        # is_file() (not exists()): a directory planted under an immutable
        # name must not crash the integrity check.
        if not candidate.is_file():
            continue
        # NOTE(review): MD5 is kept because the digests are shared with
        # sandbox_verify.py through .immutable_hashes.json; presumably that
        # script uses the same algorithm -- confirm before switching digests.
        hashes[fname] = hashlib.md5(candidate.read_bytes()).hexdigest()  # noqa: S324
    return hashes
55
+
56
+
57
@dataclass
class SandboxConfig:
    """Timeout settings for a sandboxed agent run."""

    # Total wall-clock budget for the agent session, in seconds.
    # NOTE(review): not read anywhere in this module -- presumably consumed
    # by callers; confirm before relying on it.
    timeout_s: int = 3600

    # Timeout, in seconds, forwarded as ``timeout=`` to the agent's ``exec``
    # call when the agent process is launched.
    codex_timeout_s: int = 3600
66
+
67
+
68
@dataclass
class SandboxResult:
    """What a sandbox synthesis or verification run produced."""

    # True when verification_success.json was present after the run.
    success: bool
    # Build context to keep; the producers in this module set it only on success.
    docker_context: DockerContext | None = None
    # Parsed failure.json, or a synthetic dict for integrity/timeout failures.
    failure_json: dict | None = None
    # Wall-clock duration of the run in seconds.
    duration_s: float = 0.0
    # Agent output text (or the verifier's stdout when no agent is involved).
    agent_output: str = ""
    # Unprocessed agent output as reported by the agent wrapper.
    raw_agent_output: str = ""
    # Name of the agent that was launched, empty when none was.
    agent_name: str = ""
    # Files the agent reported as changed.
    files_changed: list[str] = field(default_factory=list)
    # ``resource_metrics`` section lifted from the verification JSON files.
    resource_metrics: dict = field(default_factory=dict)
81
+
82
+
83
class SandboxRunner:
    """Launch an installed CLI agent in a sandboxed workspace to iteratively fix Docker builds.

    A run creates a temporary workspace, fills it with the Docker build
    templates, a task description and the agent instructions, commits it to a
    fresh git repository, hands it to the agent, and finally inspects the
    files the agent left behind.
    """

    # Template files copied verbatim from the package's docker/templates
    # directory into the workspace's task/ directory.
    _TEMPLATE_FILES = (
        "Dockerfile.pr",
        "docker_build_base.sh",
        "docker_build_env.sh",
        "docker_build_pkg.sh",
        "docker_build_run.sh",
        "docker_build_final.sh",
        "profile.sh",
        "entrypoint.sh",
    )

    def __init__(self, config: SandboxConfig | None = None, agent: str | None = None) -> None:
        """Store the run configuration.

        Args:
            config: Timeout settings; a default ``SandboxConfig`` is used when omitted.
            agent: Preferred agent name, forwarded to ``get_agent`` as a
                one-item preference list.  ``None`` passes no preference.
        """
        self._config = config or SandboxConfig()
        self._agent = agent

    def run(
        self,
        owner: str,
        repo: str,
        sha: str,
        repo_image: str,
        env_payload: str,
        python_version: str,
        pr_context: str,
        prior_attempts: str = "",
        dry_run: bool = False,
    ) -> SandboxResult:
        """Prepare workspace, launch agent, extract results.

        Args:
            owner: Repository owner, written into task.txt and AGENTS.md.
            repo: Repository name, written into task.txt and AGENTS.md.
            sha: Commit SHA; also used as ``base_commit`` for run-tests.sh.
            repo_image: Image reference written into task.txt.
            env_payload: Environment payload string written into task.txt.
            python_version: Python version written into task.txt and AGENTS.md.
            pr_context: Pull-request context rendered into AGENTS.md.
            prior_attempts: Optional text saved as prior_attempts.md.
            dry_run: When true the workspace is still prepared and committed,
                but no agent is launched and a successful result holding an
                empty ``DockerContext`` is returned.

        Returns:
            A ``SandboxResult`` indicating success/failure and the
            (potentially modified) ``DockerContext``.
        """
        start = time.time()

        with tempfile.TemporaryDirectory(prefix="synthesis-") as tmpdir:
            workspace = Path(tmpdir)

            # 1. Prepare workspace.  The returned digests stay in this
            #    process so the agent cannot tamper with them.
            expected_hashes = self._prepare_workspace(
                workspace=workspace,
                owner=owner,
                repo=repo,
                sha=sha,
                repo_image=repo_image,
                env_payload=env_payload,
                python_version=python_version,
                pr_context=pr_context,
                prior_attempts=prior_attempts,
            )

            # 2. Init git repo (Codex requirement)
            self._init_git(workspace)

            if dry_run:
                logger.info(
                    "[DRY RUN] Would launch agent sandbox for %s/%s@%s in %s",
                    owner,
                    repo,
                    sha[:12],
                    workspace,
                )
                return SandboxResult(
                    success=True,
                    docker_context=DockerContext(),
                    duration_s=time.time() - start,
                    agent_output="[dry run — no execution]",
                )

            # 3. Launch agent
            agent_name, agent_result = self._launch_agent(workspace)

            # 4. Extract results
            result = self._extract_results(
                workspace,
                agent_result,
                agent_name,
                expected_hashes=expected_hashes,
            )
            result.duration_s = time.time() - start
            return result

    def _prepare_workspace(
        self,
        workspace: Path,
        owner: str,
        repo: str,
        sha: str,
        repo_image: str,
        env_payload: str,
        python_version: str,
        pr_context: str,
        prior_attempts: str = "",
    ) -> dict[str, str]:
        """Create the workspace directory structure.

        Populates ``workspace/task`` with the Docker templates, a rendered
        run-tests.sh and task.txt, and writes AGENTS.md, sandbox_verify.py,
        optional prior_attempts.md and .immutable_hashes.json at the
        workspace root.

        Returns:
            The digests of the immutable files as they were written, so the
            caller can re-check them later without trusting the workspace.
        """
        task_dir = workspace / "task"
        task_dir.mkdir(parents=True, exist_ok=True)

        # Copy the template files from docker/templates/ into task/.
        # Templates that are absent from the package are skipped silently.
        docker_templates = Path(__file__).parents[1] / "docker" / "templates"
        for fname in self._TEMPLATE_FILES:
            src = docker_templates / fname
            if src.exists():
                shutil.copy2(str(src), str(task_dir / fname))

        # All text files are written as UTF-8 explicitly: PR context and
        # prior attempts may contain characters the locale encoding lacks.
        run_tests_sh = _render_run_tests_sh(docker_templates, base_commit=sha)
        (task_dir / "run-tests.sh").write_text(run_tests_sh, encoding="utf-8")

        task_txt = _generate_task_txt(owner, repo, sha, env_payload, python_version, repo_image)
        (task_dir / "task.txt").write_text(task_txt, encoding="utf-8")

        agents_md = _render_agents_md(
            owner=owner,
            repo=repo,
            sha=sha,
            python_version=python_version,
            pr_context=pr_context,
        )
        (workspace / "AGENTS.md").write_text(agents_md, encoding="utf-8")

        # Copy sandbox_verify.py
        src_verify = _TEMPLATES_DIR / "sandbox_verify.py"
        shutil.copy2(str(src_verify), str(workspace / "sandbox_verify.py"))

        # Write prior attempts context (from failed TRY_SIMILAR stage)
        if prior_attempts:
            (workspace / "prior_attempts.md").write_text(prior_attempts, encoding="utf-8")

        # Record immutable file hashes.  The JSON copy is for
        # sandbox_verify.py; the returned dict is the trusted copy.
        hashes = _compute_immutable_hashes(task_dir)
        (workspace / ".immutable_hashes.json").write_text(json.dumps(hashes), encoding="utf-8")
        return hashes

    def _init_git(self, workspace: Path) -> None:
        """Initialize a git repo in the workspace (required by Codex).

        Runs ``git init``, stages everything and creates one commit with a
        throwaway identity.  Any failing git command raises
        ``subprocess.CalledProcessError``.
        """
        commands = (
            ["git", "init"],
            ["git", "add", "-A"],
            [
                "git",
                "-c",
                "user.name=sandbox",
                "-c",
                "user.email=sandbox@local",
                "commit",
                "-m",
                "init",
            ],
        )
        for command in commands:
            subprocess.run(
                command,
                cwd=str(workspace),
                capture_output=True,
                check=True,
            )

    def _launch_agent(self, workspace: Path) -> tuple[str, AgentResult]:
        """Launch the first available installed CLI agent in the workspace.

        Returns ``(agent_name, AgentResult)``.
        """
        preference = [self._agent] if self._agent else None
        agent = get_agent(preference=preference)
        logger.info("Launching %s agent sandbox in %s", agent.name(), workspace)
        result = agent.exec(
            prompt="Read AGENTS.md and follow its instructions to fix the Docker build.",
            timeout=self._config.codex_timeout_s,
            workdir=str(workspace),
        )
        logger.info(
            "Agent %s exited (success=%s, duration=%.1fs, output_len=%d, error=%s)",
            agent.name(),
            result.success,
            result.duration_s,
            len(result.output),
            result.error[:200] if result.error else "",
        )
        return agent.name(), result

    def _find_integrity_violations(
        self,
        workspace: Path,
        task_dir: Path,
        expected_hashes: dict[str, str] | None,
    ) -> list[str]:
        """Return the names of immutable files that no longer match their recorded digest.

        When *expected_hashes* is ``None`` the digests are read from
        ``.immutable_hashes.json`` in the workspace.  That file is writable by
        the agent, so it is only a fallback: an unreadable or malformed file
        is itself reported as a violation, and a missing file means nothing
        was recorded and nothing is checked.
        """
        if expected_hashes is None:
            hashes_file = workspace / ".immutable_hashes.json"
            if not hashes_file.exists():
                return []
            try:
                loaded = json.loads(hashes_file.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
                return [hashes_file.name]
            if not isinstance(loaded, dict):
                return [hashes_file.name]
            expected_hashes = loaded

        current = _compute_immutable_hashes(task_dir)
        return [name for name, digest in expected_hashes.items() if digest != current.get(name, "")]

    @staticmethod
    def _load_failure_json(failure_file: Path) -> dict | None:
        """Return the parsed failure.json object, or ``None`` if absent or unusable."""
        if not failure_file.exists():
            return None
        try:
            loaded = json.loads(failure_file.read_text(encoding="utf-8"))
        except Exception:
            logger.warning("Failed to parse failure.json", exc_info=True)
            return None
        if not isinstance(loaded, dict):
            # Callers use .get() on the result; anything but an object is noise.
            logger.warning("Ignoring failure.json: expected a JSON object, got %s", type(loaded).__name__)
            return None
        return loaded

    def _extract_results(
        self,
        workspace: Path,
        codex_result: AgentResult,
        agent_name: str = "",
        expected_hashes: dict[str, str] | None = None,
    ) -> SandboxResult:
        """Read workspace state after the agent exits to build the result.

        Args:
            workspace: Root of the workspace the agent worked in.
            codex_result: Result object returned by the agent.
            agent_name: Name of the agent, copied into the result.
            expected_hashes: Digests of the immutable files captured when the
                workspace was prepared.  When omitted, the digests stored in
                the workspace's ``.immutable_hashes.json`` are used instead.

        Returns:
            A failed result with ``stage == "integrity"`` when an immutable
            file changed; otherwise a result whose ``success`` reflects the
            presence of ``task/verification_success.json``.
        """
        task_dir = workspace / "task"

        # Integrity check against digests held outside the agent's reach.
        modified = self._find_integrity_violations(workspace, task_dir, expected_hashes)
        if modified:
            logger.warning("File integrity violation: %s", ", ".join(modified))
            return SandboxResult(
                success=False,
                failure_json={
                    "stage": "integrity",
                    "return_code": 1,
                    "error_message": f"Agent modified immutable files: {', '.join(modified)}",
                },
                agent_output=codex_result.output,
                raw_agent_output=codex_result.raw_output,
                agent_name=agent_name,
                files_changed=codex_result.files_changed,
            )

        # NOTE(review): success is decided solely by the presence of a file
        # inside the agent-writable task directory; consider re-running the
        # verification outside the agent's control.
        success_file = task_dir / "verification_success.json"
        failure_file = task_dir / "failure.json"
        success = success_file.exists()

        # Read back only the two agent-editable scripts (the rest are templates)
        docker_context: DockerContext | None = None
        try:
            pkg_path = task_dir / "docker_build_pkg.sh"
            run_path = task_dir / "docker_build_run.sh"
            pkg_sh = pkg_path.read_text(encoding="utf-8") if pkg_path.exists() else ""
            run_sh = run_path.read_text(encoding="utf-8") if run_path.exists() else ""
            docker_context = DockerContext(build_pkg_sh=pkg_sh, build_run_sh=run_sh)
        except Exception:
            logger.warning("Failed to read Docker context from workspace", exc_info=True)

        failure_json = self._load_failure_json(failure_file)

        if success:
            logger.info("Sandbox synthesis succeeded")
        else:
            stage = failure_json.get("stage", "unknown") if failure_json else "unknown"
            logger.warning("Sandbox synthesis failed at stage: %s", stage)
            if stage == "unknown":
                # No failure.json means sandbox_verify.py was never run or crashed.
                # Log agent details to help diagnose why.
                logger.warning(
                    "No failure.json found — agent likely never ran sandbox_verify.py. Agent error: %s",
                    codex_result.error[:500] if codex_result.error else "(none)",
                )
                if codex_result.output:
                    logger.info(
                        "Agent output (last 1000 chars): %s",
                        codex_result.output[-1000:],
                    )

        # Extract resource_metrics from whichever JSON file was written
        resource_metrics = _extract_resource_metrics(success_file, failure_file, failure_json)

        return SandboxResult(
            success=success,
            docker_context=docker_context if success else None,
            failure_json=failure_json,
            agent_output=codex_result.output,
            raw_agent_output=codex_result.raw_output,
            agent_name=agent_name,
            files_changed=codex_result.files_changed,
            resource_metrics=resource_metrics,
        )
351
+
352
+
353
def _extract_resource_metrics(
    success_file: Path,
    failure_file: Path,
    failure_json: dict | None,
) -> dict:
    """Return the ``resource_metrics`` mapping recorded by verification.

    The success file is consulted first: when it exists and carries a dict
    under ``resource_metrics``, a shallow copy of that dict is returned.  Any
    error while reading it is logged at debug level and ignored.  Otherwise
    the already-parsed *failure_json* is used, and its ``resource_metrics``
    dict is returned as-is.  An empty dict is the final fallback.

    *failure_file* is accepted for signature symmetry but is not read here.
    """
    if success_file.exists():
        try:
            payload = json.loads(success_file.read_text())
            from_success = payload.get("resource_metrics")
            if isinstance(from_success, dict):
                return dict(from_success)
        except Exception:
            logger.debug("Failed to read resource_metrics from success file")

    from_failure = failure_json.get("resource_metrics") if isinstance(failure_json, dict) else None
    return from_failure if isinstance(from_failure, dict) else {}
377
+
378
+
379
def _generate_task_txt(
    owner: str,
    repo: str,
    sha: str,
    env_payload: str,
    python_version: str,
    repo_image: str = "",
) -> str:
    """Build the text of task.txt: a ``Task(...)`` literal, one field per line.

    String arguments are embedded through ``repr`` so quotes and newlines
    inside them stay escaped.  ``commit_date``, ``tag`` and ``benchmarks``
    are fixed values.
    """
    # NOTE(review): each field line is prefixed exactly as it appears in the
    # reviewed text; the diff view may have collapsed wider indentation --
    # confirm against the packaged file.
    entries = (
        f"owner={owner!r},",
        f"repo={repo!r},",
        f"sha={sha!r},",
        "commit_date=0.0,",
        f"env_payload={env_payload!r},",
        f"python_version={python_version!r},",
        "tag='pkg',",
        "benchmarks='',",
        f"repo_image={repo_image!r}",
    )
    field_lines = "".join(f" {entry}\n" for entry in entries)
    return f"Task(\n{field_lines})\n"
402
+
403
+
404
def _render_agents_md(
    owner: str,
    repo: str,
    sha: str,
    python_version: str,
    pr_context: str,
) -> str:
    """Render ``AGENTS.md.j2`` from the module's templates directory.

    The five arguments are passed to the template under their own names and
    the rendered text is returned.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
        keep_trailing_newline=True,
        # The template name ends in .md.j2, so HTML escaping is left off.
        autoescape=False,
    )
    variables = {
        "owner": owner,
        "repo": repo,
        "sha": sha,
        "python_version": python_version,
        "pr_context": pr_context,
    }
    return jinja_env.get_template("AGENTS.md.j2").render(**variables)
425
+
426
+
427
def verify_context(
    owner: str,
    repo: str,
    sha: str,
    repo_image: str,
    env_payload: str,
    python_version: str,
    context: DockerContext,
    timeout_s: int = 3600,
) -> SandboxResult:
    """Build and verify a :class:`DockerContext` without launching an agent.

    Used by ``Synthesizer.TRY_SIMILAR`` to test whether a previously
    successful build context works for a new commit in the same repository.

    A temporary workspace is filled with the Docker templates, a rendered
    run-tests.sh and task.txt; the candidate's non-empty ``build_pkg_sh`` /
    ``build_run_sh`` overwrite the corresponding templates; then
    ``sandbox_verify.py --task <task_dir>`` is run with the current
    interpreter.

    Args:
        owner: Repository owner written into task.txt.
        repo: Repository name written into task.txt.
        sha: Commit SHA; also the ``base_commit`` for run-tests.sh.
        repo_image: Image reference written into task.txt.
        env_payload: Environment payload string written into task.txt.
        python_version: Python version written into task.txt.
        context: Candidate build context to verify.
        timeout_s: Seconds to wait for the verification process.

    Returns:
        A ``SandboxResult``.  ``success`` reflects the presence of
        ``verification_success.json``; on timeout a failed result with
        ``stage == "timeout"`` is returned.  When verification does not
        succeed, the verifier's stderr is appended to ``agent_output`` so a
        crash of the verify script is not lost.
    """
    start = time.time()
    docker_templates = Path(__file__).parents[1] / "docker" / "templates"

    with tempfile.TemporaryDirectory(prefix="verify-ctx-") as tmpdir:
        workspace = Path(tmpdir)
        task_dir = workspace / "task"
        task_dir.mkdir(parents=True, exist_ok=True)

        # Copy template files; templates absent from the package are skipped.
        for fname in (
            "Dockerfile.pr",
            "docker_build_base.sh",
            "docker_build_env.sh",
            "docker_build_pkg.sh",
            "docker_build_run.sh",
            "docker_build_final.sh",
            "profile.sh",
            "entrypoint.sh",
        ):
            src = docker_templates / fname
            if src.exists():
                shutil.copy2(str(src), str(task_dir / fname))

        # Text files are written as UTF-8 explicitly rather than in the
        # locale encoding, which may not cover every character.
        run_tests_sh = _render_run_tests_sh(docker_templates, base_commit=sha)
        (task_dir / "run-tests.sh").write_text(run_tests_sh, encoding="utf-8")

        task_txt = _generate_task_txt(owner, repo, sha, env_payload, python_version, repo_image)
        (task_dir / "task.txt").write_text(task_txt, encoding="utf-8")

        # Override with the candidate context's editable scripts.  Empty
        # scripts leave the copied template in place.
        if context.build_pkg_sh:
            (task_dir / "docker_build_pkg.sh").write_text(context.build_pkg_sh, encoding="utf-8")
        if context.build_run_sh:
            (task_dir / "docker_build_run.sh").write_text(context.build_run_sh, encoding="utf-8")

        # Copy sandbox_verify.py
        src_verify = _TEMPLATES_DIR / "sandbox_verify.py"
        shutil.copy2(str(src_verify), str(workspace / "sandbox_verify.py"))

        # Run sandbox_verify.py directly (no agent)
        try:
            proc = subprocess.run(
                [sys.executable, str(workspace / "sandbox_verify.py"), "--task", str(task_dir)],
                capture_output=True,
                text=True,
                # Build logs can contain arbitrary bytes; never fail on decoding.
                errors="replace",
                timeout=timeout_s,
            )
        except subprocess.TimeoutExpired:
            return SandboxResult(
                success=False,
                failure_json={
                    "stage": "timeout",
                    "return_code": 124,
                    "error_message": f"Verification timed out after {timeout_s}s",
                },
                duration_s=time.time() - start,
                agent_output=f"Timed out after {timeout_s}s",
            )

        # Read results
        success_file = task_dir / "verification_success.json"
        failure_file = task_dir / "failure.json"
        success = success_file.exists()

        failure_json: dict | None = None
        if failure_file.exists():
            try:
                loaded = json.loads(failure_file.read_text(encoding="utf-8"))
            except Exception:
                logger.debug("Failed to parse failure.json in verify_context")
            else:
                if isinstance(loaded, dict):
                    failure_json = loaded
                else:
                    # The result type promises a dict; drop anything else.
                    logger.debug("Ignoring non-object failure.json in verify_context")

        output = proc.stdout
        if not success and proc.stderr:
            # Without this a traceback from the verify script would vanish.
            output = f"{output}\n[stderr]\n{proc.stderr}"

        resource_metrics = _extract_resource_metrics(success_file, failure_file, failure_json)

        return SandboxResult(
            success=success,
            docker_context=context if success else None,
            failure_json=failure_json,
            duration_s=time.time() - start,
            agent_output=output,
            resource_metrics=resource_metrics,
        )
527
+
528
+
529
def _render_run_tests_sh(docker_templates: Path, base_commit: str) -> str:
    """Render the run-tests.sh Jinja2 template with embedded scripts.

    Args:
        docker_templates: Directory containing ``run-tests.sh``,
            ``pytest_runner.py`` and ``parser.py``.
        base_commit: Value passed to the template as ``base_commit``.

    Returns:
        The rendered script text, with the full source of both helper
        scripts passed to the template and ``run_pytest`` set to true.
    """
    env = Environment(
        loader=FileSystemLoader(str(docker_templates)),
        keep_trailing_newline=True,
        # Shell script output: HTML escaping would corrupt it.
        autoescape=False,
    )
    template = env.get_template("run-tests.sh")

    # Read the embedded scripts as UTF-8, the default Python source encoding,
    # instead of depending on the process locale.
    pytest_runner = (docker_templates / "pytest_runner.py").read_text(encoding="utf-8")
    parser = (docker_templates / "parser.py").read_text(encoding="utf-8")

    return template.render(
        base_commit=base_commit,
        pytest_runner=pytest_runner,
        parser=parser,
        run_pytest=True,
    )