fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,547 @@
|
|
|
1
|
+
"""Sandboxed synthesis via an installed CLI agent.
|
|
2
|
+
|
|
3
|
+
Prepares a temporary workspace with Docker build context files, an AGENTS.md
|
|
4
|
+
guide, and a simplified verify script, then launches the first available
|
|
5
|
+
installed agent (Claude Code, Codex, or Gemini CLI). The agent iterates
|
|
6
|
+
internally — reading failure.json, editing build scripts, re-running
|
|
7
|
+
verification — until it succeeds or the session times out.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import json
|
|
14
|
+
import shutil
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
import tempfile
|
|
18
|
+
import time
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from jinja2 import Environment, FileSystemLoader
|
|
23
|
+
|
|
24
|
+
from datasmith.agents.installed import AgentResult, get_agent
|
|
25
|
+
from datasmith.docker.context import DockerContext
|
|
26
|
+
from datasmith.utils import get_logger
|
|
27
|
+
|
|
28
|
+
logger = get_logger("agents.sandbox")
|
|
29
|
+
|
|
30
|
+
_TEMPLATES_DIR = Path(__file__).parent / "templates"
|
|
31
|
+
|
|
32
|
+
# Files the agent must NOT modify. Hashes are recorded at workspace setup
|
|
33
|
+
# and verified both by sandbox_verify.py (so the agent gets feedback) and by
|
|
34
|
+
# _extract_results (hard server-side check the agent cannot bypass).
|
|
35
|
+
_IMMUTABLE_FILES = (
|
|
36
|
+
"Dockerfile.pr",
|
|
37
|
+
"docker_build_base.sh",
|
|
38
|
+
"docker_build_env.sh",
|
|
39
|
+
"docker_build_final.sh",
|
|
40
|
+
"profile.sh",
|
|
41
|
+
"run-tests.sh",
|
|
42
|
+
"entrypoint.sh",
|
|
43
|
+
"task.txt",
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _compute_immutable_hashes(task_dir: Path) -> dict[str, str]:
|
|
48
|
+
"""Compute MD5 hashes of all immutable files in *task_dir*."""
|
|
49
|
+
hashes: dict[str, str] = {}
|
|
50
|
+
for fname in _IMMUTABLE_FILES:
|
|
51
|
+
fp = task_dir / fname
|
|
52
|
+
if fp.exists():
|
|
53
|
+
hashes[fname] = hashlib.md5(fp.read_bytes()).hexdigest() # noqa: S324
|
|
54
|
+
return hashes
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class SandboxConfig:
    """Timeout settings for the sandbox runner."""

    # Total wall-clock budget for one agent session, in seconds.
    # NOTE(review): nothing in this module's visible code reads this field.
    timeout_s: int = 3600

    # Per-process timeout in seconds; ``SandboxRunner._launch_agent`` forwards
    # it as the ``timeout`` argument of ``agent.exec``.
    codex_timeout_s: int = 3600
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class SandboxResult:
    """What a sandbox synthesis or verification run produced."""

    # True when the run is considered successful.
    success: bool

    # Build context associated with the run; left as None on failure.
    docker_context: DockerContext | None = None

    # Parsed failure description (e.g. "stage", "return_code",
    # "error_message" keys), or None when no failure was recorded.
    failure_json: dict | None = None

    # Elapsed time of the run, in seconds.
    duration_s: float = 0.0

    # Textual output reported by the agent (or by the verify script).
    agent_output: str = ""

    # Unprocessed agent output, as provided by the agent wrapper.
    raw_agent_output: str = ""

    # Name of the agent that was launched; empty when no agent ran.
    agent_name: str = ""

    # Files the agent reported as changed.
    files_changed: list[str] = field(default_factory=list)

    # Resource usage figures taken from the verification JSON files.
    resource_metrics: dict = field(default_factory=dict)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class SandboxRunner:
    """Launch an installed CLI agent in a sandboxed workspace to iteratively fix Docker builds.

    A run creates a temporary workspace, fills it with the Docker build
    context, commits it to a fresh git repository, lets the agent work in it,
    and finally inspects the files the agent left behind.
    """

    # Docker template files copied verbatim from ``docker/templates`` into the
    # workspace's ``task/`` directory.
    _TEMPLATE_FILES = (
        "Dockerfile.pr",
        "docker_build_base.sh",
        "docker_build_env.sh",
        "docker_build_pkg.sh",
        "docker_build_run.sh",
        "docker_build_final.sh",
        "profile.sh",
        "entrypoint.sh",
    )

    def __init__(self, config: SandboxConfig | None = None, agent: str | None = None) -> None:
        """Store the configuration and the optional preferred agent name.

        Args:
            config: Timeout settings; a default ``SandboxConfig`` is used when omitted.
            agent: Name of the agent to prefer. When falsy, ``get_agent`` picks one.
        """
        self._config = config or SandboxConfig()
        self._agent = agent

    def run(
        self,
        owner: str,
        repo: str,
        sha: str,
        repo_image: str,
        env_payload: str,
        python_version: str,
        pr_context: str,
        prior_attempts: str = "",
        dry_run: bool = False,
    ) -> SandboxResult:
        """Prepare workspace, launch agent, extract results.

        Args:
            owner: Repository owner, written into ``task.txt`` and ``AGENTS.md``.
            repo: Repository name.
            sha: Commit under test; also the base commit of ``run-tests.sh``.
            repo_image: Image reference recorded in ``task.txt``.
            env_payload: Environment payload recorded in ``task.txt``.
            python_version: Python version recorded in ``task.txt`` and ``AGENTS.md``.
            pr_context: Text rendered into ``AGENTS.md``.
            prior_attempts: Optional notes written to ``prior_attempts.md``.
            dry_run: When true, stop after preparing the workspace and report
                success with an empty ``DockerContext``.

        Returns:
            A ``SandboxResult`` indicating success/failure and the
            (potentially modified) ``DockerContext``.
        """
        # Monotonic clock: the value is only used to measure elapsed time.
        start = time.monotonic()

        with tempfile.TemporaryDirectory(prefix="synthesis-") as tmpdir:
            workspace = Path(tmpdir)

            # 1. Prepare workspace
            self._prepare_workspace(
                workspace=workspace,
                owner=owner,
                repo=repo,
                sha=sha,
                repo_image=repo_image,
                env_payload=env_payload,
                python_version=python_version,
                pr_context=pr_context,
                prior_attempts=prior_attempts,
            )

            # Keep the baseline digests in this process. The copy written to
            # .immutable_hashes.json lives inside the workspace, where the
            # agent can rewrite or delete it.
            baseline_hashes = _compute_immutable_hashes(workspace / "task")

            # 2. Init git repo (Codex requirement)
            self._init_git(workspace)

            if dry_run:
                logger.info(
                    "[DRY RUN] Would launch agent sandbox for %s/%s@%s in %s",
                    owner,
                    repo,
                    sha[:12],
                    workspace,
                )
                return SandboxResult(
                    success=True,
                    docker_context=DockerContext(),
                    duration_s=time.monotonic() - start,
                    agent_output="[dry run — no execution]",
                )

            # 3. Launch agent
            agent_name, agent_result = self._launch_agent(workspace)

            # 4. Extract results
            result = self._extract_results(
                workspace,
                agent_result,
                agent_name,
                expected_hashes=baseline_hashes,
            )
            result.duration_s = time.monotonic() - start
            return result

    def _prepare_workspace(
        self,
        workspace: Path,
        owner: str,
        repo: str,
        sha: str,
        repo_image: str,
        env_payload: str,
        python_version: str,
        pr_context: str,
        prior_attempts: str = "",
    ) -> None:
        """Create the workspace directory structure.

        Writes ``task/`` (Docker templates, rendered ``run-tests.sh``,
        ``task.txt``), ``AGENTS.md``, ``sandbox_verify.py``, optionally
        ``prior_attempts.md``, and ``.immutable_hashes.json``. Generated text
        files are written as UTF-8 regardless of the host locale.
        """
        task_dir = workspace / "task"
        task_dir.mkdir(parents=True, exist_ok=True)

        # Copy the Docker template files into task/; missing templates are skipped.
        docker_templates = Path(__file__).parents[1] / "docker" / "templates"
        for fname in self._TEMPLATE_FILES:
            src = docker_templates / fname
            if src.exists():
                shutil.copy2(str(src), str(task_dir / fname))

        # Render run-tests.sh from Jinja2 template with embedded scripts
        run_tests_sh = _render_run_tests_sh(docker_templates, base_commit=sha)
        (task_dir / "run-tests.sh").write_text(run_tests_sh, encoding="utf-8")

        # Generate task.txt
        task_txt = _generate_task_txt(owner, repo, sha, env_payload, python_version, repo_image)
        (task_dir / "task.txt").write_text(task_txt, encoding="utf-8")

        # Render AGENTS.md from Jinja2 template
        agents_md = _render_agents_md(
            owner=owner,
            repo=repo,
            sha=sha,
            python_version=python_version,
            pr_context=pr_context,
        )
        (workspace / "AGENTS.md").write_text(agents_md, encoding="utf-8")

        # Copy sandbox_verify.py
        src_verify = _TEMPLATES_DIR / "sandbox_verify.py"
        shutil.copy2(str(src_verify), str(workspace / "sandbox_verify.py"))

        # Write prior attempts context (from failed TRY_SIMILAR stage)
        if prior_attempts:
            (workspace / "prior_attempts.md").write_text(prior_attempts, encoding="utf-8")

        # Record immutable file hashes inside the workspace. This on-disk copy
        # is only a fallback for _extract_results; run() keeps its own snapshot.
        hashes = _compute_immutable_hashes(task_dir)
        (workspace / ".immutable_hashes.json").write_text(json.dumps(hashes), encoding="utf-8")

    def _init_git(self, workspace: Path) -> None:
        """Initialize a git repo in the workspace (required by Codex).

        Runs ``git init``, stages everything, and creates one commit with a
        throwaway identity. Raises ``subprocess.CalledProcessError`` if any
        git command fails.
        """
        commands = (
            ["git", "init"],
            ["git", "add", "-A"],
            [
                "git",
                "-c",
                "user.name=sandbox",
                "-c",
                "user.email=sandbox@local",
                "commit",
                "-m",
                "init",
            ],
        )
        for command in commands:
            subprocess.run(
                command,
                cwd=str(workspace),
                capture_output=True,
                check=True,
            )

    def _launch_agent(self, workspace: Path) -> tuple[str, AgentResult]:
        """Launch the first available installed CLI agent in the workspace.

        Returns ``(agent_name, AgentResult)``.
        """
        preference = [self._agent] if self._agent else None
        agent = get_agent(preference=preference)
        name = agent.name()
        logger.info("Launching %s agent sandbox in %s", name, workspace)
        result = agent.exec(
            prompt="Read AGENTS.md and follow its instructions to fix the Docker build.",
            timeout=self._config.codex_timeout_s,
            workdir=str(workspace),
        )
        logger.info(
            "Agent %s exited (success=%s, duration=%.1fs, output_len=%d, error=%s)",
            name,
            result.success,
            result.duration_s,
            len(result.output),
            result.error[:200] if result.error else "",
        )
        return name, result

    def _find_integrity_violations(
        self,
        workspace: Path,
        task_dir: Path,
        expected_hashes: dict[str, str] | None,
    ) -> list[str]:
        """Return the names of immutable files whose digest no longer matches the baseline.

        When *expected_hashes* is None the baseline is read from
        ``.immutable_hashes.json`` in the workspace. If that file is missing
        the check is skipped with a warning; if it is unreadable or is not a
        JSON object, the file itself is reported as the violation.
        """
        if expected_hashes is None:
            hashes_file = workspace / ".immutable_hashes.json"
            if not hashes_file.exists():
                logger.warning("No immutable-file baseline available; integrity check skipped")
                return []
            try:
                loaded = json.loads(hashes_file.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                loaded = None
            if not isinstance(loaded, dict):
                return [hashes_file.name]
            expected_hashes = loaded

        current = _compute_immutable_hashes(task_dir)
        return [name for name, digest in expected_hashes.items() if current.get(name, "") != digest]

    def _extract_results(
        self,
        workspace: Path,
        codex_result: AgentResult,
        agent_name: str = "",
        expected_hashes: dict[str, str] | None = None,
    ) -> SandboxResult:
        """Read workspace state after the agent exits to build the result.

        Args:
            workspace: Root of the sandbox workspace.
            codex_result: Result object returned by the agent.
            agent_name: Name of the agent, copied into the result.
            expected_hashes: Digests of the immutable files taken before the
                agent started. When omitted, the copy stored in the workspace
                is used instead; the agent has write access to that copy.

        Returns:
            A failed result with stage ``"integrity"`` if an immutable file
            changed. Otherwise success is decided by the presence of
            ``task/verification_success.json``.
        """
        task_dir = workspace / "task"

        # Hard integrity check against the pre-agent baseline.
        modified = self._find_integrity_violations(workspace, task_dir, expected_hashes)
        if modified:
            logger.warning("File integrity violation: %s", ", ".join(modified))
            return SandboxResult(
                success=False,
                failure_json={
                    "stage": "integrity",
                    "return_code": 1,
                    "error_message": f"Agent modified immutable files: {', '.join(modified)}",
                },
                agent_output=codex_result.output,
                raw_agent_output=codex_result.raw_output,
                agent_name=agent_name,
                files_changed=codex_result.files_changed,
            )

        # Check for success
        success_file = task_dir / "verification_success.json"
        failure_file = task_dir / "failure.json"

        success = success_file.exists()

        # Read back only the two agent-editable scripts (the rest are templates)
        docker_context: DockerContext | None = None
        try:
            pkg_path = task_dir / "docker_build_pkg.sh"
            run_path = task_dir / "docker_build_run.sh"
            pkg_sh = pkg_path.read_text(encoding="utf-8") if pkg_path.exists() else ""
            run_sh = run_path.read_text(encoding="utf-8") if run_path.exists() else ""
            docker_context = DockerContext(build_pkg_sh=pkg_sh, build_run_sh=run_sh)
        except Exception:
            # Broad on purpose: the scripts are agent-written and DockerContext
            # is constructed from them; record the cause and carry on.
            logger.warning("Failed to read Docker context from workspace", exc_info=True)

        # Read failure.json if present. It is agent-reachable, so accept it
        # only when it parses to a JSON object.
        failure_json: dict | None = None
        if failure_file.exists():
            try:
                parsed = json.loads(failure_file.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                logger.warning("Failed to parse failure.json")
            else:
                if isinstance(parsed, dict):
                    failure_json = parsed
                else:
                    logger.warning("Ignoring failure.json: expected a JSON object")

        if success:
            logger.info("Sandbox synthesis succeeded")
        else:
            stage = failure_json.get("stage", "unknown") if failure_json else "unknown"
            logger.warning("Sandbox synthesis failed at stage: %s", stage)
            if stage == "unknown":
                # No failure.json means sandbox_verify.py was never run or crashed.
                # Log agent details to help diagnose why.
                logger.warning(
                    "No failure.json found — agent likely never ran sandbox_verify.py. Agent error: %s",
                    codex_result.error[:500] if codex_result.error else "(none)",
                )
                if codex_result.output:
                    logger.info(
                        "Agent output (last 1000 chars): %s",
                        codex_result.output[-1000:],
                    )

        # Extract resource_metrics from whichever JSON file was written
        resource_metrics = _extract_resource_metrics(success_file, failure_file, failure_json)

        return SandboxResult(
            success=success,
            docker_context=docker_context if success else None,
            failure_json=failure_json,
            agent_output=codex_result.output,
            raw_agent_output=codex_result.raw_output,
            agent_name=agent_name,
            files_changed=codex_result.files_changed,
            resource_metrics=resource_metrics,
        )
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _extract_resource_metrics(
|
|
354
|
+
success_file: Path,
|
|
355
|
+
failure_file: Path,
|
|
356
|
+
failure_json: dict | None,
|
|
357
|
+
) -> dict:
|
|
358
|
+
"""Read ``resource_metrics`` from the verification JSON files.
|
|
359
|
+
|
|
360
|
+
``sandbox_verify.py`` writes metrics into both ``verification_success.json``
|
|
361
|
+
and ``failure.json``. We check the success file first (authoritative on
|
|
362
|
+
success), then fall back to the failure JSON dict (already parsed by caller).
|
|
363
|
+
"""
|
|
364
|
+
if success_file.exists():
|
|
365
|
+
try:
|
|
366
|
+
data = json.loads(success_file.read_text())
|
|
367
|
+
rm = data.get("resource_metrics")
|
|
368
|
+
if isinstance(rm, dict):
|
|
369
|
+
return dict(rm)
|
|
370
|
+
except Exception:
|
|
371
|
+
logger.debug("Failed to read resource_metrics from success file")
|
|
372
|
+
if isinstance(failure_json, dict):
|
|
373
|
+
metrics = failure_json.get("resource_metrics")
|
|
374
|
+
if isinstance(metrics, dict):
|
|
375
|
+
return metrics
|
|
376
|
+
return {}
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _generate_task_txt(
|
|
380
|
+
owner: str,
|
|
381
|
+
repo: str,
|
|
382
|
+
sha: str,
|
|
383
|
+
env_payload: str,
|
|
384
|
+
python_version: str,
|
|
385
|
+
repo_image: str = "",
|
|
386
|
+
) -> str:
|
|
387
|
+
"""Generate a task.txt file content."""
|
|
388
|
+
# Escape env_payload for repr
|
|
389
|
+
return (
|
|
390
|
+
f"Task(\n"
|
|
391
|
+
f" owner={owner!r},\n"
|
|
392
|
+
f" repo={repo!r},\n"
|
|
393
|
+
f" sha={sha!r},\n"
|
|
394
|
+
f" commit_date=0.0,\n"
|
|
395
|
+
f" env_payload={env_payload!r},\n"
|
|
396
|
+
f" python_version={python_version!r},\n"
|
|
397
|
+
f" tag='pkg',\n"
|
|
398
|
+
f" benchmarks='',\n"
|
|
399
|
+
f" repo_image={repo_image!r}\n"
|
|
400
|
+
f")\n"
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _render_agents_md(
    owner: str,
    repo: str,
    sha: str,
    python_version: str,
    pr_context: str,
) -> str:
    """Render ``AGENTS.md.j2`` from the agents template directory.

    The five arguments are passed to the template under their own names.
    Autoescaping is off and the template's trailing newline is kept.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
        keep_trailing_newline=True,
        autoescape=False,
    )
    variables = {
        "owner": owner,
        "repo": repo,
        "sha": sha,
        "python_version": python_version,
        "pr_context": pr_context,
    }
    return jinja_env.get_template("AGENTS.md.j2").render(**variables)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def verify_context(
    owner: str,
    repo: str,
    sha: str,
    repo_image: str,
    env_payload: str,
    python_version: str,
    context: DockerContext,
    timeout_s: int = 3600,
) -> SandboxResult:
    """Build and verify a :class:`DockerContext` without launching an agent.

    Used by ``Synthesizer.TRY_SIMILAR`` to test whether a previously
    successful build context works for a new commit in the same repository.

    Args:
        owner: Repository owner recorded in ``task.txt``.
        repo: Repository name recorded in ``task.txt``.
        sha: Commit under test; also the base commit of ``run-tests.sh``.
        repo_image: Image reference recorded in ``task.txt``.
        env_payload: Environment payload recorded in ``task.txt``.
        python_version: Python version recorded in ``task.txt``.
        context: Candidate context; its non-empty ``build_pkg_sh`` and
            ``build_run_sh`` replace the template scripts.
        timeout_s: Seconds to wait for ``sandbox_verify.py``.

    Returns:
        A ``SandboxResult``. Success is decided by the presence of
        ``task/verification_success.json``. On timeout the result carries a
        ``"timeout"`` failure with return code 124. On failure the script's
        stderr, if any, is appended to ``agent_output``.
    """
    # Monotonic clock: the value is only used to measure elapsed time.
    start = time.monotonic()
    docker_templates = Path(__file__).parents[1] / "docker" / "templates"

    with tempfile.TemporaryDirectory(prefix="verify-ctx-") as tmpdir:
        workspace = Path(tmpdir)
        task_dir = workspace / "task"
        task_dir.mkdir(parents=True, exist_ok=True)

        # Copy template files; missing templates are skipped.
        for fname in (
            "Dockerfile.pr",
            "docker_build_base.sh",
            "docker_build_env.sh",
            "docker_build_pkg.sh",
            "docker_build_run.sh",
            "docker_build_final.sh",
            "profile.sh",
            "entrypoint.sh",
        ):
            src = docker_templates / fname
            if src.exists():
                shutil.copy2(str(src), str(task_dir / fname))

        # Render run-tests.sh from Jinja2 template
        run_tests_sh = _render_run_tests_sh(docker_templates, base_commit=sha)
        (task_dir / "run-tests.sh").write_text(run_tests_sh, encoding="utf-8")

        # Write task.txt
        task_txt = _generate_task_txt(owner, repo, sha, env_payload, python_version, repo_image)
        (task_dir / "task.txt").write_text(task_txt, encoding="utf-8")

        # Override with the candidate context's editable scripts
        if context.build_pkg_sh:
            (task_dir / "docker_build_pkg.sh").write_text(context.build_pkg_sh, encoding="utf-8")
        if context.build_run_sh:
            (task_dir / "docker_build_run.sh").write_text(context.build_run_sh, encoding="utf-8")

        # Copy sandbox_verify.py
        src_verify = _TEMPLATES_DIR / "sandbox_verify.py"
        shutil.copy2(str(src_verify), str(workspace / "sandbox_verify.py"))

        # Run sandbox_verify.py directly (no agent)
        try:
            proc = subprocess.run(
                [sys.executable, str(workspace / "sandbox_verify.py"), "--task", str(task_dir)],
                capture_output=True,
                text=True,
                # Build logs may contain bytes that do not decode; replace them
                # instead of raising UnicodeDecodeError.
                errors="replace",
                timeout=timeout_s,
            )
        except subprocess.TimeoutExpired:
            return SandboxResult(
                success=False,
                failure_json={
                    "stage": "timeout",
                    "return_code": 124,
                    "error_message": f"Verification timed out after {timeout_s}s",
                },
                duration_s=time.monotonic() - start,
                agent_output=f"Timed out after {timeout_s}s",
            )
        output = proc.stdout

        # Read results
        success_file = task_dir / "verification_success.json"
        failure_file = task_dir / "failure.json"

        success = success_file.exists()

        # Accept failure.json only when it parses to a JSON object.
        failure_json: dict | None = None
        if failure_file.exists():
            try:
                parsed = json.loads(failure_file.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                logger.debug("Failed to parse failure.json in verify_context")
            else:
                if isinstance(parsed, dict):
                    failure_json = parsed
                else:
                    logger.debug("Ignoring failure.json in verify_context: expected a JSON object")

        if not success:
            # Keep whatever the script printed on stderr; without it a crash
            # of the verifier leaves no trace in the result.
            if proc.stderr:
                output = f"{output}\n[stderr]\n{proc.stderr}"
            if failure_json is None:
                logger.warning(
                    "sandbox_verify.py exited with code %d without a usable failure.json",
                    proc.returncode,
                )

        resource_metrics = _extract_resource_metrics(success_file, failure_file, failure_json)

        return SandboxResult(
            success=success,
            docker_context=context if success else None,
            failure_json=failure_json,
            duration_s=time.monotonic() - start,
            agent_output=output,
            resource_metrics=resource_metrics,
        )
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _render_run_tests_sh(docker_templates: Path, base_commit: str) -> str:
    """Render the run-tests.sh Jinja2 template with embedded scripts.

    Args:
        docker_templates: Directory containing ``run-tests.sh``,
            ``pytest_runner.py`` and ``parser.py``.
        base_commit: Value passed to the template as ``base_commit``.

    Returns:
        The rendered script. The template also receives the full text of
        ``pytest_runner.py`` and ``parser.py`` and ``run_pytest=True``.

    Raises:
        OSError: If one of the embedded scripts cannot be read.
    """
    env = Environment(
        loader=FileSystemLoader(str(docker_templates)),
        keep_trailing_newline=True,
        autoescape=False,
    )
    template = env.get_template("run-tests.sh")

    # Read the embedded scripts as UTF-8 rather than with the locale's
    # encoding, so the result does not depend on the host configuration.
    pytest_runner = (docker_templates / "pytest_runner.py").read_text(encoding="utf-8")
    parser = (docker_templates / "parser.py").read_text(encoding="utf-8")

    return template.render(
        base_commit=base_commit,
        pytest_runner=pytest_runner,
        parser=parser,
        run_pytest=True,
    )
|