fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import enum
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
from datasmith.agents.sandbox import SandboxResult, verify_context
|
|
9
|
+
from datasmith.docker.context import DockerContext
|
|
10
|
+
from datasmith.utils import get_client, get_logger
|
|
11
|
+
|
|
12
|
+
# Module-level logger, created through the project's ``get_logger`` helper.
logger = get_logger("agents.synthesizer")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@enum.unique
class SynthesisState(str, enum.Enum):
    """Names of the steps a synthesis run can record in its trace.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value.
    """

    # Lookup of an already stored context for the exact commit.
    CHECK_CACHE = "check_cache"
    # Lookup of stored contexts belonging to the same repository.
    FIND_SIMILAR = "find_similar"
    # Verification of the contexts found by the previous step.
    TRY_SIMILAR = "try_similar"
    # Sandbox-based generation of a new context by an agent.
    LLM_GENERATE = "llm_generate"
    # Terminal state: no usable context was produced.
    FAIL = "fail"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Synthesizer:
    """State machine for synthesizing Docker build contexts.

    A run tries, in order: a cached context for the exact commit, contexts
    previously saved for the same repository, and finally sandbox-based
    generation by the configured agent. Every state entered is appended to
    an internal list that is exposed (as a copy) through :attr:`trace`.
    """

    # DockerContext fields that ``_save_context`` does not persist. When a
    # cached row has no value for one of them the constructor default is used
    # instead of an empty placeholder.
    _TEMPLATE_FIELDS = (
        "dockerfile",
        "build_base_sh",
        "build_env_sh",
        "build_final_sh",
        "profile_sh",
        "run_tests_sh",
        "entrypoint_sh",
    )

    def __init__(
        self,
        max_attempts: int = 2,
        dry_run: bool = False,
        agent: str | None = None,
        force: bool = False,
    ) -> None:
        """Store the run configuration.

        Args:
            max_attempts: Number of sandbox generation attempts per run.
            dry_run: Forwarded to the sandbox runner as ``dry_run``.
            agent: Agent name forwarded to the sandbox runner; the value
                ``"none"`` disables sandbox generation entirely.
            force: When true, every run ignores cached contexts.
        """
        self._max_attempts = max_attempts
        self._dry_run = dry_run
        self._agent = agent
        self._force = force
        self._trace: list[SynthesisState] = []

    @property
    def trace(self) -> list[SynthesisState]:
        """Return a copy of the states visited by the most recent run."""
        return list(self._trace)

    def run(
        self,
        owner: str,
        repo: str,
        issue_number: int,
        pr_context: str,
        sha: str = "",
        repo_image: str = "",
        env_payload: str = "",
        python_version: str = "",
        force: bool = False,
    ) -> DockerContext | None:
        """Run the synthesis state machine.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_number: Pull-request number the context is built for.
            pr_context: Text handed to the sandbox agent as PR context.
            sha: Commit SHA; when empty, cache lookup and saving are skipped.
            repo_image: Forwarded to verification and the sandbox runner.
            env_payload: Forwarded to verification and the sandbox runner.
            python_version: Forwarded to verification and the sandbox runner.
            force: Ignore a cached context for this call (also enabled by the
                ``force`` flag given to the constructor).

        Returns:
            The ``DockerContext`` that was found or generated, or ``None``
            when every strategy failed.
        """
        self._trace = []
        force = force or self._force

        # State: CHECK_CACHE
        self._trace.append(SynthesisState.CHECK_CACHE)
        # A forced run never uses the cached value, so skip the lookup.
        cached = None if force else self._check_cache(owner, repo, sha)
        if cached is not None:
            logger.info("Cache hit for %s/%s@%s", owner, repo, sha[:12] if sha else "?")
            return cached

        # State: FIND_SIMILAR
        self._trace.append(SynthesisState.FIND_SIMILAR)
        similar_contexts = self._find_similar(owner, repo, issue_number)

        # State: TRY_SIMILAR
        failed_attempts: list[tuple[DockerContext, SandboxResult]] = []
        if similar_contexts:
            self._trace.append(SynthesisState.TRY_SIMILAR)
            for ctx in similar_contexts:
                result = verify_context(
                    owner=owner,
                    repo=repo,
                    sha=sha,
                    repo_image=repo_image,
                    env_payload=env_payload,
                    python_version=python_version,
                    context=ctx,
                )
                if result.success:
                    logger.info("Similar context passed for %s/%s#%d", owner, repo, issue_number)
                    self._save_context(
                        owner,
                        repo,
                        sha,
                        issue_number,
                        ctx,
                        resource_metrics=result.resource_metrics,
                    )
                    return ctx
                # Keep the failure so it can be shown to the agent later.
                failed_attempts.append((ctx, result))

        # State: LLM_GENERATE (sandbox-based)
        # Skip LLM generation when using the "none" agent — rely only on similar contexts.
        if self._agent == "none":
            self._trace.append(SynthesisState.FAIL)
            logger.info(
                "Agent is 'none' — skipping LLM generation for %s/%s#%d",
                owner,
                repo,
                issue_number,
            )
            return None

        self._trace.append(SynthesisState.LLM_GENERATE)
        prior_attempts = _format_prior_attempts(failed_attempts) if failed_attempts else ""
        for attempt_idx in range(self._max_attempts):
            generated, metrics = self._sandbox_generate(
                owner=owner,
                repo=repo,
                sha=sha,
                pr_context=pr_context,
                repo_image=repo_image,
                env_payload=env_payload,
                python_version=python_version,
                prior_attempts=prior_attempts,
                issue_number=issue_number,
                attempt_index=attempt_idx,
            )
            if generated is not None:
                logger.info(
                    "Sandbox synthesis succeeded for %s/%s#%d (attempt %d)",
                    owner,
                    repo,
                    issue_number,
                    attempt_idx + 1,
                )
                self._save_context(owner, repo, sha, issue_number, generated, resource_metrics=metrics)
                return generated
            logger.warning(
                "Sandbox synthesis attempt %d failed for %s/%s#%d",
                attempt_idx + 1,
                owner,
                repo,
                issue_number,
            )

        # State: FAIL
        self._trace.append(SynthesisState.FAIL)
        logger.warning("All synthesis attempts failed for %s/%s#%d", owner, repo, issue_number)
        return None

    def _check_cache(self, owner: str, repo: str, sha: str) -> DockerContext | None:
        """Return the stored context for ``owner/repo@sha``, if any.

        The lookup is best-effort: an empty ``sha``, no matching row, or any
        error while querying all result in ``None``.
        """
        if not sha:
            return None
        try:
            client = get_client()
            resp = (
                client.table("candidate_containers")
                .select("*")
                .eq("owner", owner)
                .eq("repo", repo)
                .eq("sha", sha)
                .execute()
            )
            if resp.data:
                row = cast(dict[str, Any], resp.data[0])
                fields: dict[str, Any] = {
                    "build_pkg_sh": row.get("build_pkg_sh") or "",
                    "build_run_sh": row.get("build_run_sh") or "",
                }
                # Only forward template-derived fields that actually hold a
                # value; passing "" or None would override the defaults.
                fields.update({name: row[name] for name in self._TEMPLATE_FIELDS if row.get(name)})
                return DockerContext(**fields)
        except Exception:
            logger.debug("Cache check failed, proceeding", exc_info=True)
        return None

    @staticmethod
    def _parse_created_at(raw: Any) -> datetime.datetime | None:
        """Parse a ``created_at`` value, returning ``None`` when unusable."""
        if not raw:
            return None
        try:
            return _parse_ts(raw)
        except (AttributeError, TypeError, ValueError):
            logger.debug("Ignoring unparseable created_at value %r", raw)
            return None

    def _find_similar(self, owner: str, repo: str, issue_number: int) -> list[DockerContext]:
        """Find previously saved build contexts for the same repository.

        When the creation date of the given PR is known, candidates are
        ordered by how close their own PR's creation date is to it, so the
        most temporally adjacent contexts are tried first. Candidates without
        a usable date sort last. At most five contexts are returned; any
        error while querying yields an empty list.
        """
        try:
            client = get_client()

            # Step 1: look up the current PR's creation date.
            pr_resp = (
                client.table("pull_requests")
                .select("created_at")
                .eq("owner", owner)
                .eq("repo", repo)
                .eq("issue_number", issue_number)
                .execute()
            )
            current_date: datetime.datetime | None = None
            if pr_resp.data:
                current_date = self._parse_created_at(cast(dict[str, Any], pr_resp.data[0]).get("created_at"))

            # Step 2: fetch all non-empty contexts for this repo.
            ctx_resp = (
                client.table("candidate_containers")
                .select("issue_number,build_pkg_sh,build_run_sh")
                .eq("owner", owner)
                .eq("repo", repo)
                .execute()
            )
            rows = [r for r in cast(list[dict[str, Any]], ctx_resp.data or []) if r.get("build_pkg_sh")]
            if not rows:
                return []

            # Step 3: if we have a reference date, sort by proximity.
            if current_date is not None:
                context_issue_numbers = [r["issue_number"] for r in rows if r.get("issue_number") is not None]
                pr_dates: dict[int, datetime.datetime] = {}
                if context_issue_numbers:
                    dates_resp = (
                        client.table("pull_requests")
                        .select("issue_number,created_at")
                        .eq("owner", owner)
                        .eq("repo", repo)
                        .in_("issue_number", context_issue_numbers)
                        .execute()
                    )
                    for p in cast(list[dict[str, Any]], dates_resp.data or []):
                        iss = p.get("issue_number")
                        created = self._parse_created_at(p.get("created_at"))
                        if iss is not None and created is not None:
                            pr_dates[iss] = created

                farthest = datetime.timedelta.max

                def _proximity(row: dict[str, Any]) -> datetime.timedelta:
                    iss = row.get("issue_number")
                    created = pr_dates.get(iss) if iss is not None else None
                    if created is None:
                        return farthest
                    try:
                        return abs(created - current_date)
                    except TypeError:
                        # Naive and aware datetimes cannot be subtracted;
                        # treat the row as undated rather than failing.
                        return farthest

                rows.sort(key=_proximity)

            rows = rows[:5]
            return [
                DockerContext(
                    build_pkg_sh=r.get("build_pkg_sh", ""),
                    build_run_sh=r.get("build_run_sh", ""),
                )
                for r in rows
            ]
        except Exception:
            logger.debug("Similar context lookup failed", exc_info=True)
            return []

    def _sandbox_generate(
        self,
        owner: str,
        repo: str,
        sha: str,
        pr_context: str,
        repo_image: str,
        env_payload: str,
        python_version: str,
        prior_attempts: str = "",
        issue_number: int = 0,
        attempt_index: int = 0,
    ) -> tuple[DockerContext | None, dict]:
        """Run one sandbox attempt and record its outcome.

        Returns:
            A pair ``(context, resource_metrics)``. ``context`` is the
            result's ``docker_context`` when the run succeeded and ``None``
            otherwise; ``resource_metrics`` is returned in both cases.
        """
        from datasmith.agents.sandbox import SandboxRunner

        runner = SandboxRunner(agent=self._agent)
        result = runner.run(
            owner=owner,
            repo=repo,
            sha=sha,
            repo_image=repo_image,
            env_payload=env_payload,
            python_version=python_version,
            pr_context=pr_context,
            prior_attempts=prior_attempts,
            dry_run=self._dry_run,
        )
        self._log_attempt(
            owner=owner,
            repo=repo,
            sha=sha,
            issue_number=issue_number,
            attempt_index=attempt_index,
            result=result,
        )
        ctx = result.docker_context if result.success else None
        return ctx, result.resource_metrics

    def _log_attempt(
        self,
        owner: str,
        repo: str,
        sha: str,
        issue_number: int,
        attempt_index: int,
        result: SandboxResult,
    ) -> None:
        """Persist agent output to the ``error_logs`` Supabase table.

        Logging is best-effort: building the row and inserting it both happen
        inside the ``try`` so a malformed result cannot abort the synthesis
        run.
        """
        try:
            timestamp = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()

            failure = result.failure_json or {}
            # Cap raw output at 100 KB for Supabase storage (keeps the tail).
            raw_output = result.raw_agent_output or ""
            if len(raw_output) > 100_000:
                raw_output = raw_output[-100_000:]

            row = {
                "owner": owner,
                "repo": repo,
                "sha": sha,
                "issue_number": issue_number,
                "attempt_index": attempt_index,
                "agent_name": result.agent_name,
                "success": result.success,
                "duration_s": result.duration_s,
                "failure_stage": failure.get("stage") or None,
                "failure_return_code": failure.get("return_code") or None,
                "error_message": (failure.get("error_message") or "")[-10_000:] or None,
                "agent_output": raw_output or None,
                "files_changed": json.dumps(result.files_changed),
                "resource_metrics": result.resource_metrics or None,
                "created_at": timestamp,
            }
            client = get_client()
            client.table("error_logs").insert(row).execute()
            logger.info(
                "Logged synthesis attempt to error_logs for %s/%s@%s attempt %d", owner, repo, sha[:12], attempt_index
            )
        except Exception:
            logger.debug("Failed to log synthesis attempt to Supabase", exc_info=True)

    def _save_context(
        self,
        owner: str,
        repo: str,
        sha: str,
        issue_number: int,
        ctx: DockerContext,
        resource_metrics: dict | None = None,
    ) -> None:
        """Persist the agent-edited scripts to the ``candidate_containers`` table.

        Only ``build_pkg_sh`` and ``build_run_sh`` are saved — the other
        fields come from templates and don't need to be persisted.
        ``resource_metrics`` is added to the row only when non-empty. Nothing
        is saved when ``sha`` is empty, and a failed upsert is logged rather
        than raised.
        """
        if not sha:
            return
        try:
            client = get_client()
            row: dict = {
                "owner": owner,
                "repo": repo,
                "sha": sha,
                "issue_number": issue_number,
                "build_pkg_sh": ctx.build_pkg_sh,
                "build_run_sh": ctx.build_run_sh,
            }
            if resource_metrics:
                row["resource_metrics"] = resource_metrics
            client.table("candidate_containers").upsert(row).execute()
            logger.info("Saved context for %s/%s@%s", owner, repo, sha[:12])
        except Exception:
            logger.warning("Failed to save context for %s/%s@%s", owner, repo, sha[:12], exc_info=True)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _parse_ts(ts: str) -> datetime.datetime:
    """Parse an ISO-8601 timestamp string to a timezone-aware datetime.

    Handles a trailing ``Z`` as well as ``+HH:MM`` offsets, and normalises
    the fractional-seconds part to six digits; both are needed on Python
    3.9/3.10, where ``fromisoformat`` rejects ``Z`` and any fraction that is
    not exactly three or six digits long. A timestamp without an offset is
    interpreted as UTC so the result is always timezone-aware.

    Args:
        ts: ISO-8601 timestamp text.

    Returns:
        The parsed, timezone-aware datetime.

    Raises:
        ValueError: If ``ts`` is not a valid ISO-8601 timestamp.
    """
    import re

    text = ts.strip()
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    # Pad or truncate the fraction to microsecond precision.
    text = re.sub(r"\.(\d+)", lambda m: "." + m.group(1)[:6].ljust(6, "0"), text, count=1)
    parsed = datetime.datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _format_prior_attempts(attempts: list[tuple[DockerContext, SandboxResult]]) -> str:
    """Render failed TRY_SIMILAR attempts as Markdown for the LLM agent.

    Each attempt contributes its failure stage and return code, both build
    scripts (cut to the first 3000 characters), and, when present, the last
    3000 characters of the error message and of the agent output.
    """
    limit = 3000

    def script_section(title: str, script: str) -> list[str]:
        # One fenced bash block; long scripts keep only their head.
        section = [f"### {title}", "```bash"]
        if script and len(script) > limit:
            section += [script[:limit], "# ... (truncated)"]
        else:
            section.append(script or "(empty)")
        section += ["```", ""]
        return section

    def tail_section(title: str, text: str) -> list[str]:
        # One plain fenced block holding the end of ``text``.
        return [title, "```", text[-limit:], "```", ""]

    out = [
        "# Prior Attempts",
        "",
        "The following build contexts were tried and failed.",
        "Use these failures to inform your approach — avoid repeating the same mistakes.",
        "",
    ]
    for number, (context, outcome) in enumerate(attempts, 1):
        details = outcome.failure_json or {}
        message = details.get("error_message", "")

        out += [
            f"## Attempt {number}",
            "",
            f"**Stage**: {details.get('stage', 'unknown')}",
            f"**Return code**: {details.get('return_code', 1)}",
            "",
        ]
        out += script_section("docker_build_pkg.sh", context.build_pkg_sh)
        out += script_section("docker_build_run.sh", context.build_run_sh)

        if message:
            out += tail_section("### Error output", message)
        if outcome.agent_output:
            out += tail_section("### Build output (last 3000 chars)", outcome.agent_output)

    return "\n".join(out)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# FormulaCode Docker Build Verification Guide
|
|
2
|
+
|
|
3
|
+
You are fixing a Docker build context for the FormulaCode dataset. Your goal is to iteratively
|
|
4
|
+
fix the build until `sandbox_verify.py` succeeds.
|
|
5
|
+
|
|
6
|
+
## Task Info
|
|
7
|
+
|
|
8
|
+
- **Repository**: {{ owner }}/{{ repo }}
|
|
9
|
+
- **Commit SHA**: {{ sha }}
|
|
10
|
+
- **Python version**: {{ python_version }}
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
python3 sandbox_verify.py
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
This runs the full verification pipeline: build → tests.
|
|
19
|
+
On failure it writes `task/failure.json`. On success it writes `task/verification_success.json`.
|
|
20
|
+
Both stages (build, tests) must pass for verification to succeed.
|
|
21
|
+
|
|
22
|
+
## PR Context
|
|
23
|
+
|
|
24
|
+
{{ pr_context }}
|
|
25
|
+
|
|
26
|
+
## Prior Attempts
|
|
27
|
+
|
|
28
|
+
If `prior_attempts.md` exists in this workspace, it contains build scripts that were already
|
|
29
|
+
tried and failed. **Read it first** to understand what went wrong and avoid repeating the same
|
|
30
|
+
mistakes.
|
|
31
|
+
|
|
32
|
+
## Workflow
|
|
33
|
+
|
|
34
|
+
Follow this iterative cycle:
|
|
35
|
+
|
|
36
|
+
1. Run `python3 sandbox_verify.py` (a 60-minute timeout is a good default; adjust as needed)
|
|
37
|
+
2. If it fails, read `task/failure.json` for error details
|
|
38
|
+
3. Edit `task/docker_build_pkg.sh` and/or `task/docker_build_run.sh`
|
|
39
|
+
4. Re-run `python3 sandbox_verify.py`
|
|
40
|
+
5. Repeat until `task/verification_success.json` is created
|
|
41
|
+
|
|
42
|
+
**IMPORTANT**: Only modify `task/docker_build_pkg.sh` and `task/docker_build_run.sh`.
|
|
43
|
+
Do **not** modify the Dockerfile, `task.txt`, or any other scripts.
|
|
44
|
+
|
|
45
|
+
## File Constraints
|
|
46
|
+
|
|
47
|
+
### Files you CAN edit
|
|
48
|
+
- `task/docker_build_pkg.sh` — Primary build script. Install the package, add build deps.
|
|
49
|
+
- `task/docker_build_run.sh` — Runtime setup. Add test/benchmark deps, repo-specific config.
|
|
50
|
+
|
|
51
|
+
### Files you MUST NOT edit
|
|
52
|
+
- `task/Dockerfile.pr` — Multi-stage build definition (base and repo stages are pre-built)
|
|
53
|
+
- `task/task.txt` — Task configuration (owner, repo, sha, deps)
|
|
54
|
+
- `task/docker_build_base.sh` — Base system setup (template)
|
|
55
|
+
- `task/docker_build_env.sh` — Python environment creation (template)
|
|
56
|
+
- `task/docker_build_final.sh` — Final image setup (template)
|
|
57
|
+
- `task/profile.sh` — ASV benchmark runner
|
|
58
|
+
- `task/run-tests.sh` — Test runner
|
|
59
|
+
- `task/entrypoint.sh` — Container entrypoint
|
|
60
|
+
|
|
61
|
+
## Understanding failure.json
|
|
62
|
+
|
|
63
|
+
When verification fails, `task/failure.json` contains:
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"stage": "build|tests",
|
|
68
|
+
"return_code": 1,
|
|
69
|
+
"stderr": "Full error output from the failed stage",
|
|
70
|
+
"stdout": "Full standard output from the failed stage",
|
|
71
|
+
"error_message": "Human-readable error description"
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
- **build** failures: Usually missing dependencies or compilation errors → fix in `docker_build_pkg.sh`
|
|
76
|
+
- **tests** failures: Missing test deps, import errors, or benchmark setup issues → fix in `docker_build_run.sh` or `docker_build_pkg.sh`
|
|
77
|
+
|
|
78
|
+
## Common Fixes for docker_build_pkg.sh
|
|
79
|
+
|
|
80
|
+
### Add build dependency (conda)
|
|
81
|
+
```bash
|
|
82
|
+
micromamba install -y -n "$ENV_NAME" -c conda-forge some-build-tool
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Add build dependency (pip)
|
|
86
|
+
```bash
|
|
87
|
+
micromamba run -n "$ENV_NAME" pip install build-dependency
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Set compiler flags
|
|
91
|
+
```bash
|
|
92
|
+
export CFLAGS="${CFLAGS:--Wno-error=incompatible-pointer-types}"
|
|
93
|
+
export CXXFLAGS="${CXXFLAGS:--Wno-error}"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Initialize git submodules
|
|
97
|
+
```bash
|
|
98
|
+
git -C "$REPO_ROOT" submodule update --init --recursive
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Build Cython extensions
|
|
102
|
+
```bash
|
|
103
|
+
micromamba install -y -n "$ENV_NAME" -c conda-forge cython
|
|
104
|
+
micromamba run -n "$ENV_NAME" python setup.py build_ext --inplace
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Patch source files
|
|
108
|
+
```bash
|
|
109
|
+
sed -i 's/problematic_code/fixed_code/' path/to/file.py
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Common Fixes for docker_build_run.sh
|
|
113
|
+
|
|
114
|
+
### Add runtime/test dependencies
|
|
115
|
+
```bash
|
|
116
|
+
source /etc/profile.d/asv_utils.sh
|
|
117
|
+
source /etc/profile.d/asv_build_vars.sh
|
|
118
|
+
micromamba run -n "$ENV_NAME" pip install pytest-xdist pytest-timeout
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Environment Variables Available
|
|
122
|
+
|
|
123
|
+
These are set by earlier build stages and available in both scripts:
|
|
124
|
+
|
|
125
|
+
- `REPO_ROOT` — Repository root path (usually `/workspace/repo`)
|
|
126
|
+
- `ENV_NAME` — Current environment name (e.g., `asv_3.10`)
|
|
127
|
+
- `ASV_PY_VERSIONS` — Space-separated Python versions
|
|
128
|
+
- `CONF_NAME` — Path to ASV config file
|
|
129
|
+
- `ALL_EXTRAS` — Extras to install (if any)
|
|
130
|
+
- `MAMBA_ROOT_PREFIX` — Micromamba root (`/opt/conda`)
|
|
131
|
+
|
|
132
|
+
## Docker Build Stages
|
|
133
|
+
|
|
134
|
+
The Dockerfile.pr has 4 stages: env → pkg → run → final (base and repo images are pre-built).
|
|
135
|
+
Only the `pkg` and `run` stages use your editable scripts.
|
|
136
|
+
|
|
137
|
+
- **pkg** stage: Runs `docker_build_pkg.sh` to install the package
|
|
138
|
+
- **run** stage: Runs `docker_build_run.sh` to prepare for tests/benchmarks
|
|
139
|
+
|
|
140
|
+
Changes to `docker_build_pkg.sh` rebuild from the pkg stage onward (fast iteration).
|
|
141
|
+
Changes to `docker_build_run.sh` rebuild from the run stage onward (the pkg stage stays cached).
|
|
142
|
+
|
|
143
|
+
## Tips
|
|
144
|
+
|
|
145
|
+
- Read the full error in `failure.json` before making changes
|
|
146
|
+
- Start with the simplest fix (add a missing dep) before trying complex patches
|
|
147
|
+
- The package install command in `docker_build_pkg.sh` is usually `pip install --no-build-isolation -v -e .`
|
|
148
|
+
- If the build times out, look for ways to simplify or skip expensive steps
|
|
149
|
+
- Docker layer caching means earlier stages are cached — only the stage you changed and the stages after it rebuild
|
|
150
|
+
- The build scripts contain a lot of bookkeeping code that downstream stages rely on; avoid removing code unless you understand its purpose.
|