fc-data 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. datasmith/__init__.py +330 -0
  2. datasmith/__init__.pyi +194 -0
  3. datasmith/agents/__init__.py +31 -0
  4. datasmith/agents/classifiers.py +272 -0
  5. datasmith/agents/codex.py +25 -0
  6. datasmith/agents/config.py +108 -0
  7. datasmith/agents/extractors.py +197 -0
  8. datasmith/agents/installed/README.md +52 -0
  9. datasmith/agents/installed/__init__.py +22 -0
  10. datasmith/agents/installed/base.py +240 -0
  11. datasmith/agents/installed/claude.py +134 -0
  12. datasmith/agents/installed/codex.py +91 -0
  13. datasmith/agents/installed/gemini.py +118 -0
  14. datasmith/agents/installed/none.py +27 -0
  15. datasmith/agents/sandbox.py +547 -0
  16. datasmith/agents/synthesizer.py +439 -0
  17. datasmith/agents/templates/AGENTS.md.j2 +150 -0
  18. datasmith/agents/templates/sandbox_verify.py +428 -0
  19. datasmith/docker/__init__.py +31 -0
  20. datasmith/docker/context.py +112 -0
  21. datasmith/docker/images.py +158 -0
  22. datasmith/docker/publish.py +56 -0
  23. datasmith/docker/templates/Dockerfile.base +26 -0
  24. datasmith/docker/templates/Dockerfile.pr +42 -0
  25. datasmith/docker/templates/Dockerfile.repo +11 -0
  26. datasmith/docker/templates/docker_build_base.sh +780 -0
  27. datasmith/docker/templates/docker_build_env.sh +309 -0
  28. datasmith/docker/templates/docker_build_final.sh +106 -0
  29. datasmith/docker/templates/docker_build_pkg.sh +99 -0
  30. datasmith/docker/templates/docker_build_run.sh +124 -0
  31. datasmith/docker/templates/entrypoint.sh +62 -0
  32. datasmith/docker/templates/parser.py +1405 -0
  33. datasmith/docker/templates/profile.sh +199 -0
  34. datasmith/docker/templates/pytest_runner.py +692 -0
  35. datasmith/docker/templates/run-tests.sh +197 -0
  36. datasmith/docker/verifiers.py +131 -0
  37. datasmith/filters.py +154 -0
  38. datasmith/github/__init__.py +22 -0
  39. datasmith/github/client.py +333 -0
  40. datasmith/github/hooks.py +50 -0
  41. datasmith/github/links.py +110 -0
  42. datasmith/github/models.py +206 -0
  43. datasmith/github/render.py +173 -0
  44. datasmith/github/search.py +66 -0
  45. datasmith/github/templates/comment.md.j2 +5 -0
  46. datasmith/github/templates/final.md.j2 +66 -0
  47. datasmith/github/templates/issues.md.j2 +21 -0
  48. datasmith/github/templates/repo.md.j2 +1 -0
  49. datasmith/preflight.py +162 -0
  50. datasmith/publish/__init__.py +13 -0
  51. datasmith/publish/huggingface.py +104 -0
  52. datasmith/publish/pipeline.py +60 -0
  53. datasmith/publish/records.py +91 -0
  54. datasmith/py.typed +1 -0
  55. datasmith/resolution/__init__.py +14 -0
  56. datasmith/resolution/blocklist.py +145 -0
  57. datasmith/resolution/cache.py +120 -0
  58. datasmith/resolution/constants.py +277 -0
  59. datasmith/resolution/dependency_resolver.py +174 -0
  60. datasmith/resolution/git_utils.py +378 -0
  61. datasmith/resolution/import_analyzer.py +66 -0
  62. datasmith/resolution/metadata_parser.py +412 -0
  63. datasmith/resolution/models.py +41 -0
  64. datasmith/resolution/orchestrator.py +522 -0
  65. datasmith/resolution/package_filters.py +312 -0
  66. datasmith/resolution/python_manager.py +110 -0
  67. datasmith/runners/__init__.py +15 -0
  68. datasmith/runners/base.py +112 -0
  69. datasmith/runners/classify_prs.py +48 -0
  70. datasmith/runners/render_problems.py +113 -0
  71. datasmith/runners/resolve_packages.py +66 -0
  72. datasmith/runners/scrape_commits.py +166 -0
  73. datasmith/runners/scrape_repos.py +44 -0
  74. datasmith/runners/synthesize_images.py +310 -0
  75. datasmith/update/__init__.py +5 -0
  76. datasmith/update/cli.py +169 -0
  77. datasmith/update/offline.py +173 -0
  78. datasmith/update/pipeline.py +497 -0
  79. datasmith/utils/__init__.py +18 -0
  80. datasmith/utils/core.py +67 -0
  81. datasmith/utils/db.py +156 -0
  82. datasmith/utils/tokens.py +65 -0
  83. fc_data-0.2.0.dist-info/METADATA +441 -0
  84. fc_data-0.2.0.dist-info/RECORD +87 -0
  85. fc_data-0.2.0.dist-info/WHEEL +4 -0
  86. fc_data-0.2.0.dist-info/entry_points.txt +2 -0
  87. fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env python3
2
+ """Simplified verification script for sandbox-based synthesis.
3
+
4
+ Builds a Docker image from the task directory, then runs profile.sh and
5
+ run-tests.sh inside the container. Writes failure.json or
6
+ verification_success.json to the task directory.
7
+
8
+ This file is self-contained — no datasmith imports. It is copied into the
9
+ Codex sandbox workspace and executed by the agent.
10
+
11
+ Usage:
12
+ python sandbox_verify.py # verify task/ directory
13
+ python sandbox_verify.py --task /path # verify custom task directory
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import hashlib
20
+ import json
21
+ import re
22
+ import subprocess as sp
23
+ import sys
24
+ import threading
25
+ import time
26
+ import uuid
27
+ from dataclasses import dataclass
28
+ from pathlib import Path
29
+
30
+ from python_on_whales import DockerClient
31
+ from python_on_whales.exceptions import DockerException
32
+
33
+ # Files the agent must NOT modify. Hashes are checked before every build.
34
+ _IMMUTABLE_FILES = (
35
+ "Dockerfile.pr",
36
+ "docker_build_base.sh",
37
+ "docker_build_env.sh",
38
+ "docker_build_final.sh",
39
+ "profile.sh",
40
+ "run-tests.sh",
41
+ "entrypoint.sh",
42
+ "task.txt",
43
+ )
44
+
45
+
46
@dataclass(frozen=True)
class Task:
    """Immutable record describing one task, as written in ``task.txt``.

    Every field has a default, so a partially specified ``Task(...)``
    expression still constructs.
    """

    # Repository coordinates; combined into the image name.
    owner: str = ""
    repo: str = ""
    # Commit identifier; forwarded to the build as COMMIT_SHA.
    sha: str | None = None
    # Not read by this script.
    commit_date: float = 0.0
    # Forwarded to the build as ENV_PAYLOAD.
    env_payload: str = ""
    # Forwarded as PY_VERSION when non-empty.
    python_version: str = ""
    # Not read by this script.
    tag: str = "pkg"
    # Forwarded as BENCHMARKS when non-empty.
    benchmarks: str = ""
    # Forwarded to the build as REPO_IMAGE.
    repo_image: str = ""
59
+
60
+
61
def _parse_task(task_dir: Path) -> Task:
    """Load the ``Task`` described by ``task_dir / "task.txt"``.

    The file is expected to hold a single ``Task(...)`` call expression
    whose arguments are plain Python literals.

    SECURITY: this used to be ``eval(text, {"__builtins__": {}}, ...)``.
    Emptying ``__builtins__`` does not sandbox ``eval``, and the file lives
    in a workspace an agent can write to. The expression is therefore parsed
    to an AST and only literal arguments are accepted.

    Args:
        task_dir: Directory containing ``task.txt``.

    Returns:
        The reconstructed ``Task``.

    Raises:
        SyntaxError: The file is not a valid Python expression.
        ValueError: The expression is not a ``Task(...)`` call, uses ``**``
            unpacking, or has a non-literal argument.
        TypeError: The arguments do not match the ``Task`` fields.
    """
    import ast  # function-scope import keeps this block self-contained

    text = (task_dir / "task.txt").read_text().strip()
    call = ast.parse(text, mode="eval").body
    is_task_call = (
        isinstance(call, ast.Call)
        and isinstance(call.func, ast.Name)
        and call.func.id == "Task"
    )
    if not is_task_call:
        raise ValueError("task.txt must contain a single Task(...) expression")

    # literal_eval rejects names, calls, attribute access and starred
    # arguments, so nothing in the file can execute code.
    positional = [ast.literal_eval(arg) for arg in call.args]
    keywords = {}
    for kw in call.keywords:
        if kw.arg is None:
            raise ValueError("task.txt must not use ** unpacking")
        keywords[kw.arg] = ast.literal_eval(kw.value)
    return Task(*positional, **keywords)
64
+
65
+
66
+ def _image_tag(task: Task, stage: str = "run") -> str:
67
+ sha_short = (task.sha or "unknown")[:12]
68
+ owner_repo = f"{task.owner}-{task.repo}".lower()
69
+ return f"formulacode/{owner_repo}:{sha_short}-{stage}"
70
+
71
+
72
+ def _write_failure(
73
+ task_dir: Path,
74
+ stage: str,
75
+ *,
76
+ stdout: str = "",
77
+ stderr: str = "",
78
+ rc: int = 1,
79
+ metrics: dict | None = None,
80
+ ) -> None:
81
+ # Truncate output to keep failure.json readable by CLI agents.
82
+ # The Read tool rejects files larger than ~25K tokens, causing cascading
83
+ # failures when the agent tries to read failure.json alongside other files.
84
+ _MAX_OUTPUT = 20_000 # chars — keeps total file well under the token limit
85
+ failure: dict = {
86
+ "stage": stage,
87
+ "return_code": rc,
88
+ "error_message": f"Verification failed during '{stage}' stage (rc={rc}).",
89
+ "stdout": stdout[-_MAX_OUTPUT:] if len(stdout) > _MAX_OUTPUT else stdout,
90
+ "stderr": stderr[-_MAX_OUTPUT:] if len(stderr) > _MAX_OUTPUT else stderr,
91
+ }
92
+ if metrics:
93
+ failure["resource_metrics"] = metrics
94
+ (task_dir / "failure.json").write_text(json.dumps(failure, indent=2))
95
+ # Also remove stale success file if present
96
+ success = task_dir / "verification_success.json"
97
+ if success.exists():
98
+ success.unlink()
99
+
100
+
101
+ def _write_success(task_dir: Path, image_tag: str, metrics: dict | None = None) -> None:
102
+ info: dict = {"local_image": image_tag, "verified_at": time.time()}
103
+ if metrics:
104
+ info["resource_metrics"] = metrics
105
+ (task_dir / "verification_success.json").write_text(json.dumps(info, indent=2))
106
+ # Remove stale failure file if present
107
+ failure = task_dir / "failure.json"
108
+ if failure.exists():
109
+ failure.unlink()
110
+
111
+
112
def build_image(
    docker: DockerClient,
    task_dir: Path,
    task: Task,
    target: str = "run",
    metrics: dict | None = None,
) -> str:
    """Build ``Dockerfile.pr`` from *task_dir* and return the resulting tag.

    Args:
        docker: Client used to run the build and inspect the image.
        task_dir: Build context directory; must contain ``Dockerfile.pr``.
        task: Supplies the build arguments and the image name.
        target: Dockerfile stage to build.
        metrics: Optional dict; receives ``build_duration_s`` (on success and
            failure) and, when inspection works, ``image_size_bytes``.

    Returns:
        The tag of the freshly built image.

    Raises:
        BuildError: The build failed; carries the streamed log and stderr.
    """
    image = _image_tag(task, target)

    args = {
        "REPO_IMAGE": task.repo_image,
        "COMMIT_SHA": task.sha or "",
        "ENV_PAYLOAD": task.env_payload,
    }
    # Optional arguments are passed only when the task sets them.
    optional_args = (
        ("PY_VERSION", task.python_version),
        ("BENCHMARKS", task.benchmarks),
    )
    for key, value in optional_args:
        if value:
            args[key] = value

    print(f"Building {image} (target={target}) from {task_dir}")
    captured: list[str] = []
    began = time.time()

    def _record_duration() -> None:
        if metrics is not None:
            metrics["build_duration_s"] = round(time.time() - began, 2)

    try:
        log_stream = docker.build(
            str(task_dir),
            tags=[image],
            target=target,
            build_args=args,
            file=str(task_dir / "Dockerfile.pr"),
            stream_logs=True,
        )
        # Collect line by line so a mid-build failure keeps the partial log.
        for chunk in log_stream:
            captured.append(chunk)
    except DockerException as err:
        partial_log = "".join(captured)
        _record_duration()
        raise BuildError(
            f"Docker build failed (rc={err.return_code})",
            stdout=partial_log,
            stderr=err.stderr or "",
            rc=err.return_code,
        ) from err

    if metrics is not None:
        _record_duration()
        # Image size is best-effort: a failed inspect must not fail the build.
        try:
            inspected = docker.image.inspect(image)
            metrics["image_size_bytes"] = inspected.size
        except Exception:
            pass

    return image
164
+
165
+
166
class BuildError(Exception):
    """Docker build failure that keeps the output captured up to that point.

    Attributes are set from the constructor arguments of the same name:
    ``stdout`` (streamed build log), ``stderr`` and ``rc`` (return code).
    """

    def __init__(self, message: str, stdout: str, stderr: str, rc: int) -> None:
        super().__init__(message)
        self.stdout, self.stderr, self.rc = stdout, stderr, rc
174
+
175
+
176
+ _MEM_UNITS = {"B": 1, "KIB": 1024, "MIB": 1024**2, "GIB": 1024**3, "TIB": 1024**4}
177
+ _MEM_RE = re.compile(r"([\d.]+)\s*((?:[KMGT]i)?B)", re.IGNORECASE)
178
+
179
+
180
+ def _parse_mem_usage(text: str) -> int:
181
+ """Parse a Docker memory string like ``'123.4MiB / 16GiB'`` → bytes (first value only)."""
182
+ m = _MEM_RE.search(text)
183
+ if not m:
184
+ return 0
185
+ value, unit = float(m.group(1)), m.group(2).upper()
186
+ return int(value * _MEM_UNITS.get(unit, 1))
187
+
188
+
189
def _run_container_with_timeout(
    image_tag: str,
    command: list[str],
    timeout: int,
    metrics: dict | None = None,
) -> tuple[bool, str, str, int]:
    """Run a Docker container with a host-side timeout.

    Args:
        image_tag: Local image to run (``--pull never`` is passed).
        command: Command and arguments executed inside the container.
        timeout: Seconds to wait for ``docker run`` before giving up.
        metrics: Optional dict; receives ``test_duration_s`` and, when at
            least one sample was taken, ``peak_memory_bytes``.

    Returns:
        ``(timed_out, stdout, stderr, returncode)``. On timeout the return
        code is ``-1`` and the streams hold whatever was captured so far.

    Raises:
        KeyboardInterrupt: Re-raised after the container has been killed and
            removed.

    ``test_duration_s`` is taken the moment ``docker run`` returns or times
    out, so it excludes container teardown and the wait for the poller.
    """
    name = f"fc-{uuid.uuid4().hex[:8]}"
    cmd = ["docker", "run", "--name", name, "--pull", "never", image_tag, *command]
    # No --rm: we clean up manually after collecting metrics.

    # One-element list so the poller thread can update the value in place.
    peak_mem: list[int] = [0]
    stop_event = threading.Event()

    def _poll_stats() -> None:
        """Background thread: poll ``docker stats`` every 2 s to track peak memory."""
        while not stop_event.is_set():
            try:
                r = sp.run(  # noqa: S603, S607
                    ["docker", "stats", name, "--no-stream", "--format", "{{.MemUsage}}"],
                    capture_output=True,
                    text=True,
                    timeout=10,
                )
                if r.returncode == 0 and r.stdout.strip():
                    current = _parse_mem_usage(r.stdout.strip())
                    if current > peak_mem[0]:
                        peak_mem[0] = current
            except Exception:
                # Best-effort sampling: a failed poll must never fail the run.
                pass
            stop_event.wait(2.0)

    def _as_text(raw: object) -> str:
        """Normalise captured output, which may be ``None``, bytes or str."""
        if not raw:
            return ""
        if isinstance(raw, bytes):
            return raw.decode(errors="replace")
        return raw  # type: ignore[return-value]

    def _stop_poller() -> None:
        stop_event.set()
        poller.join(timeout=5)

    poller = threading.Thread(target=_poll_stats, daemon=True)
    poller.start()

    start = time.time()
    try:
        result = sp.run(cmd, capture_output=True, text=True, timeout=timeout)  # noqa: S603, S607
        wall_time = time.time() - start
        timed_out = False
        stdout, stderr, rc = result.stdout, result.stderr, result.returncode
    except sp.TimeoutExpired as exc:
        wall_time = time.time() - start
        stdout, stderr = _as_text(exc.stdout), _as_text(exc.stderr)
        # The timeout only ended the ``docker run`` client process; the
        # container may still be running, so kill it by name.
        sp.run(["docker", "kill", name], capture_output=True)  # noqa: S603, S607
        timed_out, rc = True, -1
    except KeyboardInterrupt:
        # Leave no container behind, then let the interrupt propagate.
        sp.run(["docker", "kill", name], capture_output=True)  # noqa: S603, S607
        _stop_poller()
        sp.run(["docker", "rm", "-f", name], capture_output=True)  # noqa: S603, S607
        raise
    finally:
        _stop_poller()

    if metrics is not None:
        metrics["test_duration_s"] = round(wall_time, 2)
        if peak_mem[0] > 0:
            metrics["peak_memory_bytes"] = peak_mem[0]

    # Cleanup container
    sp.run(["docker", "rm", "-f", name], capture_output=True)  # noqa: S603, S607
    return timed_out, stdout, stderr, rc
263
+
264
+
265
+ def _parse_test_summary(stdout: str) -> dict | None:
266
+ """Extract the JSON summary between FORMULACODE_TESTS_START/END markers."""
267
+ start = "FORMULACODE_TESTS_START"
268
+ end = "FORMULACODE_TESTS_END"
269
+ s = stdout.find(start)
270
+ e = stdout.find(end)
271
+ if s == -1 or e == -1:
272
+ return None
273
+ payload = stdout[s + len(start) : e].strip()
274
+ try:
275
+ return json.loads(payload) # type: ignore[no-any-return]
276
+ except json.JSONDecodeError:
277
+ return None
278
+
279
+
280
+ def _parse_snapshot_summary(stdout: str) -> dict | None:
281
+ """Extract the JSON summary between FORMULACODE_SNAPSHOT_START/END markers."""
282
+ start = "FORMULACODE_SNAPSHOT_START"
283
+ end = "FORMULACODE_SNAPSHOT_END"
284
+ s = stdout.find(start)
285
+ e = stdout.find(end)
286
+ if s == -1 or e == -1:
287
+ return None
288
+ payload = stdout[s + len(start) : e].strip()
289
+ try:
290
+ return json.loads(payload)
291
+ except json.JSONDecodeError:
292
+ return None
293
+
294
+
295
def run_tests(image_tag: str, timeout: int = 720, metrics: dict | None = None) -> tuple[bool, str, str, int]:
    """Run ``/run-tests.sh --all`` in *image_tag* and judge the outcome.

    Args:
        image_tag: Image to run the tests in.
        timeout: Host-side timeout in seconds for the container run.
        metrics: Optional dict forwarded to the container runner.

    Returns:
        ``(ok, stdout, stderr, returncode)``. ``stderr`` is replaced by an
        explanatory message for the timeout and no-benchmark outcomes.
        A timeout is reported as success.
    """
    timed_out, out, err, code = _run_container_with_timeout(
        image_tag, ["/run-tests.sh", "--all"], timeout, metrics=metrics
    )

    if timed_out:
        return True, out, f"Tests timed out after {timeout}s (treated as success)", code

    # Exit code 78 (EX_CONFIG) or the sentinel both mean "no benchmarks".
    no_benchmarks = code == 78 or "FORMULACODE_NO_BENCHMARKS" in out
    if no_benchmarks:
        return False, out, "No ASV benchmarks discovered — task cannot be used in FormulaCode", code

    # Any other non-zero exit is an ordinary failure.
    if code != 0:
        return False, out, err, code

    # run-tests.sh may exit 0 even on collection errors, so the structured
    # summary is consulted as well when it is present.
    tests = _parse_test_summary(out)
    if tests is not None and (tests.get("total", 0) == 0 or tests.get("error", 0) > 0):
        return False, out, err, code

    # An empty snapshot means no benchmark was actually discovered.
    snap = _parse_snapshot_summary(out)
    if snap is not None and snap.get("total", 0) == 0:
        return False, out, "No ASV benchmarks discovered (snapshot total=0)", code

    return True, out, err, code
322
+
323
+
324
+ def _check_file_integrity(task_dir: Path) -> str | None:
325
+ """Verify that immutable files haven't been modified since workspace setup.
326
+
327
+ Returns ``None`` if all files are intact, or an error description.
328
+ """
329
+ hashes_file = task_dir.parent / ".immutable_hashes.json"
330
+ if not hashes_file.exists():
331
+ return None # no hash manifest = no enforcement (e.g. manual runs)
332
+
333
+ expected: dict[str, str] = json.loads(hashes_file.read_text())
334
+ modified: list[str] = []
335
+ deleted: list[str] = []
336
+
337
+ for fname, expected_hash in expected.items():
338
+ fp = task_dir / fname
339
+ if not fp.exists():
340
+ deleted.append(fname)
341
+ elif hashlib.md5(fp.read_bytes()).hexdigest() != expected_hash: # noqa: S324
342
+ modified.append(fname)
343
+
344
+ if not modified and not deleted:
345
+ return None
346
+
347
+ parts = [
348
+ "File integrity violation — only docker_build_pkg.sh and "
349
+ "docker_build_run.sh may be edited.",
350
+ ]
351
+ if modified:
352
+ parts.append(f"Modified: {', '.join(modified)}")
353
+ if deleted:
354
+ parts.append(f"Deleted: {', '.join(deleted)}")
355
+ parts.append("Revert your changes to these files and try again.")
356
+ return "\n".join(parts)
357
+
358
+
359
def verify(task_dir: Path) -> bool:
    """Run the full verification pipeline for one task directory.

    The stages run in order: file-integrity check, task parsing, image
    build, then the test run. The first failing stage writes
    ``failure.json`` and stops the pipeline. When all stages pass,
    ``verification_success.json`` is written.

    Returns:
        ``True`` when every stage passed, ``False`` otherwise.
    """
    client = DockerClient()
    resource_metrics: dict = {}

    # Stage 1: protected files must be untouched before anything is built.
    violation = _check_file_integrity(task_dir)
    if violation:
        print(f"INTEGRITY ERROR:\n{violation}")
        _write_failure(task_dir, "integrity", stderr=violation, metrics=resource_metrics)
        return False

    # Stage 2: load the task description.
    try:
        task = _parse_task(task_dir)
    except Exception as exc:
        print(f"Failed to parse task in {task_dir}: {exc}")
        _write_failure(task_dir, "parse", stderr=str(exc), metrics=resource_metrics)
        return False

    if not task.sha:
        print(f"Task in {task_dir} has no SHA")
        _write_failure(task_dir, "parse", stderr="Task.sha is None", metrics=resource_metrics)
        return False

    # Stage 3: build the image.
    try:
        image = build_image(client, task_dir, task, target="run", metrics=resource_metrics)
        print(f"Build succeeded: {image}")
    except BuildError as exc:
        print(f"Build failed: {exc}")
        _write_failure(
            task_dir,
            "build",
            stdout=exc.stdout,
            stderr=exc.stderr,
            rc=exc.rc,
            metrics=resource_metrics,
        )
        return False
    except Exception as exc:
        print(f"Build failed: {str(exc)[:200]}")
        _write_failure(task_dir, "build", stderr=str(exc), metrics=resource_metrics)
        return False

    # Stage 4: tests — mandatory, no skip option (profile.sh runs inside run-tests.sh).
    passed, out, err, code = run_tests(image, metrics=resource_metrics)
    if not passed:
        print(f"Tests failed for {task_dir.name}")
        _write_failure(task_dir, "tests", stdout=out, stderr=err, rc=code, metrics=resource_metrics)
        return False
    print(f"Tests passed for {task_dir.name}")

    _write_success(task_dir, image, metrics=resource_metrics)
    print(f"SUCCESS: {task_dir.name}")
    return True
407
+
408
+
409
def main() -> None:
    """Command-line entry point: verify one task directory and exit.

    The process exits with status 0 when verification succeeds and 1 when
    it fails or the task directory does not exist.
    """
    cli = argparse.ArgumentParser(description="Verify Docker build context")
    cli.add_argument(
        "--task",
        type=Path,
        default=Path("task"),
        help="Task directory (default: task/)",
    )
    task_dir = cli.parse_args().task

    if not task_dir.exists():
        print(f"Task directory not found: {task_dir}")
        sys.exit(1)

    succeeded = verify(task_dir)
    if succeeded:
        sys.exit(0)
    sys.exit(1)
425
+
426
+
427
+ if __name__ == "__main__":
428
+ main()
@@ -0,0 +1,31 @@
1
+ """ds.docker — Docker image lifecycle, build contexts, verifiers, publishing."""
2
+
3
+ from datasmith.docker.context import DockerContext
4
+ from datasmith.docker.images import (
5
+ ImageManager,
6
+ get_base_image_name,
7
+ get_pr_image_name,
8
+ get_repo_image_name,
9
+ )
10
+ from datasmith.docker.publish import DockerHubPublisher
11
+ from datasmith.docker.verifiers import (
12
+ MultiObjVerifier,
13
+ ProfileVerifier,
14
+ PytestVerifier,
15
+ SmokeVerifier,
16
+ VerifyResult,
17
+ )
18
+
19
+ __all__ = [
20
+ "DockerContext",
21
+ "DockerHubPublisher",
22
+ "ImageManager",
23
+ "MultiObjVerifier",
24
+ "ProfileVerifier",
25
+ "PytestVerifier",
26
+ "SmokeVerifier",
27
+ "VerifyResult",
28
+ "get_base_image_name",
29
+ "get_pr_image_name",
30
+ "get_repo_image_name",
31
+ ]
@@ -0,0 +1,112 @@
1
+ """Docker build context model."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import os
7
+ import tarfile
8
+ from typing import Any, ClassVar
9
+
10
+ from pydantic import BaseModel, ConfigDict
11
+
12
+
13
class DockerContext(BaseModel):
    """In-memory Docker build context: one text field per context file.

    An empty string means "file absent"; such fields are skipped when the
    context is serialised. All file I/O uses UTF-8, matching the encoding
    used for the tar archive.
    """

    model_config = ConfigDict(frozen=False)

    dockerfile: str = ""
    build_base_sh: str = ""
    build_env_sh: str = ""
    build_pkg_sh: str = ""
    build_run_sh: str = ""
    build_final_sh: str = ""
    profile_sh: str = ""
    run_tests_sh: str = ""
    entrypoint_sh: str = ""

    # File name inside the build context -> model field holding its text.
    _FILE_MAP: ClassVar[dict[str, str]] = {
        "Dockerfile": "dockerfile",
        "docker_build_base.sh": "build_base_sh",
        "docker_build_env.sh": "build_env_sh",
        "docker_build_pkg.sh": "build_pkg_sh",
        "docker_build_run.sh": "build_run_sh",
        "docker_build_final.sh": "build_final_sh",
        "profile.sh": "profile_sh",
        "run-tests.sh": "run_tests_sh",
        "entrypoint.sh": "entrypoint_sh",
    }

    # Key in the old context-registry dict -> model field.
    _LEGACY_MAP: ClassVar[dict[str, str]] = {
        "dockerfile_data": "dockerfile",
        "base_building_data": "build_base_sh",
        "env_building_data": "build_env_sh",
        "building_data": "build_pkg_sh",
        "run_building_data": "build_run_sh",
        "final_building_data": "build_final_sh",
        "profile_data": "profile_sh",
        "run_tests_data": "run_tests_sh",
        "entrypoint_data": "entrypoint_sh",
    }

    # Fallback filenames for backward compatibility (old name -> new name).
    _COMPAT_MAP: ClassVar[dict[str, str]] = {
        "run_tests.sh": "run-tests.sh",
    }

    def to_tar_bytes(self) -> bytes:
        """Serialize context to an in-memory gzipped tar for Docker build.

        The archive is reproducible: entries are written in sorted name
        order with zeroed mtime and ownership, and the gzip header timestamp
        is pinned to 0. (``tarfile``'s ``"w:gz"`` mode would stamp the
        current time into the header.)

        Returns:
            The gzip-compressed tar archive as bytes.
        """
        import gzip  # function-scope import: gzip is only needed here

        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb", mtime=0) as gz:
            with tarfile.open(fileobj=gz, mode="w") as tar:
                for filename, field in sorted(self._FILE_MAP.items()):
                    content = getattr(self, field)
                    if not content:
                        continue
                    data = content.encode("utf-8")
                    info = tarfile.TarInfo(name=filename)
                    info.size = len(data)
                    info.mtime = 0
                    info.uid = 0
                    info.gid = 0
                    info.uname = ""
                    info.gname = ""
                    tar.addfile(info, io.BytesIO(data))
        return buf.getvalue()

    def to_directory(self, path: str) -> None:
        """Write all non-empty context files to *path*, creating it if needed."""
        os.makedirs(path, exist_ok=True)
        for filename, field in self._FILE_MAP.items():
            content = getattr(self, field)
            if content:
                with open(os.path.join(path, filename), "w", encoding="utf-8") as f:
                    f.write(content)

    @classmethod
    def from_directory(cls, path: str) -> DockerContext:
        """Load a DockerContext from a task directory.

        Each known file is read when present. If it is missing, its legacy
        filename is tried. A file found under neither name leaves the
        field empty.
        """
        # Inverted once (new name -> old name) rather than on every lookup.
        legacy_names = {new: old for old, new in cls._COMPAT_MAP.items()}

        def _read(name: str) -> str:
            candidates = [name]
            legacy = legacy_names.get(name)
            if legacy:
                candidates.append(legacy)
            for candidate in candidates:
                fp = os.path.join(path, candidate)
                if os.path.exists(fp):
                    with open(fp, encoding="utf-8") as f:
                        return f.read()
            return ""

        kwargs = {field: _read(filename) for filename, field in cls._FILE_MAP.items()}
        return cls(**kwargs)

    @classmethod
    def from_legacy_dict(cls, data: dict[str, Any]) -> DockerContext:
        """Create a DockerContext from the old context registry format.

        Only legacy keys that are present with a truthy value are copied;
        the remaining fields keep their empty default.
        """
        kwargs = {
            field: data[legacy_key]
            for legacy_key, field in cls._LEGACY_MAP.items()
            if data.get(legacy_key, "")
        }
        return cls(**kwargs)