fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Docker image management via python-on-whales."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from python_on_whales import DockerClient
|
|
9
|
+
|
|
10
|
+
from datasmith.utils import get_logger
|
|
11
|
+
from datasmith.utils.core import Settings
|
|
12
|
+
|
|
13
|
+
logger = get_logger("docker.images")

# Directory that sits next to this module and is used as the default build
# context (the Dockerfile.* files are looked up inside it).
_TEMPLATES_DIR = Path(__file__).with_name("templates")

# Images are layered base -> repo -> PR, so every tier's FROM has to find the
# previous tier in the local image store. The built-in "default" builder runs
# on the docker driver and writes straight into the daemon's image store;
# docker-container builders are isolated and cannot see locally built images.
# All builds are therefore pinned to the "default" builder.
_BUILDER = "default"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _default_context() -> str:
    """Give the bundled templates directory as a plain string path."""
    return os.fspath(_TEMPLATES_DIR)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _docker_namespace() -> str:
    """Look up the Docker namespace on a freshly constructed ``Settings``.

    The value is ``Settings().dockerhub_username``; the original author notes
    that it is backed by the ``DOCKERHUB_USERNAME`` environment variable.
    """
    settings = Settings()
    return settings.dockerhub_username
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_base_image_name() -> str:
    """Build the canonical ``<namespace>/base:latest`` tag of the base image."""
    namespace = _docker_namespace()
    return f"{namespace}/base:latest"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_repo_image_name(owner: str, repo: str) -> str:
    """Build the canonical ``<namespace>/<owner>-<repo>:latest`` tag.

    The whole reference, namespace included, is lower-cased.
    """
    slug = f"{owner.lower()}-{repo.lower()}"
    reference = f"{_docker_namespace()}/{slug}:latest"
    return reference.lower()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_pr_image_name(owner: str, repo: str, issue_number: int) -> str:
    """Build the canonical ``<namespace>/<owner>-<repo>:<issue_number>`` tag.

    The whole reference, namespace included, is lower-cased.
    """
    reference = f"{_docker_namespace()}/{owner}-{repo}:{issue_number}"
    return reference.lower()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ImageManager:
    """Build, inspect and remove the base, repository and PR Docker images.

    Every build is issued through the module-level ``_BUILDER`` builder and
    uses a ``Dockerfile.*`` file located inside the build context.
    """

    def __init__(self, timeout: int = 3600) -> None:
        """Create the Docker client used by all operations.

        Args:
            timeout: Kept on the instance as ``_timeout``. No method of this
                class reads it.
        """
        self._docker = DockerClient()
        self._timeout = timeout

    @staticmethod
    def _default_context() -> str:
        """Return the path to the built-in templates directory."""
        # Resolves to the module-level function of the same name; the class
        # namespace is not visible from inside a method body.
        return _default_context()

    def _build(
        self,
        ctx: str,
        tag: str,
        dockerfile: str,
        build_args: dict[str, str],
    ) -> None:
        """Run one build of ``<ctx>/<dockerfile>`` tagged as ``tag``."""
        self._docker.build(
            ctx,
            tags=[tag],
            file=os.path.join(ctx, dockerfile),
            build_args=build_args,
            builder=_BUILDER,
        )

    def build_base_image(
        self,
        context: str | None = None,
        *,
        py_version: str = "",
    ) -> str:
        """Build the base image from ``Dockerfile.base``.

        Args:
            context: Build context directory; the built-in templates directory
                is used when omitted or empty.
            py_version: Forwarded as the ``PY_VERSION`` build argument when
                non-empty.

        Returns:
            The tag the image was built under.
        """
        ctx = context or _default_context()
        tag = get_base_image_name()
        logger.info("Building base image: %s", tag)
        build_args: dict[str, str] = {}
        if py_version:
            build_args["PY_VERSION"] = py_version
        self._build(ctx, tag, "Dockerfile.base", build_args)
        return tag

    def build_repo_image(
        self,
        owner: str,
        repo: str,
        context: str | None = None,
        *,
        repo_url: str | None = None,
        py_version: str = "",
    ) -> str:
        """Build the repository image from ``Dockerfile.repo``.

        Args:
            owner: Repository owner, used for the tag and the default URL.
            repo: Repository name, used for the tag and the default URL.
            context: Build context directory; the built-in templates directory
                is used when omitted or empty.
            repo_url: Clone URL passed as ``REPO_URL``; defaults to
                ``https://github.com/<owner>/<repo>.git``.
            py_version: Forwarded as the ``PY_VERSION`` build argument when
                non-empty.

        Returns:
            The tag the image was built under.
        """
        ctx = context or _default_context()
        url = repo_url or f"https://github.com/{owner}/{repo}.git"
        tag = get_repo_image_name(owner, repo)
        logger.info("Building repo image: %s", tag)
        build_args: dict[str, str] = {
            "BASE_IMAGE": get_base_image_name(),
            "REPO_URL": url,
        }
        if py_version:
            build_args["PY_VERSION"] = py_version
        self._build(ctx, tag, "Dockerfile.repo", build_args)
        return tag

    def build_pr_image(
        self,
        owner: str,
        repo: str,
        issue_number: int,
        context: str | None = None,
        build_script: str = "",
        *,
        commit_sha: str = "HEAD",
        env_payload: str = "[]",
        py_version: str = "",
    ) -> str:
        """Build the PR image from ``Dockerfile.pr`` on top of the repo image.

        Args:
            owner: Repository owner, used for both image tags.
            repo: Repository name, used for both image tags.
            issue_number: Becomes the tag part of the PR image reference.
            context: Build context directory; the built-in templates directory
                is used when omitted or empty.
            build_script: Forwarded as ``BUILD_SCRIPT`` when non-empty.
            commit_sha: Forwarded as ``COMMIT_SHA``.
            env_payload: Forwarded as ``ENV_PAYLOAD``.
            py_version: Forwarded as ``PY_VERSION`` when non-empty.

        Returns:
            The tag the image was built under.
        """
        ctx = context or _default_context()
        tag = get_pr_image_name(owner, repo, issue_number)
        repo_image = get_repo_image_name(owner, repo)
        logger.info("Building PR image: %s", tag)
        build_args: dict[str, str] = {
            "REPO_IMAGE": repo_image,
            "COMMIT_SHA": commit_sha,
            "ENV_PAYLOAD": env_payload,
        }
        if build_script:
            build_args["BUILD_SCRIPT"] = build_script
        if py_version:
            build_args["PY_VERSION"] = py_version
        self._build(ctx, tag, "Dockerfile.pr", build_args)
        return tag

    def image_exists(self, tag: str) -> bool:
        """Report whether ``tag`` can be inspected.

        Any exception raised by the inspect call is treated as "does not
        exist"; nothing is propagated to the caller.
        """
        try:
            self._docker.image.inspect(tag)
        except Exception:
            return False
        return True

    def remove_image(self, tag: str) -> None:
        """Force-remove ``tag``; failures are logged and never raised."""
        try:
            self._docker.image.remove(tag, force=True)
        except Exception:
            # Best-effort cleanup: keep going, but record why removal failed.
            logger.warning("Failed to remove image: %s", tag, exc_info=True)

    def prune_dangling(self) -> None:
        """Prune images with ``all=False``."""
        self._docker.image.prune(all=False)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""DockerHub publisher."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from python_on_whales import DockerClient
|
|
10
|
+
|
|
11
|
+
from datasmith.utils import get_logger, with_backoff
|
|
12
|
+
|
|
13
|
+
logger = get_logger("docker.publish")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DockerHubPublisher:
    """Push local images to DockerHub and compare local and remote tags."""

    def __init__(self, namespace: str = "formulacode") -> None:
        """Create the Docker client.

        Args:
            namespace: DockerHub namespace used when listing remote tags.
        """
        self._docker = DockerClient()
        self._namespace = namespace
        self._logged_in = False

    def _login(self) -> None:
        """Log in once using ``DOCKERHUB_USERNAME`` / ``DOCKERHUB_TOKEN``.

        Nothing happens when either variable is missing or empty; the login is
        attempted again on the next call in that case.
        """
        if self._logged_in:
            return
        username = os.environ.get("DOCKERHUB_USERNAME", "")
        token = os.environ.get("DOCKERHUB_TOKEN", "")
        if username and token:
            self._docker.login(username=username, password=token)
            self._logged_in = True

    @with_backoff(max_retries=3, base_delay=2.0)
    def push(self, image_tag: str) -> None:
        """Push ``image_tag``, logging in first when credentials are set."""
        self._login()
        logger.info("Pushing image: %s", image_tag)
        self._docker.push(image_tag)

    def tag_with_version(self, image_tag: str) -> str:
        """Tag ``image_tag`` with a UTC ``@YYYY-MM`` suffix and return the new name."""
        # NOTE(review): "@" normally introduces a digest in a Docker image
        # reference, so "<name>:<tag>@<YYYY-MM>" may be rejected as an invalid
        # reference -- confirm against the daemon before relying on this.
        version = datetime.now(tz=timezone.utc).strftime("@%Y-%m")
        new_tag = f"{image_tag}{version}"
        self._docker.tag(image_tag, new_tag)
        return new_tag

    def list_remote_tags(self, repo: str) -> list[str]:
        """List tags for a DockerHub repository.

        The tags endpoint is paginated, so every ``next`` link is followed
        until the listing is exhausted. A non-200 response stops the walk and
        returns what was collected so far (nothing, if it was the first page).

        Args:
            repo: Repository name inside this publisher's namespace.

        Returns:
            The tag names, or an empty list when a request raises.
        """
        url: str | None = (
            f"https://hub.docker.com/v2/repositories/{self._namespace}/{repo}/tags/"
            "?page_size=100"
        )
        tags: list[str] = []
        try:
            while url:
                resp = httpx.get(url, timeout=10.0)
                if resp.status_code != 200:
                    break
                payload = resp.json()
                tags.extend(t["name"] for t in payload.get("results", []))
                url = payload.get("next")
        except Exception:
            logger.warning("Failed to list remote tags for %s", repo, exc_info=True)
            return []
        return tags

    def filter_unpublished(self, local_tags: list[str], remote_tags: list[str]) -> list[str]:
        """Keep the local references whose tag part is not in ``remote_tags``.

        The tag part is whatever follows the last ``:``; a reference without a
        colon is compared as a whole.
        """
        remote_set = set(remote_tags)
        return [t for t in local_tags if t.rpartition(":")[2] not in remote_set]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1.7

# Optional Python version handed to docker_build_base.sh; when empty the
# script runs without --py-version.
ARG PY_VERSION=""
FROM buildpack-deps:jammy AS base
# Declared again inside the stage so the RUN below can read it.
ARG PY_VERSION=""

# Non-interactive apt, micromamba rooted at /opt/conda, tool directories on
# PATH, and the BLAS/OpenMP thread pools pinned to a single thread.
ENV DEBIAN_FRONTEND=noninteractive \
    MAMBA_ROOT_PREFIX=/opt/conda \
    PATH=/opt/conda/bin:/root/.cargo/bin:/root/.local/bin:$PATH \
    MAMBA_DOCKERFILE_ACTIVATE=1 \
    OPENBLAS_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    OMP_NUM_THREADS=1

RUN mkdir -p /workspace /output
WORKDIR /workspace

COPY docker_build_base.sh /workspace/docker_build_base.sh
# Run the base build script, passing --py-version only when one was given.
RUN chmod +x /workspace/docker_build_base.sh && \
    if [ -n "${PY_VERSION}" ]; then \
        PY_VERSION="${PY_VERSION}" /workspace/docker_build_base.sh --py-version "${PY_VERSION}"; \
    else \
        /workspace/docker_build_base.sh; \
    fi

RUN micromamba clean --all --yes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1.7

# Repository image this PR image is layered on (required build argument).
ARG REPO_IMAGE

# --- env: check out the target commit and set up the environment ------------
FROM ${REPO_IMAGE} AS env
ARG COMMIT_SHA
ARG ENV_PAYLOAD="[]"
ARG PY_VERSION=""
RUN git checkout "$COMMIT_SHA"
LABEL vcs.ref="$COMMIT_SHA"

COPY docker_build_env.sh /workspace/repo/docker_build_env.sh
# printf writes the payload verbatim; `echo` under /bin/sh may interpret
# backslash escapes and would then corrupt JSON string contents.
RUN chmod +x /workspace/repo/docker_build_env.sh && \
    printf '%s\n' "${ENV_PAYLOAD}" > /tmp/env_payload.json && \
    /workspace/repo/docker_build_env.sh --env-payload /tmp/env_payload.json

# --- pkg: run the package build script and install the helper scripts -------
FROM env AS pkg

COPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh
RUN chmod +x /workspace/repo/docker_build_pkg.sh && \
    /workspace/repo/docker_build_pkg.sh

COPY profile.sh /profile.sh
COPY run-tests.sh /run-tests.sh
RUN chmod +x /profile.sh /run-tests.sh

RUN micromamba clean --all --yes

# --- run ---------------------------------------------------------------------
FROM pkg AS run

COPY docker_build_run.sh /docker_build_run.sh
RUN chmod +x /docker_build_run.sh \
    && /docker_build_run.sh

# --- final -------------------------------------------------------------------
FROM run AS final
ARG BENCHMARKS=""
ARG BUILD_SCRIPT=""

COPY docker_build_final.sh /docker_build_final.sh
# Chain every step with && so a failing chmod or printf stops the build
# instead of being masked by the exit status of the last command.
RUN chmod +x /docker_build_final.sh \
    && printf '%s\n' "${BENCHMARKS}" > /tmp/asv_benchmarks_fallback.txt \
    && /docker_build_final.sh /tmp/asv_benchmarks_fallback.txt
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1.7

# Image the repository layer is built on.
ARG BASE_IMAGE=formulacode/base:latest

FROM ${BASE_IMAGE} AS repo
# Clone URL (required) and an optional commit to check out after cloning.
ARG REPO_URL
ARG COMMIT_SHA=""
# Clone into /workspace/repo; the checkout only happens for a non-empty
# COMMIT_SHA.
RUN git clone "$REPO_URL" /workspace/repo && \
    { [ -z "$COMMIT_SHA" ] || git -C /workspace/repo checkout "$COMMIT_SHA"; }
WORKDIR /workspace/repo
ENTRYPOINT ["/bin/bash"]
|