fc-data 0.2.0 (fc_data-0.2.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87):
  1. datasmith/__init__.py +330 -0
  2. datasmith/__init__.pyi +194 -0
  3. datasmith/agents/__init__.py +31 -0
  4. datasmith/agents/classifiers.py +272 -0
  5. datasmith/agents/codex.py +25 -0
  6. datasmith/agents/config.py +108 -0
  7. datasmith/agents/extractors.py +197 -0
  8. datasmith/agents/installed/README.md +52 -0
  9. datasmith/agents/installed/__init__.py +22 -0
  10. datasmith/agents/installed/base.py +240 -0
  11. datasmith/agents/installed/claude.py +134 -0
  12. datasmith/agents/installed/codex.py +91 -0
  13. datasmith/agents/installed/gemini.py +118 -0
  14. datasmith/agents/installed/none.py +27 -0
  15. datasmith/agents/sandbox.py +547 -0
  16. datasmith/agents/synthesizer.py +439 -0
  17. datasmith/agents/templates/AGENTS.md.j2 +150 -0
  18. datasmith/agents/templates/sandbox_verify.py +428 -0
  19. datasmith/docker/__init__.py +31 -0
  20. datasmith/docker/context.py +112 -0
  21. datasmith/docker/images.py +158 -0
  22. datasmith/docker/publish.py +56 -0
  23. datasmith/docker/templates/Dockerfile.base +26 -0
  24. datasmith/docker/templates/Dockerfile.pr +42 -0
  25. datasmith/docker/templates/Dockerfile.repo +11 -0
  26. datasmith/docker/templates/docker_build_base.sh +780 -0
  27. datasmith/docker/templates/docker_build_env.sh +309 -0
  28. datasmith/docker/templates/docker_build_final.sh +106 -0
  29. datasmith/docker/templates/docker_build_pkg.sh +99 -0
  30. datasmith/docker/templates/docker_build_run.sh +124 -0
  31. datasmith/docker/templates/entrypoint.sh +62 -0
  32. datasmith/docker/templates/parser.py +1405 -0
  33. datasmith/docker/templates/profile.sh +199 -0
  34. datasmith/docker/templates/pytest_runner.py +692 -0
  35. datasmith/docker/templates/run-tests.sh +197 -0
  36. datasmith/docker/verifiers.py +131 -0
  37. datasmith/filters.py +154 -0
  38. datasmith/github/__init__.py +22 -0
  39. datasmith/github/client.py +333 -0
  40. datasmith/github/hooks.py +50 -0
  41. datasmith/github/links.py +110 -0
  42. datasmith/github/models.py +206 -0
  43. datasmith/github/render.py +173 -0
  44. datasmith/github/search.py +66 -0
  45. datasmith/github/templates/comment.md.j2 +5 -0
  46. datasmith/github/templates/final.md.j2 +66 -0
  47. datasmith/github/templates/issues.md.j2 +21 -0
  48. datasmith/github/templates/repo.md.j2 +1 -0
  49. datasmith/preflight.py +162 -0
  50. datasmith/publish/__init__.py +13 -0
  51. datasmith/publish/huggingface.py +104 -0
  52. datasmith/publish/pipeline.py +60 -0
  53. datasmith/publish/records.py +91 -0
  54. datasmith/py.typed +1 -0
  55. datasmith/resolution/__init__.py +14 -0
  56. datasmith/resolution/blocklist.py +145 -0
  57. datasmith/resolution/cache.py +120 -0
  58. datasmith/resolution/constants.py +277 -0
  59. datasmith/resolution/dependency_resolver.py +174 -0
  60. datasmith/resolution/git_utils.py +378 -0
  61. datasmith/resolution/import_analyzer.py +66 -0
  62. datasmith/resolution/metadata_parser.py +412 -0
  63. datasmith/resolution/models.py +41 -0
  64. datasmith/resolution/orchestrator.py +522 -0
  65. datasmith/resolution/package_filters.py +312 -0
  66. datasmith/resolution/python_manager.py +110 -0
  67. datasmith/runners/__init__.py +15 -0
  68. datasmith/runners/base.py +112 -0
  69. datasmith/runners/classify_prs.py +48 -0
  70. datasmith/runners/render_problems.py +113 -0
  71. datasmith/runners/resolve_packages.py +66 -0
  72. datasmith/runners/scrape_commits.py +166 -0
  73. datasmith/runners/scrape_repos.py +44 -0
  74. datasmith/runners/synthesize_images.py +310 -0
  75. datasmith/update/__init__.py +5 -0
  76. datasmith/update/cli.py +169 -0
  77. datasmith/update/offline.py +173 -0
  78. datasmith/update/pipeline.py +497 -0
  79. datasmith/utils/__init__.py +18 -0
  80. datasmith/utils/core.py +67 -0
  81. datasmith/utils/db.py +156 -0
  82. datasmith/utils/tokens.py +65 -0
  83. fc_data-0.2.0.dist-info/METADATA +441 -0
  84. fc_data-0.2.0.dist-info/RECORD +87 -0
  85. fc_data-0.2.0.dist-info/WHEEL +4 -0
  86. fc_data-0.2.0.dist-info/entry_points.txt +2 -0
  87. fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,158 @@
1
+ """Docker image management via python-on-whales."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ from python_on_whales import DockerClient
9
+
10
+ from datasmith.utils import get_logger
11
+ from datasmith.utils.core import Settings
12
+
13
logger = get_logger("docker.images")

# Dockerfiles and build scripts are looked up in the "templates" directory
# that sits next to this module.
_TEMPLATES_DIR = Path(__file__).parent.joinpath("templates")

# Images are built in three tiers (base -> repo -> PR), and each tier's FROM
# must be able to see the image produced by the previous tier locally.
# The built-in "default" builder uses the docker driver, which writes builds
# straight into the daemon's image store. Container-based builders
# (docker-container) run isolated and cannot resolve locally built images,
# so every build is pinned to the docker driver through this name.
_BUILDER = "default"
23
+
24
+
25
def _default_context() -> str:
    """Give the built-in templates directory as a plain string path."""
    templates_path = _TEMPLATES_DIR
    return str(templates_path)
28
+
29
+
30
def _docker_namespace() -> str:
    """Read the Docker namespace from a freshly constructed ``Settings``.

    The value is ``Settings().dockerhub_username`` (DOCKERHUB_USERNAME env var).
    """
    settings = Settings()
    return settings.dockerhub_username
33
+
34
+
35
def get_base_image_name() -> str:
    """Build the canonical ``<namespace>/base:latest`` tag for the base image."""
    namespace = _docker_namespace()
    return "{}/base:latest".format(namespace)
38
+
39
+
40
def get_repo_image_name(owner: str, repo: str) -> str:
    """Build the canonical, fully lower-cased ``<namespace>/<owner>-<repo>:latest`` tag."""
    slug = "-".join((owner.lower(), repo.lower()))
    image = "{}/{}:latest".format(_docker_namespace(), slug)
    # The namespace is lower-cased here as well, not only owner and repo.
    return image.lower()
45
+
46
+
47
def get_pr_image_name(owner: str, repo: str, issue_number: int) -> str:
    """Build the canonical, lower-cased ``<namespace>/<owner>-<repo>:<issue_number>`` tag."""
    image = "{}/{}-{}:{}".format(_docker_namespace(), owner, repo, issue_number)
    return image.lower()
50
+
51
+
52
class ImageManager:
    """Build and manage the base -> repo -> PR Docker image hierarchy.

    Every build is issued through the ``_BUILDER`` builder so that each tier
    can resolve the locally built image it derives ``FROM``.
    """

    def __init__(self, timeout: int = 3600) -> None:
        """Create a manager backed by a fresh ``DockerClient``.

        Args:
            timeout: Stored on the instance as ``_timeout``. None of the
                methods of this class currently read it.
        """
        self._docker = DockerClient()
        self._timeout = timeout

    @staticmethod
    def _default_context() -> str:
        """Return the path to the built-in templates directory."""
        return _default_context()

    def _build(self, ctx: str, dockerfile: str, tag: str, build_args: dict[str, str]) -> str:
        """Run one ``docker build`` of *dockerfile* inside *ctx* and return *tag*.

        Shared by the three public ``build_*`` methods so they all pass the
        same options (single tag, Dockerfile taken from the context directory,
        pinned builder).
        """
        self._docker.build(
            ctx,
            tags=[tag],
            file=os.path.join(ctx, dockerfile),
            build_args=build_args,
            builder=_BUILDER,
        )
        return tag

    def build_base_image(
        self,
        context: str | None = None,
        *,
        py_version: str = "",
    ) -> str:
        """Build the base image from ``Dockerfile.base`` and return its tag.

        Args:
            context: Build context directory; falls back to the built-in
                templates directory when empty or ``None``.
            py_version: Forwarded as the ``PY_VERSION`` build arg when non-empty.
        """
        ctx = context or _default_context()
        tag = get_base_image_name()
        logger.info("Building base image: %s", tag)
        build_args: dict[str, str] = {}
        if py_version:
            build_args["PY_VERSION"] = py_version
        return self._build(ctx, "Dockerfile.base", tag, build_args)

    def build_repo_image(
        self,
        owner: str,
        repo: str,
        context: str | None = None,
        *,
        repo_url: str | None = None,
        py_version: str = "",
    ) -> str:
        """Build the repository image from ``Dockerfile.repo`` and return its tag.

        Args:
            owner: Repository owner, used for the image name and default URL.
            repo: Repository name, used for the image name and default URL.
            context: Build context directory; falls back to the built-in
                templates directory when empty or ``None``.
            repo_url: Clone URL passed as ``REPO_URL``; defaults to
                ``https://github.com/<owner>/<repo>.git``.
            py_version: Forwarded as the ``PY_VERSION`` build arg when non-empty.
        """
        ctx = context or _default_context()
        url = repo_url or f"https://github.com/{owner}/{repo}.git"
        tag = get_repo_image_name(owner, repo)
        logger.info("Building repo image: %s", tag)
        build_args: dict[str, str] = {
            "BASE_IMAGE": get_base_image_name(),
            "REPO_URL": url,
        }
        if py_version:
            build_args["PY_VERSION"] = py_version
        return self._build(ctx, "Dockerfile.repo", tag, build_args)

    def build_pr_image(
        self,
        owner: str,
        repo: str,
        issue_number: int,
        context: str | None = None,
        build_script: str = "",
        *,
        commit_sha: str = "HEAD",
        env_payload: str = "[]",
        py_version: str = "",
    ) -> str:
        """Build the PR image from ``Dockerfile.pr`` and return its tag.

        The repository image name for ``owner``/``repo`` is passed as the
        ``REPO_IMAGE`` build arg, so that image must already exist locally.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_number: Used as the tag part of the resulting image name.
            context: Build context directory; falls back to the built-in
                templates directory when empty or ``None``.
            build_script: Forwarded as ``BUILD_SCRIPT`` when non-empty.
            commit_sha: Forwarded as ``COMMIT_SHA``.
            env_payload: Forwarded verbatim as ``ENV_PAYLOAD``.
            py_version: Forwarded as ``PY_VERSION`` when non-empty.
        """
        ctx = context or _default_context()
        tag = get_pr_image_name(owner, repo, issue_number)
        repo_image = get_repo_image_name(owner, repo)
        logger.info("Building PR image: %s", tag)
        build_args: dict[str, str] = {
            "REPO_IMAGE": repo_image,
            "COMMIT_SHA": commit_sha,
            "ENV_PAYLOAD": env_payload,
        }
        if build_script:
            build_args["BUILD_SCRIPT"] = build_script
        if py_version:
            build_args["PY_VERSION"] = py_version
        return self._build(ctx, "Dockerfile.pr", tag, build_args)

    def image_exists(self, tag: str) -> bool:
        """Return ``True`` when inspecting *tag* succeeds, ``False`` on any error."""
        try:
            self._docker.image.inspect(tag)
        except Exception:
            # Deliberately broad: any inspect failure is reported as "absent".
            return False
        else:
            return True

    def remove_image(self, tag: str) -> None:
        """Force-remove *tag*; a failure is logged and never raised."""
        try:
            self._docker.image.remove(tag, force=True)
        except Exception:
            # Best effort: keep going, but record why the removal failed.
            logger.warning("Failed to remove image: %s", tag, exc_info=True)

    def prune_dangling(self) -> None:
        """Prune images with ``all=False`` (dangling images only)."""
        self._docker.image.prune(all=False)
@@ -0,0 +1,56 @@
1
+ """DockerHub publisher."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from datetime import datetime, timezone
7
+
8
+ import httpx
9
+ from python_on_whales import DockerClient
10
+
11
+ from datasmith.utils import get_logger, with_backoff
12
+
13
+ logger = get_logger("docker.publish")
14
+
15
+
16
class DockerHubPublisher:
    """Tag and push images to Docker Hub and query which tags already exist."""

    def __init__(self, namespace: str = "formulacode") -> None:
        """Create a publisher for *namespace* with its own ``DockerClient``."""
        self._docker = DockerClient()
        self._namespace = namespace
        self._logged_in = False

    def _login(self) -> None:
        """Log in once using DOCKERHUB_USERNAME / DOCKERHUB_TOKEN.

        When either variable is missing or empty, no login is attempted and
        the check is repeated on the next call.
        """
        if self._logged_in:
            return
        username = os.environ.get("DOCKERHUB_USERNAME", "")
        token = os.environ.get("DOCKERHUB_TOKEN", "")
        if username and token:
            self._docker.login(username=username, password=token)
            self._logged_in = True

    @with_backoff(max_retries=3, base_delay=2.0)
    def push(self, image_tag: str) -> None:
        """Push *image_tag*, logging in first if credentials are available."""
        self._login()
        logger.info("Pushing image: %s", image_tag)
        self._docker.push(image_tag)

    def tag_with_version(self, image_tag: str) -> str:
        """Add a ``YYYY-MM`` (UTC) versioned alias for *image_tag* and return it.

        ``repo:tag`` becomes ``repo:tag-YYYY-MM``; a reference without a tag
        becomes ``repo:YYYY-MM``.
        """
        stamp = datetime.now(tz=timezone.utc).strftime("%Y-%m")
        # "@" introduces a digest in an image reference, so "tag@2024-05" is
        # rejected by Docker as an invalid reference. Use a separator that is
        # legal inside a tag instead.
        # Only a ":" in the last path segment is a tag separator; an earlier
        # one belongs to a registry host:port.
        last_segment = image_tag.rsplit("/", 1)[-1]
        separator = "-" if ":" in last_segment else ":"
        new_tag = f"{image_tag}{separator}{stamp}"
        self._docker.tag(image_tag, new_tag)
        return new_tag

    def list_remote_tags(self, repo: str) -> list[str]:
        """List tags for a DockerHub repository.

        Follows the paginated listing through its ``next`` links so that tags
        beyond the first page are included. On a non-200 response or any
        error, returns the tag names collected so far (an empty list when the
        first request fails).
        """
        url: str | None = f"https://hub.docker.com/v2/repositories/{self._namespace}/{repo}/tags/"
        # Only the first request needs explicit params; "next" URLs carry their own.
        params: dict[str, int] | None = {"page_size": 100}
        names: list[str] = []
        try:
            while url:
                resp = httpx.get(url, params=params, timeout=10.0)
                if resp.status_code != 200:
                    logger.info("Tag listing for %s returned HTTP %s", repo, resp.status_code)
                    break
                payload = resp.json()
                names.extend(t["name"] for t in payload.get("results", []))
                url = payload.get("next")
                params = None
        except Exception:
            logger.warning("Failed to list remote tags for %s", repo)
        return names

    def filter_unpublished(self, local_tags: list[str], remote_tags: list[str]) -> list[str]:
        """Return the entries of *local_tags* whose tag part is not in *remote_tags*.

        The tag part is the text after the last ``:`` (the whole string when
        there is no ``:``). Input order is preserved.
        """
        remote_set = set(remote_tags)
        return [t for t in local_tags if t.split(":")[-1] not in remote_set]
@@ -0,0 +1,26 @@
1
# syntax=docker/dockerfile:1.7

# Declared before FROM and again inside the stage so the value is visible
# to the RUN instructions below.
ARG PY_VERSION=""
FROM buildpack-deps:jammy AS base
ARG PY_VERSION=""

# Tool locations plus single-threaded BLAS/OpenMP defaults.
ENV DEBIAN_FRONTEND=noninteractive \
    MAMBA_ROOT_PREFIX=/opt/conda \
    PATH=/opt/conda/bin:/root/.cargo/bin:/root/.local/bin:$PATH \
    MAMBA_DOCKERFILE_ACTIVATE=1 \
    OPENBLAS_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    OMP_NUM_THREADS=1

RUN mkdir -p /workspace /output
WORKDIR /workspace

# Run the base build script, passing --py-version only when one was given.
COPY docker_build_base.sh /workspace/docker_build_base.sh
RUN chmod +x /workspace/docker_build_base.sh && \
    if [ -z "${PY_VERSION}" ]; then \
        /workspace/docker_build_base.sh; \
    else \
        PY_VERSION="${PY_VERSION}" /workspace/docker_build_base.sh --py-version "${PY_VERSION}"; \
    fi

RUN micromamba clean --all --yes
@@ -0,0 +1,42 @@
1
# syntax=docker/dockerfile:1.7

# Image to build on top of; supplied by the caller as a build arg.
ARG REPO_IMAGE

# --- env: check out the target commit and run the environment build script ---
FROM ${REPO_IMAGE} AS env
ARG COMMIT_SHA
ARG ENV_PAYLOAD="[]"
ARG PY_VERSION=""
RUN git checkout "$COMMIT_SHA"
LABEL vcs.ref="$COMMIT_SHA"

COPY docker_build_env.sh /workspace/repo/docker_build_env.sh
# printf '%s\n' writes the payload byte-for-byte; echo's treatment of
# backslashes differs between shells and can alter escaped JSON.
RUN chmod +x /workspace/repo/docker_build_env.sh && \
    printf '%s\n' "${ENV_PAYLOAD}" > /tmp/env_payload.json && \
    /workspace/repo/docker_build_env.sh --env-payload /tmp/env_payload.json

# --- pkg: run the package build script and add the profiling/test scripts ---
FROM env AS pkg

COPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh
RUN chmod +x /workspace/repo/docker_build_pkg.sh && \
    /workspace/repo/docker_build_pkg.sh

COPY profile.sh /profile.sh
COPY run-tests.sh /run-tests.sh
RUN chmod +x /profile.sh /run-tests.sh

RUN micromamba clean --all --yes

# --- run ---
FROM pkg AS run

COPY docker_build_run.sh /docker_build_run.sh
RUN chmod +x /docker_build_run.sh \
    && /docker_build_run.sh

# --- final ---
FROM run AS final
ARG BENCHMARKS=""
ARG BUILD_SCRIPT=""

COPY docker_build_final.sh /docker_build_final.sh
# Chain with && throughout so a failed chmod/printf fails the build instead
# of being masked by the exit status of the last command.
RUN chmod +x /docker_build_final.sh \
    && printf '%s\n' "${BENCHMARKS}" > /tmp/asv_benchmarks_fallback.txt \
    && /docker_build_final.sh /tmp/asv_benchmarks_fallback.txt
@@ -0,0 +1,11 @@
1
# syntax=docker/dockerfile:1.7

# Base image to derive from; can be overridden with a build arg.
ARG BASE_IMAGE=formulacode/base:latest

FROM ${BASE_IMAGE} AS repo
ARG REPO_URL
ARG COMMIT_SHA=""
# Clone the repository; check out COMMIT_SHA only when one was supplied.
RUN git clone "$REPO_URL" /workspace/repo && \
    if [ -n "$COMMIT_SHA" ]; then \
        cd /workspace/repo && git checkout "$COMMIT_SHA"; \
    fi
WORKDIR /workspace/repo
ENTRYPOINT ["/bin/bash"]