fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
datasmith/__init__.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""DataSmith — toolchain for building the FormulaCode benchmark dataset."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import os
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
import dotenv
|
|
10
|
+
|
|
11
|
+
# Must match the published distribution version (this wheel is fc-data 0.2.0);
# bump it together with the package metadata on every release.
__version__ = "0.2.0"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def setup_environment() -> None:
    """Load environment variables from ``tokens.env`` when that path exists.

    The path is relative, so it is looked up in the current working
    directory. When nothing exists at that path the function returns
    without doing anything.
    """
    env_file = "tokens.env"
    if not os.path.exists(env_file):
        return
    dotenv.load_dotenv(env_file)


# Executed once at import time so the variables are available before any
# submodule is lazily loaded.
setup_environment()
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# PEP 562 lazy loading
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
# Names that are importable as ``datasmith.<name>`` and are only imported the
# first time they are accessed as attributes of the package.
# NOTE(review): ``datasmith.resolution`` ships in the wheel but is not listed
# here -- confirm that leaving it out of the lazy API is intentional.
_SUBMODULES: set[str] = {
    "agents",
    "docker",
    "filters",
    "github",
    "preflight",
    "publish",
    "runners",
    "update",
    "utils",
}
|
|
37
|
+
|
|
38
|
+
# Maps every lazily re-exported top-level name to ``(module path, attribute)``.
# Each symbol is exported under the same name it has in its home module, so the
# table is generated from a per-module listing; keys appear in listing order.
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    symbol: (module_path, symbol)
    for module_path, symbols in (
        (
            "datasmith.github",
            (
                "PR",
                "Anonymizer",
                "FormulaCodeRecord",
                "GitHubClient",
                "HookRegistry",
                "Issue",
                "IssueExpanded",
                "PRChangeSummary",
                "PRFileChange",
                "extract_references",
                "render_problem_statement",
                "scrape_links",
            ),
        ),
        (
            "datasmith.utils",
            (
                "Settings",
                "TokenPool",
                "batch_upsert",
                "fetch_all",
                "get_async_client",
                "get_client",
                "get_logger",
                "stable_hash",
                "supabase_cached",
                "with_backoff",
            ),
        ),
        (
            "datasmith.update",
            ("Pipeline",),
        ),
        (
            "datasmith.docker",
            (
                "DockerContext",
                "DockerHubPublisher",
                "ImageManager",
                "MultiObjVerifier",
                "ProfileVerifier",
                "PytestVerifier",
                "SmokeVerifier",
                "VerifyResult",
            ),
        ),
        (
            "datasmith.agents",
            (
                "AgentConfig",
                "ClassificationDecision",
                "ClassifyJudge",
                "CodexResult",
                "OptimizationType",
                "PerfClassifier",
                "ProblemExtraction",
                "ProblemExtractor",
                "SynthesisState",
                "Synthesizer",
                "codex_exec",
                "configure_dspy",
                "ensure_configured",
            ),
        ),
        (
            "datasmith.runners",
            (
                "BaseRunner",
                "ClassifyPRsRunner",
                "ScrapeCommitsRunner",
                "ScrapeReposRunner",
                "SynthesizeImagesRunner",
            ),
        ),
        (
            "datasmith.publish",
            (
                "HuggingFacePublisher",
                "publish_pipeline",
                "records_from_parquet",
                "records_from_supabase",
                "records_to_parquet",
            ),
        ),
        (
            "datasmith.filters",
            (
                "symbolic_compliance",
                "message_filter",
                "has_core_file",
                "check_patch_size",
                "check_file_compliance",
                "estimate_tokens",
            ),
        ),
        (
            "datasmith.preflight",
            ("run_preflight",),
        ),
    )
    for symbol in symbols
}
|
|
110
|
+
|
|
111
|
+
# Public API: the two eagerly defined names first, then every lazily loaded
# submodule, then every lazily re-exported symbol (each group sorted).
__all__ = ["__version__", "setup_environment"] + sorted(_SUBMODULES) + sorted(_LAZY_IMPORTS)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def __getattr__(name: str) -> object:
    """Resolve *name* lazily on first access (PEP 562 module ``__getattr__``).

    Names in ``_SUBMODULES`` are imported as ``datasmith.<name>``. Names in
    ``_LAZY_IMPORTS`` are read from the module recorded for them. In both
    cases the result is stored in the module globals, so later lookups find
    it through normal attribute access and do not reach this hook again.

    Args:
        name: Attribute requested on the ``datasmith`` package.

    Returns:
        The imported submodule or the re-exported object.

    Raises:
        AttributeError: If *name* is neither a known submodule nor a lazy
            export.
    """
    if name in _SUBMODULES:
        resolved: object = importlib.import_module(f"datasmith.{name}")
    elif name in _LAZY_IMPORTS:
        source_module, attribute = _LAZY_IMPORTS[name]
        resolved = getattr(importlib.import_module(source_module), attribute)
    else:
        raise AttributeError(f"module 'datasmith' has no attribute {name!r}")

    # Cache only after a successful resolution; a failed import leaves the
    # module globals untouched.
    globals()[name] = resolved
    return resolved
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def __dir__() -> list[str]:
    """Return the public names of the package, including lazily loaded ones.

    A fresh list is returned on every call so that a caller who sorts or
    extends the result cannot modify ``__all__`` (and with it the package's
    star-import surface) by accident.
    """
    return list(__all__)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
# Static type-checking imports (never executed at runtime)
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
if TYPE_CHECKING:
|
|
143
|
+
from datasmith import agents as agents
|
|
144
|
+
from datasmith import docker as docker
|
|
145
|
+
from datasmith import filters as filters
|
|
146
|
+
from datasmith import github as github
|
|
147
|
+
from datasmith import preflight as preflight
|
|
148
|
+
from datasmith import publish as publish
|
|
149
|
+
from datasmith import runners as runners
|
|
150
|
+
from datasmith import update as update
|
|
151
|
+
from datasmith import utils as utils
|
|
152
|
+
from datasmith.agents import (
|
|
153
|
+
AgentConfig as AgentConfig,
|
|
154
|
+
)
|
|
155
|
+
from datasmith.agents import (
|
|
156
|
+
ClassificationDecision as ClassificationDecision,
|
|
157
|
+
)
|
|
158
|
+
from datasmith.agents import (
|
|
159
|
+
ClassifyJudge as ClassifyJudge,
|
|
160
|
+
)
|
|
161
|
+
from datasmith.agents import (
|
|
162
|
+
CodexResult as CodexResult,
|
|
163
|
+
)
|
|
164
|
+
from datasmith.agents import (
|
|
165
|
+
OptimizationType as OptimizationType,
|
|
166
|
+
)
|
|
167
|
+
from datasmith.agents import (
|
|
168
|
+
PerfClassifier as PerfClassifier,
|
|
169
|
+
)
|
|
170
|
+
from datasmith.agents import (
|
|
171
|
+
ProblemExtraction as ProblemExtraction,
|
|
172
|
+
)
|
|
173
|
+
from datasmith.agents import (
|
|
174
|
+
ProblemExtractor as ProblemExtractor,
|
|
175
|
+
)
|
|
176
|
+
from datasmith.agents import (
|
|
177
|
+
SynthesisState as SynthesisState,
|
|
178
|
+
)
|
|
179
|
+
from datasmith.agents import (
|
|
180
|
+
Synthesizer as Synthesizer,
|
|
181
|
+
)
|
|
182
|
+
from datasmith.agents import (
|
|
183
|
+
codex_exec as codex_exec,
|
|
184
|
+
)
|
|
185
|
+
from datasmith.agents import (
|
|
186
|
+
configure_dspy as configure_dspy,
|
|
187
|
+
)
|
|
188
|
+
from datasmith.agents import (
|
|
189
|
+
ensure_configured as ensure_configured,
|
|
190
|
+
)
|
|
191
|
+
from datasmith.docker import (
|
|
192
|
+
DockerContext as DockerContext,
|
|
193
|
+
)
|
|
194
|
+
from datasmith.docker import (
|
|
195
|
+
DockerHubPublisher as DockerHubPublisher,
|
|
196
|
+
)
|
|
197
|
+
from datasmith.docker import (
|
|
198
|
+
ImageManager as ImageManager,
|
|
199
|
+
)
|
|
200
|
+
from datasmith.docker import (
|
|
201
|
+
MultiObjVerifier as MultiObjVerifier,
|
|
202
|
+
)
|
|
203
|
+
from datasmith.docker import (
|
|
204
|
+
ProfileVerifier as ProfileVerifier,
|
|
205
|
+
)
|
|
206
|
+
from datasmith.docker import (
|
|
207
|
+
PytestVerifier as PytestVerifier,
|
|
208
|
+
)
|
|
209
|
+
from datasmith.docker import (
|
|
210
|
+
SmokeVerifier as SmokeVerifier,
|
|
211
|
+
)
|
|
212
|
+
from datasmith.docker import (
|
|
213
|
+
VerifyResult as VerifyResult,
|
|
214
|
+
)
|
|
215
|
+
from datasmith.filters import (
|
|
216
|
+
check_file_compliance as check_file_compliance,
|
|
217
|
+
)
|
|
218
|
+
from datasmith.filters import (
|
|
219
|
+
check_patch_size as check_patch_size,
|
|
220
|
+
)
|
|
221
|
+
from datasmith.filters import (
|
|
222
|
+
estimate_tokens as estimate_tokens,
|
|
223
|
+
)
|
|
224
|
+
from datasmith.filters import (
|
|
225
|
+
has_core_file as has_core_file,
|
|
226
|
+
)
|
|
227
|
+
from datasmith.filters import (
|
|
228
|
+
message_filter as message_filter,
|
|
229
|
+
)
|
|
230
|
+
from datasmith.filters import (
|
|
231
|
+
symbolic_compliance as symbolic_compliance,
|
|
232
|
+
)
|
|
233
|
+
from datasmith.github import (
|
|
234
|
+
PR as PR,
|
|
235
|
+
)
|
|
236
|
+
from datasmith.github import (
|
|
237
|
+
Anonymizer as Anonymizer,
|
|
238
|
+
)
|
|
239
|
+
from datasmith.github import (
|
|
240
|
+
FormulaCodeRecord as FormulaCodeRecord,
|
|
241
|
+
)
|
|
242
|
+
from datasmith.github import (
|
|
243
|
+
GitHubClient as GitHubClient,
|
|
244
|
+
)
|
|
245
|
+
from datasmith.github import (
|
|
246
|
+
HookRegistry as HookRegistry,
|
|
247
|
+
)
|
|
248
|
+
from datasmith.github import (
|
|
249
|
+
Issue as Issue,
|
|
250
|
+
)
|
|
251
|
+
from datasmith.github import (
|
|
252
|
+
IssueExpanded as IssueExpanded,
|
|
253
|
+
)
|
|
254
|
+
from datasmith.github import (
|
|
255
|
+
PRChangeSummary as PRChangeSummary,
|
|
256
|
+
)
|
|
257
|
+
from datasmith.github import (
|
|
258
|
+
PRFileChange as PRFileChange,
|
|
259
|
+
)
|
|
260
|
+
from datasmith.github import (
|
|
261
|
+
extract_references as extract_references,
|
|
262
|
+
)
|
|
263
|
+
from datasmith.github import (
|
|
264
|
+
render_problem_statement as render_problem_statement,
|
|
265
|
+
)
|
|
266
|
+
from datasmith.github import (
|
|
267
|
+
scrape_links as scrape_links,
|
|
268
|
+
)
|
|
269
|
+
from datasmith.preflight import run_preflight as run_preflight
|
|
270
|
+
from datasmith.publish import (
|
|
271
|
+
HuggingFacePublisher as HuggingFacePublisher,
|
|
272
|
+
)
|
|
273
|
+
from datasmith.publish import (
|
|
274
|
+
publish_pipeline as publish_pipeline,
|
|
275
|
+
)
|
|
276
|
+
from datasmith.publish import (
|
|
277
|
+
records_from_parquet as records_from_parquet,
|
|
278
|
+
)
|
|
279
|
+
from datasmith.publish import (
|
|
280
|
+
records_from_supabase as records_from_supabase,
|
|
281
|
+
)
|
|
282
|
+
from datasmith.publish import (
|
|
283
|
+
records_to_parquet as records_to_parquet,
|
|
284
|
+
)
|
|
285
|
+
from datasmith.runners import (
|
|
286
|
+
BaseRunner as BaseRunner,
|
|
287
|
+
)
|
|
288
|
+
from datasmith.runners import (
|
|
289
|
+
ClassifyPRsRunner as ClassifyPRsRunner,
|
|
290
|
+
)
|
|
291
|
+
from datasmith.runners import (
|
|
292
|
+
ScrapeCommitsRunner as ScrapeCommitsRunner,
|
|
293
|
+
)
|
|
294
|
+
from datasmith.runners import (
|
|
295
|
+
ScrapeReposRunner as ScrapeReposRunner,
|
|
296
|
+
)
|
|
297
|
+
from datasmith.runners import (
|
|
298
|
+
SynthesizeImagesRunner as SynthesizeImagesRunner,
|
|
299
|
+
)
|
|
300
|
+
from datasmith.update import Pipeline as Pipeline
|
|
301
|
+
from datasmith.utils import (
|
|
302
|
+
Settings as Settings,
|
|
303
|
+
)
|
|
304
|
+
from datasmith.utils import (
|
|
305
|
+
TokenPool as TokenPool,
|
|
306
|
+
)
|
|
307
|
+
from datasmith.utils import (
|
|
308
|
+
batch_upsert as batch_upsert,
|
|
309
|
+
)
|
|
310
|
+
from datasmith.utils import (
|
|
311
|
+
fetch_all as fetch_all,
|
|
312
|
+
)
|
|
313
|
+
from datasmith.utils import (
|
|
314
|
+
get_async_client as get_async_client,
|
|
315
|
+
)
|
|
316
|
+
from datasmith.utils import (
|
|
317
|
+
get_client as get_client,
|
|
318
|
+
)
|
|
319
|
+
from datasmith.utils import (
|
|
320
|
+
get_logger as get_logger,
|
|
321
|
+
)
|
|
322
|
+
from datasmith.utils import (
|
|
323
|
+
stable_hash as stable_hash,
|
|
324
|
+
)
|
|
325
|
+
from datasmith.utils import (
|
|
326
|
+
supabase_cached as supabase_cached,
|
|
327
|
+
)
|
|
328
|
+
from datasmith.utils import (
|
|
329
|
+
with_backoff as with_backoff,
|
|
330
|
+
)
|
datasmith/__init__.pyi
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Type stub for datasmith — keeps mypy happy with PEP 562 lazy loading."""
|
|
2
|
+
|
|
3
|
+
from datasmith import agents as agents
|
|
4
|
+
from datasmith import docker as docker
|
|
5
|
+
from datasmith import filters as filters
|
|
6
|
+
from datasmith import github as github
|
|
7
|
+
from datasmith import preflight as preflight
|
|
8
|
+
from datasmith import publish as publish
|
|
9
|
+
from datasmith import runners as runners
|
|
10
|
+
from datasmith import update as update
|
|
11
|
+
from datasmith import utils as utils
|
|
12
|
+
from datasmith.agents import (
|
|
13
|
+
AgentConfig as AgentConfig,
|
|
14
|
+
)
|
|
15
|
+
from datasmith.agents import (
|
|
16
|
+
ClassificationDecision as ClassificationDecision,
|
|
17
|
+
)
|
|
18
|
+
from datasmith.agents import (
|
|
19
|
+
ClassifyJudge as ClassifyJudge,
|
|
20
|
+
)
|
|
21
|
+
from datasmith.agents import (
|
|
22
|
+
CodexResult as CodexResult,
|
|
23
|
+
)
|
|
24
|
+
from datasmith.agents import (
|
|
25
|
+
OptimizationType as OptimizationType,
|
|
26
|
+
)
|
|
27
|
+
from datasmith.agents import (
|
|
28
|
+
PerfClassifier as PerfClassifier,
|
|
29
|
+
)
|
|
30
|
+
from datasmith.agents import (
|
|
31
|
+
ProblemExtraction as ProblemExtraction,
|
|
32
|
+
)
|
|
33
|
+
from datasmith.agents import (
|
|
34
|
+
ProblemExtractor as ProblemExtractor,
|
|
35
|
+
)
|
|
36
|
+
from datasmith.agents import (
|
|
37
|
+
SynthesisState as SynthesisState,
|
|
38
|
+
)
|
|
39
|
+
from datasmith.agents import (
|
|
40
|
+
Synthesizer as Synthesizer,
|
|
41
|
+
)
|
|
42
|
+
from datasmith.agents import (
|
|
43
|
+
codex_exec as codex_exec,
|
|
44
|
+
)
|
|
45
|
+
from datasmith.agents import (
|
|
46
|
+
configure_dspy as configure_dspy,
|
|
47
|
+
)
|
|
48
|
+
from datasmith.agents import (
|
|
49
|
+
ensure_configured as ensure_configured,
|
|
50
|
+
)
|
|
51
|
+
from datasmith.docker import (
|
|
52
|
+
DockerContext as DockerContext,
|
|
53
|
+
)
|
|
54
|
+
from datasmith.docker import (
|
|
55
|
+
DockerHubPublisher as DockerHubPublisher,
|
|
56
|
+
)
|
|
57
|
+
from datasmith.docker import (
|
|
58
|
+
ImageManager as ImageManager,
|
|
59
|
+
)
|
|
60
|
+
from datasmith.docker import (
|
|
61
|
+
MultiObjVerifier as MultiObjVerifier,
|
|
62
|
+
)
|
|
63
|
+
from datasmith.docker import (
|
|
64
|
+
ProfileVerifier as ProfileVerifier,
|
|
65
|
+
)
|
|
66
|
+
from datasmith.docker import (
|
|
67
|
+
PytestVerifier as PytestVerifier,
|
|
68
|
+
)
|
|
69
|
+
from datasmith.docker import (
|
|
70
|
+
SmokeVerifier as SmokeVerifier,
|
|
71
|
+
)
|
|
72
|
+
from datasmith.docker import (
|
|
73
|
+
VerifyResult as VerifyResult,
|
|
74
|
+
)
|
|
75
|
+
from datasmith.filters import (
|
|
76
|
+
check_file_compliance as check_file_compliance,
|
|
77
|
+
)
|
|
78
|
+
from datasmith.filters import (
|
|
79
|
+
check_patch_size as check_patch_size,
|
|
80
|
+
)
|
|
81
|
+
from datasmith.filters import (
|
|
82
|
+
estimate_tokens as estimate_tokens,
|
|
83
|
+
)
|
|
84
|
+
from datasmith.filters import (
|
|
85
|
+
has_core_file as has_core_file,
|
|
86
|
+
)
|
|
87
|
+
from datasmith.filters import (
|
|
88
|
+
message_filter as message_filter,
|
|
89
|
+
)
|
|
90
|
+
from datasmith.filters import (
|
|
91
|
+
symbolic_compliance as symbolic_compliance,
|
|
92
|
+
)
|
|
93
|
+
from datasmith.github import (
|
|
94
|
+
PR as PR,
|
|
95
|
+
)
|
|
96
|
+
from datasmith.github import (
|
|
97
|
+
Anonymizer as Anonymizer,
|
|
98
|
+
)
|
|
99
|
+
from datasmith.github import (
|
|
100
|
+
FormulaCodeRecord as FormulaCodeRecord,
|
|
101
|
+
)
|
|
102
|
+
from datasmith.github import (
|
|
103
|
+
GitHubClient as GitHubClient,
|
|
104
|
+
)
|
|
105
|
+
from datasmith.github import (
|
|
106
|
+
HookRegistry as HookRegistry,
|
|
107
|
+
)
|
|
108
|
+
from datasmith.github import (
|
|
109
|
+
Issue as Issue,
|
|
110
|
+
)
|
|
111
|
+
from datasmith.github import (
|
|
112
|
+
IssueExpanded as IssueExpanded,
|
|
113
|
+
)
|
|
114
|
+
from datasmith.github import (
|
|
115
|
+
PRChangeSummary as PRChangeSummary,
|
|
116
|
+
)
|
|
117
|
+
from datasmith.github import (
|
|
118
|
+
PRFileChange as PRFileChange,
|
|
119
|
+
)
|
|
120
|
+
from datasmith.github import (
|
|
121
|
+
extract_references as extract_references,
|
|
122
|
+
)
|
|
123
|
+
from datasmith.github import (
|
|
124
|
+
render_problem_statement as render_problem_statement,
|
|
125
|
+
)
|
|
126
|
+
from datasmith.github import (
|
|
127
|
+
scrape_links as scrape_links,
|
|
128
|
+
)
|
|
129
|
+
from datasmith.preflight import run_preflight as run_preflight
|
|
130
|
+
from datasmith.publish import (
|
|
131
|
+
HuggingFacePublisher as HuggingFacePublisher,
|
|
132
|
+
)
|
|
133
|
+
from datasmith.publish import (
|
|
134
|
+
publish_pipeline as publish_pipeline,
|
|
135
|
+
)
|
|
136
|
+
from datasmith.publish import (
|
|
137
|
+
records_from_parquet as records_from_parquet,
|
|
138
|
+
)
|
|
139
|
+
from datasmith.publish import (
|
|
140
|
+
records_from_supabase as records_from_supabase,
|
|
141
|
+
)
|
|
142
|
+
from datasmith.publish import (
|
|
143
|
+
records_to_parquet as records_to_parquet,
|
|
144
|
+
)
|
|
145
|
+
from datasmith.runners import (
|
|
146
|
+
BaseRunner as BaseRunner,
|
|
147
|
+
)
|
|
148
|
+
from datasmith.runners import (
|
|
149
|
+
ClassifyPRsRunner as ClassifyPRsRunner,
|
|
150
|
+
)
|
|
151
|
+
from datasmith.runners import (
|
|
152
|
+
ScrapeCommitsRunner as ScrapeCommitsRunner,
|
|
153
|
+
)
|
|
154
|
+
from datasmith.runners import (
|
|
155
|
+
ScrapeReposRunner as ScrapeReposRunner,
|
|
156
|
+
)
|
|
157
|
+
from datasmith.runners import (
|
|
158
|
+
SynthesizeImagesRunner as SynthesizeImagesRunner,
|
|
159
|
+
)
|
|
160
|
+
from datasmith.update import Pipeline as Pipeline
|
|
161
|
+
from datasmith.utils import (
|
|
162
|
+
Settings as Settings,
|
|
163
|
+
)
|
|
164
|
+
from datasmith.utils import (
|
|
165
|
+
TokenPool as TokenPool,
|
|
166
|
+
)
|
|
167
|
+
from datasmith.utils import (
|
|
168
|
+
batch_upsert as batch_upsert,
|
|
169
|
+
)
|
|
170
|
+
from datasmith.utils import (
|
|
171
|
+
fetch_all as fetch_all,
|
|
172
|
+
)
|
|
173
|
+
from datasmith.utils import (
|
|
174
|
+
get_async_client as get_async_client,
|
|
175
|
+
)
|
|
176
|
+
from datasmith.utils import (
|
|
177
|
+
get_client as get_client,
|
|
178
|
+
)
|
|
179
|
+
from datasmith.utils import (
|
|
180
|
+
get_logger as get_logger,
|
|
181
|
+
)
|
|
182
|
+
from datasmith.utils import (
|
|
183
|
+
stable_hash as stable_hash,
|
|
184
|
+
)
|
|
185
|
+
from datasmith.utils import (
|
|
186
|
+
supabase_cached as supabase_cached,
|
|
187
|
+
)
|
|
188
|
+
from datasmith.utils import (
|
|
189
|
+
with_backoff as with_backoff,
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
__version__: str
|
|
193
|
+
|
|
194
|
+
# Loads environment variables from ``tokens.env`` if that file is present.
def setup_environment() -> None: ...
|
|
datasmith/agents/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""ds.agents — LLM agents for classification, extraction, synthesis."""
|
|
2
|
+
|
|
3
|
+
from datasmith.agents.classifiers import ClassificationDecision, ClassifyJudge, OptimizationType, PerfClassifier
|
|
4
|
+
from datasmith.agents.codex import CodexResult, codex_exec
|
|
5
|
+
from datasmith.agents.config import AgentConfig, configure_dspy, ensure_configured
|
|
6
|
+
from datasmith.agents.extractors import ProblemExtraction, ProblemExtractor
|
|
7
|
+
from datasmith.agents.installed import AgentResult, InstalledAgent, get_agent
|
|
8
|
+
from datasmith.agents.sandbox import SandboxConfig, SandboxResult, SandboxRunner
|
|
9
|
+
from datasmith.agents.synthesizer import SynthesisState, Synthesizer
|
|
10
|
+
|
|
11
|
+
# Names re-exported by this package, kept in plain string sort order
# (capitalised names sort before lower-case ones).
__all__ = [
    "AgentConfig",
    "AgentResult",
    "ClassificationDecision",
    "ClassifyJudge",
    "CodexResult",
    "InstalledAgent",
    "OptimizationType",
    "PerfClassifier",
    "ProblemExtraction",
    "ProblemExtractor",
    "SandboxConfig",
    "SandboxResult",
    "SandboxRunner",
    "SynthesisState",
    "Synthesizer",
    "codex_exec",
    "configure_dspy",
    "ensure_configured",
    "get_agent",
]
|