fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import enum
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from datasmith.utils import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger("agents.classifiers")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OptimizationType(str, enum.Enum):
    """Closed set of labels for the kind of performance optimization a change applies.

    Each member is also a ``str``, so it compares equal to its raw value.
    """

    USE_BETTER_ALGORITHM = "use_better_algorithm"
    USE_BETTER_DATA_STRUCTURE_AND_LAYOUT = "use_better_data_structure_and_layout"
    USE_LOWER_LEVEL_SYSTEM = "use_lower_level_system"
    ACCEPT_LESS_PRECISE_SOLUTION = "accept_less_precise_solution"
    USE_PARALLELIZATION = "use_parallelization"
    REMOVE_OR_REDUCE_WORK = "remove_or_reduce_work"
    CACHE_AND_REUSE = "cache_and_reuse"
    DO_IT_EARLIER_BATCH_THROTTLE = "do_it_earlier_batch_throttle"
    SCALE_PLATFORM = "scale_platform"
    DATABASE_AND_STORAGE_TUNING = "database_and_storage_tuning"
    MICRO_OPTIMIZATIONS = "micro_optimizations"
    IO_AND_LATENCY_HIDING = "io_and_latency_hiding"
    USE_HIGHER_LEVEL_SYSTEM = "use_higher_level_system"
    UNCATEGORIZED = "uncategorized"

    @property
    def description(self) -> str:
        """Return the explanatory text registered for this member, or ``""`` if there is none."""
        # The table is defined after this class; it is only looked up at call time.
        explanation = _OPTIMIZATION_DESCRIPTIONS.get(self.value, "")
        return explanation
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Explanatory text for each OptimizationType value, keyed by the member's raw
# string value. Insertion order follows the enum's declaration order.
_OPTIMIZATION_DESCRIPTIONS: dict[str, str] = dict(
    use_better_algorithm=(
        "Complexity reduction or switching to a faster algorithm "
        "(e.g. O(n^2) -> O(n log n), better sorting, smarter search)."
    ),
    use_better_data_structure_and_layout=(
        "Switching to a more efficient data structure or improving memory layout "
        "(e.g. list -> set/dict for lookups, struct-of-arrays, contiguous buffers)."
    ),
    use_lower_level_system=(
        "Offloading work to C/Cython/Rust/Fortran extensions, NumPy vectorized ops, "
        "or native SIMD intrinsics instead of pure Python."
    ),
    accept_less_precise_solution=(
        "Trading accuracy for speed via approximations, heuristics, sampling, "
        "or reduced precision (e.g. float32 instead of float64)."
    ),
    use_parallelization=(
        "Using threads, multiprocessing, GPU kernels, or parallel algorithms "
        "to split work across cores (not just async I/O)."
    ),
    remove_or_reduce_work=(
        "Eliminating unnecessary computation, short-circuiting, early exits, "
        "skipping redundant steps, or simplifying requirements."
    ),
    cache_and_reuse=(
        "Memoization, LRU caches, materialized views, precomputed lookup tables, "
        "or reusing expensive results across calls."
    ),
    do_it_earlier_batch_throttle=(
        "Batching small operations, lazy evaluation, deferred computation, "
        "throttling, or moving work to an earlier/better time."
    ),
    scale_platform=(
        "Horizontal/vertical scaling, load balancing, sharding, or infrastructure-level capacity changes."
    ),
    database_and_storage_tuning=(
        "Adding indices, optimizing queries, denormalization, partitioning, "
        "connection pooling, or storage engine configuration."
    ),
    micro_optimizations=(
        "Hot-path tweaks: inlining, branch reordering, avoiding temporary objects, "
        "strength reduction, guard clauses, or tight-loop tuning."
    ),
    io_and_latency_hiding=(
        "Async/non-blocking I/O, overlapping I/O with compute, prefetching, pipelining, or reducing round-trip latency."
    ),
    use_higher_level_system=(
        "Replacing hand-rolled logic with an optimized library or framework "
        "(e.g. pandas, polars, scipy, BLAS) that handles performance internally."
    ),
    uncategorized="Performance-related change that does not clearly fit any of the above categories.",
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class DifficultyLevel(str, enum.Enum):
    """Three-step difficulty scale; members are also ``str`` and equal their raw value."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class ClassificationDecision:
    """Result of a classification: free-text reason, category, difficulty and confidence.

    ``confidence`` is clamped into the inclusive range 0-100 when the object
    is constructed.
    """

    reason: str = ""
    category: str = ""
    difficulty: str = ""
    confidence: int = 0

    def __post_init__(self) -> None:
        # Out-of-range scores are clamped rather than rejected.
        self.confidence = max(0, min(100, self.confidence))

    @staticmethod
    def _coerce_confidence(raw: Any) -> int:
        """Convert a raw confidence value into an ``int``, returning 0 when it cannot be parsed.

        Integers are returned unchanged. Anything else is stringified; a
        trailing ``%`` is dropped, and decimal text such as ``"85.0"`` is
        truncated toward zero instead of being discarded.
        """
        if raw is None:
            return 0
        if isinstance(raw, int):
            return raw
        try:
            text = str(raw).strip()
            if text.endswith("%"):
                text = text[:-1].rstrip()
            try:
                return int(text)
            except ValueError:
                # Not a plain integer literal: accept decimal / exponent notation.
                return int(float(text))
        except (TypeError, ValueError, OverflowError):
            # Covers non-numeric text, NaN (ValueError) and infinities (OverflowError).
            return 0

    @classmethod
    def from_prediction(cls, prediction: Any) -> ClassificationDecision:
        """Create a decision object from a DSPy prediction response.

        Reads the ``reasoning``, ``category``, ``difficulty`` and ``confidence``
        attributes of *prediction*. Missing or falsy text attributes become
        ``""``; the text fields are stringified and stripped. A confidence that
        is missing or unparseable becomes 0.
        """
        reasoning = getattr(prediction, "reasoning", "") or ""
        category = getattr(prediction, "category", "") or ""
        difficulty = getattr(prediction, "difficulty", "") or ""
        confidence = cls._coerce_confidence(getattr(prediction, "confidence", None))

        return cls(
            reason=str(reasoning).strip(),
            category=str(category).strip(),
            difficulty=str(difficulty).strip(),
            confidence=confidence,
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class PerfClassifier:
    """Yes/no judge for whether a change is primarily a performance improvement.

    The DSPy predictor is created on first use and cached on the instance.
    """

    def __init__(self) -> None:
        self._predictor: Any | None = None

    def _get_predictor(self) -> Any:
        """Return the cached predictor, building it on the first call."""
        if self._predictor is not None:
            return self._predictor

        from datasmith.agents.config import ensure_configured

        ensure_configured()
        import dspy

        # NOTE: the docstring and field descriptions below are sent to the model
        # as the prompt, so they are runtime text rather than documentation.
        class JudgeSignature(dspy.Signature):
            """Decide if this commit's PRIMARY intent is to improve product/runtime performance.

            Label YES only when there is CLEAR, EXPLICIT evidence in the description and/or patch that the
            runtime gets faster (e.g., algorithm change, fewer allocations, caching, vectorization, reduced I/O,
            async/non-blocking for throughput, latency reduction, memory footprint reduction, fix a speed regression).

            Strong positive signals (weigh these collectively):
            - PR title/body contains performance intent (e.g., "PERF:", "speed up", "faster", "performance").
            - Linked issues/comments include benchmark links or timings demonstrating impact.
            - Low-level/hot-path tweaks (e.g., reuse global context, avoid per-call init/teardown, vectorize C/NumPy).

            Hard NO (non-performance) examples: tests/ASV/harness-only changes; CI/workflows/build/packaging; coverage;
            pre-commit/format/lints (clippy/ruff/black); docs; version bumps; terminology/renames; pure refactors without
            performance claims; changes aimed at making perf tests pass but not improving runtime.

            If ambiguous, weigh the concrete code changes and problem description together. When there are
            specific performance cues (title keywords, measured timings, fewer allocations, vectorization,
            caching/reuse) lean YES; otherwise NO.
            """

            problem_description: str = dspy.InputField(desc="Problem statement and technical context from PR/issue")
            github_patch: str = dspy.InputField(desc="Git diff showing actual code changes")
            file_change_summary: str = dspy.InputField(
                desc="A markdown table summarizing all the files changed in the commit along with lines added/removed.",
                default="",
            )
            reasoning: str = dspy.OutputField(desc="Deductive reasoning steps leading to the classification.")
            label: str = dspy.OutputField(desc='Final label: "YES" for performance-related, "NO" otherwise.')

        self._predictor = dspy.Predict(JudgeSignature)
        return self._predictor

    def classify(
        self, problem_description: str, github_patch: str = "", file_change_summary: str = ""
    ) -> tuple[bool, str]:
        """Run the judge and return ``(is_performance_change, reasoning)``.

        The verdict is true when the predicted label, stripped and upper-cased,
        starts with ``"YES"``. Any exception raised while building or calling
        the predictor is logged and reported as
        ``(False, "Classification failed")``.
        """
        try:
            outcome = self._get_predictor()(
                problem_description=problem_description,
                github_patch=github_patch,
                file_change_summary=file_change_summary,
            )
            verdict_text = str(getattr(outcome, "label", "NO"))
            explanation = str(getattr(outcome, "reasoning", ""))
        except Exception:
            logger.exception("PerfClassifier failed")
            return False, "Classification failed"

        return verdict_text.strip().upper().startswith("YES"), explanation
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class ClassifyJudge:
    """Assigns an optimization category and a difficulty level to a change."""

    def __init__(self, max_tokens: int | None = None) -> None:
        # A falsy max_tokens (None or 0) falls back to DSPY_MAX_TOKENS, default 16000.
        if not max_tokens:
            max_tokens = int(os.getenv("DSPY_MAX_TOKENS", "16000"))
        self._max_tokens = max_tokens
        self._predictor: Any | None = None

    def _get_predictor(self) -> Any:
        """Return the cached predictor, building it on the first call."""
        if self._predictor is not None:
            return self._predictor

        from datasmith.agents.config import ensure_configured

        ensure_configured()
        import dspy

        cat_lines = "\n".join(f"- {t.value}: {t.description}" for t in OptimizationType)
        cat_values = ", ".join(t.value for t in OptimizationType)

        # NOTE: docstring and field descriptions are prompt text sent to the model.
        class ClassifySignature(dspy.Signature):
            """Decide the PRIMARY performance optimization technique and difficulty level."""

            problem_description: str = dspy.InputField(desc="Problem statement and technical context from PR/issue")
            github_patch: str = dspy.InputField(desc="Git patch showing code changes")
            category: str = dspy.OutputField(desc=f"One of: {cat_values}")
            difficulty: str = dspy.OutputField(desc="One of: easy, medium, hard")
            reasoning: str = dspy.OutputField(desc="Brief explanation of the classification")

        # Replace the static docstring with one that embeds the category list,
        # which is only known at runtime.
        ClassifySignature.__doc__ = (
            "Decide the PRIMARY performance optimization technique and difficulty level.\n\n"
            f"Category mapping (pick the single best match):\n{cat_lines}\n\n"
            "Difficulty levels:\n"
            "- easy: localized change (<50 lines), minimal risk\n"
            "- medium: module-level refactor, data structure changes\n"
            "- hard: algorithm rewrite or architectural change"
        )

        self._predictor = dspy.Predict(ClassifySignature)
        return self._predictor

    def truncate_patch(self, patch: str) -> str:
        """Shorten *patch* when its ``cl100k_base`` token count exceeds the configured limit.

        An over-long patch is cut to ``max_tokens - 10`` tokens and a truncation
        marker is appended. If anything in the tokenizer path raises (including
        ``tiktoken`` being unavailable), the patch is returned unchanged.
        """
        try:
            import tiktoken

            encoding = tiktoken.get_encoding("cl100k_base")
            token_ids = encoding.encode(patch)
            if len(token_ids) > self._max_tokens:
                kept_ids = token_ids[: self._max_tokens - 10]
                shortened = encoding.decode(kept_ids)
                return shortened + "\n\n// [TRUNCATED DUE TO LENGTH]"
        except Exception:  # noqa: S110
            # Best effort only: fall through and hand back the original text.
            pass
        return patch

    def classify(self, problem_description: str, github_patch: str = "") -> ClassificationDecision:
        """Classify a change, normalizing unexpected model output to safe defaults.

        A category outside the ``OptimizationType`` values becomes
        ``"uncategorized"``; a difficulty outside easy/medium/hard becomes
        ``"medium"``. Any exception in the predictor path is logged and yields
        a "Classification failed" decision.
        """
        github_patch = self.truncate_patch(github_patch)
        try:
            outcome = self._get_predictor()(problem_description=problem_description, github_patch=github_patch)

            category = str(getattr(outcome, "category", "")).strip().lower()
            known_categories = {member.value for member in OptimizationType}
            if category not in known_categories:
                category = "uncategorized"

            level = str(getattr(outcome, "difficulty", "")).strip().lower()
            if level not in ("easy", "medium", "hard"):
                level = "medium"

            return ClassificationDecision(
                reason=str(getattr(outcome, "reasoning", "")),
                category=category,
                difficulty=level,
            )
        except Exception:
            logger.exception("ClassifyJudge failed")
            return ClassificationDecision(
                reason="Classification failed", category="uncategorized", difficulty="medium", confidence=0
            )
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Backward-compatibility shim — real logic lives in agents.installed.codex."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datasmith.agents.installed import AgentResult as CodexResult
|
|
6
|
+
from datasmith.agents.installed import CodexAgent
|
|
7
|
+
from datasmith.agents.installed.codex import _parse_codex_stdout
|
|
8
|
+
|
|
9
|
+
__all__ = ["CodexResult", "_parse_codex_stdout", "codex_exec"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def codex_exec(
    prompt: str,
    timeout: int = 900,
    workdir: str | None = None,
    dry_run: bool = False,
    full_auto: bool = False,
    sandbox: str = "",
) -> CodexResult:
    """Run *prompt* through a freshly constructed :class:`CodexAgent`.

    ``full_auto`` and ``sandbox`` are passed to the agent constructor; the
    remaining arguments are forwarded to ``exec_or_dry_run``, whose result is
    returned as-is.
    """
    return CodexAgent(full_auto=full_auto, sandbox=sandbox).exec_or_dry_run(
        prompt,
        timeout=timeout,
        workdir=workdir,
        dry_run=dry_run,
    )
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import os
|
|
5
|
+
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from datasmith.utils import get_logger
|
|
10
|
+
|
|
11
|
+
logger = get_logger("agents.config")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class AgentConfig:
    """Settings used to build the LLM backend."""

    primary_model: str = ""
    fallback_model: str = ""
    api_key: str = ""
    api_base: str = ""
    max_tokens: int = 16000
    temperature: float = 0.0
    portkey_api_key: str = ""
    portkey_model_name: str = ""

    @classmethod
    def from_env(cls) -> AgentConfig:
        """Build a config from ``DSPY_*`` and ``PORTKEY_*`` environment variables.

        Unset variables fall back to the defaults written below. A non-numeric
        ``DSPY_MAX_TOKENS`` or ``DSPY_TEMPERATURE`` raises ``ValueError``.
        """
        read = os.environ.get
        token_limit = int(read("DSPY_MAX_TOKENS", "16000"))
        sampling_temperature = float(read("DSPY_TEMPERATURE", "0"))
        return cls(
            primary_model=read("DSPY_MODEL", "openai/gpt-oss-120b"),
            fallback_model=read("DSPY_FALLBACK_MODEL", ""),
            api_key=read("DSPY_API_KEY", "local"),
            api_base=read("DSPY_API_BASE", "http://localhost:30001/v1"),
            max_tokens=token_limit,
            temperature=sampling_temperature,
            portkey_api_key=read("PORTKEY_API_KEY", ""),
            portkey_model_name=read("PORTKEY_MODEL_NAME", ""),
        )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Module-level state for lazy DSPy configuration.
|
|
42
|
+
_configured = False
|
|
43
|
+
_lock = threading.Lock()
|
|
44
|
+
_lm: Any = None # Stores the dspy.LM instance for async-safe reuse
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def configure_dspy(config: AgentConfig) -> None:
    """Create a ``dspy.LM`` from *config*, store it in ``_lm`` and register it with DSPy.

    Backend selection, in order:

    1. ``api_key`` and ``primary_model`` both set: the model is called directly,
       using ``api_base`` when it is non-empty.
    2. ``portkey_api_key`` set: requests go through the Portkey gateway URL.
    3. Otherwise a warning is logged and nothing is configured.

    A ``RuntimeError`` raised by ``dspy.configure`` is suppressed.
    """
    global _lm
    import dspy

    lm_options: dict[str, Any] = {
        "temperature": config.temperature,
        "max_tokens": config.max_tokens,
    }

    direct_backend = bool(config.api_key and config.primary_model)
    if not direct_backend and not config.portkey_api_key:
        logger.warning("No LM backend configured")
        return

    if direct_backend:
        model_name = config.primary_model
        _lm = dspy.LM(
            model=model_name,
            api_key=config.api_key,
            api_base=config.api_base or None,
            **lm_options,
        )
    else:
        from portkey_ai import PORTKEY_GATEWAY_URL

        model_name = config.portkey_model_name or "@anthropic/claude-3-5-sonnet-latest"
        # Provider header is the text before the first "/" with leading "@" removed.
        provider = model_name.split("/")[0].lstrip("@")
        lm_options["api_base"] = PORTKEY_GATEWAY_URL
        lm_options["api_key"] = "unused-by-portkey"
        lm_options["headers"] = {
            "x-portkey-api-key": config.portkey_api_key,
            "x-portkey-provider": provider,
        }
        lm_options["custom_llm_provider"] = "openai"
        _lm = dspy.LM(model=model_name, **lm_options)

    with contextlib.suppress(RuntimeError):
        dspy.configure(lm=_lm)
    logger.info("Configured DSPy with model: %s", model_name)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def ensure_configured() -> None:
    """Configure DSPy from the environment the first time it is needed.

    First call: builds an :class:`AgentConfig` from the environment and runs
    :func:`configure_dspy` while holding the module lock. The flag is checked
    again after the lock is acquired, so concurrent first callers configure
    only once. The flag is set even when no backend was found, so that case
    is not retried.

    Later calls: if an LM was stored, ``dspy.configure(lm=_lm)`` is issued
    again and any ``RuntimeError`` it raises is suppressed. No
    ``dspy.context()`` block is entered here; in a caller where
    ``dspy.configure`` raises, this function changes nothing.
    """
    global _configured
    if _configured:
        # Already initialised. Re-issue configure with the stored LM; the
        # RuntimeError is suppressed (presumably DSPy raises it when configure
        # is called from a thread/task other than the one that first
        # configured it - TODO confirm against the DSPy version in use).
        if _lm is not None:
            import dspy

            with contextlib.suppress(RuntimeError):
                dspy.configure(lm=_lm)
        return
    with _lock:
        # Second check: another thread may have finished while we waited.
        if _configured:
            return
        config = AgentConfig.from_env()
        configure_dspy(config)
        _configured = True
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ProblemExtractor: Extractive-first approach for problem statement generation.
|
|
3
|
+
|
|
4
|
+
Key principles:
|
|
5
|
+
- 90% extractive, 10% abstractive
|
|
6
|
+
- Preserve code snippets verbatim (character-exact)
|
|
7
|
+
- Keep technical terms exactly as written
|
|
8
|
+
- Natural structure over imposed templates
|
|
9
|
+
- Preserve disagreements and different viewpoints
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from datasmith.utils import get_logger
|
|
19
|
+
|
|
20
|
+
logger = get_logger("agents.extractors")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class ProblemExtraction:
    """Four text buckets extracted from a PR discussion.

    1. initial_observations: symptoms of the problematic behavior
    2. triage_attempts: investigation and reasoning that narrowed the issue down
    3. solution_overview: what was changed
    4. solution_observations: what was seen after the change
    """

    initial_observations: str = ""
    triage_attempts: str = ""
    solution_overview: str = ""
    solution_observations: str = ""

    def to_problem_markdown(self) -> str:
        """Return the stripped initial observations with stray ```` ```json ```` fence lines removed."""
        observations = (self.initial_observations or "").strip()
        return re.sub(r"^```json\s*$", "", observations, flags=re.MULTILINE)

    def _normalise_section(self, content: str | None, header_variants: list[str]) -> str | None:
        """Strip *content* and drop its first line if it starts with one of *header_variants*.

        Matching is case-insensitive. Returns ``None`` for empty input.
        """
        if not content:
            return None
        body = content.strip()
        lowered = body.lower()
        if any(lowered.startswith(variant.lower()) for variant in header_variants):
            # The section already carries its own header: remove that first line.
            body = "\n".join(body.splitlines()[1:]).lstrip()
        return body

    def to_full_markdown(self) -> str:
        """Render every non-empty bucket, adding a ``##`` header to all but the first."""
        parts: list[str] = []

        opening = (self.initial_observations or "").strip()
        if opening:
            parts.append(opening)

        titled_buckets = (
            ("Triage Attempts", self.triage_attempts),
            ("Solution Overview", self.solution_overview),
            ("Solution Observations", self.solution_observations),
        )
        for title, raw_text in titled_buckets:
            lowered_title = title.lower()
            cleaned = self._normalise_section(
                raw_text,
                [f"## {lowered_title}", f"**{lowered_title}**"],
            )
            if cleaned:
                parts.append(f"## {title}\n\n{cleaned}")

        combined = "\n\n".join(parts).strip()
        return re.sub(r"^```json\s*$", "", combined, flags=re.MULTILINE)

    def to_dict(self) -> dict[str, str]:
        """Return the four buckets as a plain dictionary keyed by field name."""
        field_names = (
            "initial_observations",
            "triage_attempts",
            "solution_overview",
            "solution_observations",
        )
        return {field_name: getattr(self, field_name) for field_name in field_names}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ProblemExtractor:
    """Sorts PR text into problem/solution buckets using a DSPy predictor.

    The predictor is created on first use and cached on the instance.
    """

    def __init__(self) -> None:
        self._predictor: Any | None = None

    def _get_predictor(self) -> Any:
        """Return the cached predictor, building it on the first call."""
        if self._predictor is not None:
            return self._predictor

        from datasmith.agents.config import ensure_configured

        ensure_configured()
        import dspy

        # NOTE: docstring and field descriptions are prompt text sent to the model.
        class ProblemExtractorSignature(dspy.Signature):
            """What problem is this Github PR trying to solve? Extract near-verbatim relevant text following the given JSON output. If no relevant context exists for a field, return an empty string for it."""

            pr_title: str = dspy.InputField(desc="The GitHub PR title")
            pr_body: str = dspy.InputField(desc="The GitHub PR description")
            pr_comments: str = dspy.InputField(desc="Comments on the PR thread.")
            initial_observations: str = dspy.OutputField(
                desc="Objective symptoms of the problematic behavior, described in the present tense. Focus strictly on what is happening (metrics, user impact, frequency). Do not include causes, hypotheses, or explanations."
            )
            triage_attempts: str = dspy.OutputField(
                desc="The investigative steps and reasoning used to narrow down contributing factors—what you checked, what you ruled out, and what evidence you gathered to understand where the issue originates."
            )
            solution_overview: str = dspy.OutputField(
                desc="A concise description of the change(s) made and how they address the identified bottleneck or constraint."
            )
            solution_observations: str = dspy.OutputField(
                desc="What you observe after applying the change—new measurements, behavior differences, and any regressions or trade-offs that appeared."
            )

        self._predictor = dspy.Predict(ProblemExtractorSignature)
        return self._predictor

    def extract_problem(self, pr_title: str, pr_body: str, pr_comments: str = "") -> ProblemExtraction:
        """Run the predictor on a PR and return the normalized extraction.

        If anything raises, the error is logged and the result carries only
        the first 500 characters of *pr_body* as ``initial_observations``
        (or an empty string when *pr_body* is falsy).
        """
        try:
            prediction = self._get_predictor()(pr_title=pr_title, pr_body=pr_body, pr_comments=pr_comments)
            return self._build_extraction(prediction)
        except Exception:
            logger.exception("Problem extraction failed, returning empty")
            fallback = pr_body[:500] if pr_body else ""
            return ProblemExtraction(initial_observations=fallback)

    def _clean_text(self, value: Any | None) -> str | None:
        """Turn a raw prediction value into stripped text, or ``None`` if it is effectively empty.

        Lists are flattened one level and joined with newlines; other
        non-strings are stringified. The placeholders ``null``, ``none``,
        ``undefined`` and ``n/a`` (any case) count as empty.
        """
        if value is None:
            return None

        if isinstance(value, list):
            try:
                pieces: list[str] = [
                    str(inner)
                    for item in value
                    for inner in (item if isinstance(item, list) else [item])
                ]
                value = "\n".join(pieces)
            except Exception:
                value = "\n".join(str(v) for v in value)

        text = value if isinstance(value, str) else str(value)
        trimmed = text.strip()
        if trimmed.lower() in {"null", "none", "undefined", "n/a", ""}:
            return None
        return trimmed

    def _build_extraction(self, prediction: Any) -> ProblemExtraction:
        """Convert a raw prediction into a ``ProblemExtraction``, blanking implausible fields.

        A field is kept only if, after cleaning, it reaches its minimum length
        (20 characters for ``initial_observations``, 10 for the others) and
        contains at least one ASCII letter.
        """
        minimum_lengths = {
            "initial_observations": 20,
            "triage_attempts": 10,
            "solution_overview": 10,
            "solution_observations": 10,
        }

        kept: dict[str, str] = {}
        for field_name, min_len in minimum_lengths.items():
            text = self._clean_text(getattr(prediction, field_name, None))
            is_plausible = (
                text is not None
                and len(text.strip()) >= min_len
                and re.search(r"[A-Za-z]", text.strip()) is not None
            )
            kept[field_name] = (text or "") if is_plausible else ""

        return ProblemExtraction(**kept)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Installed Agent Abstraction
|
|
2
|
+
|
|
3
|
+
An **installed agent** is a CLI coding agent installed on the host machine that
|
|
4
|
+
can execute prompts non-interactively, auto-approve tool calls, and return
|
|
5
|
+
structured output.
|
|
6
|
+
|
|
7
|
+
## Supported agents
|
|
8
|
+
|
|
9
|
+
| Agent | CLI binary | Install |
|
|
10
|
+
|-------|-----------|---------|
|
|
11
|
+
| Claude Code | `claude` | `npm install -g @anthropic-ai/claude-code` |
|
|
12
|
+
| Codex | `codex` | `npm install -g @openai/codex` |
|
|
13
|
+
| Gemini CLI | `gemini` | `npm install -g @google/gemini-cli` |
|
|
14
|
+
|
|
15
|
+
## Interface contract
|
|
16
|
+
|
|
17
|
+
Every `InstalledAgent` implementation must satisfy these requirements:
|
|
18
|
+
|
|
19
|
+
1. **Non-interactive execution** — run a prompt, return when done
|
|
20
|
+
2. **Auto-approve all tool calls** — no human-in-the-loop
|
|
21
|
+
3. **JSON/structured output** — parseable stdout with agent messages and file changes
|
|
22
|
+
4. **Working directory** — operate in a specified directory (via subprocess `cwd=`)
|
|
23
|
+
5. **Ephemeral sessions** — don't persist state across runs
|
|
24
|
+
6. **Shell + file editing** — can run bash and edit files in the workspace
|
|
25
|
+
7. **External timeout** — can be killed via subprocess timeout
|
|
26
|
+
|
|
27
|
+
## Auto-detection
|
|
28
|
+
|
|
29
|
+
`get_agent()` tries agents in preference order (default: `claude → codex → gemini`)
|
|
30
|
+
and returns the first one whose CLI binary is on `PATH`:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from datasmith.agents.installed import get_agent
|
|
34
|
+
|
|
35
|
+
agent = get_agent() # auto-detect
|
|
36
|
+
agent = get_agent(preference=["codex"]) # force codex
|
|
37
|
+
result = agent.exec("Fix the build", timeout=600, workdir="/tmp/workspace")
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Adding a new agent
|
|
41
|
+
|
|
42
|
+
1. Create `src/datasmith/agents/installed/<name>.py`
|
|
43
|
+
2. Subclass `InstalledAgent` and implement `name()`, `is_available()`, `exec()`
|
|
44
|
+
3. Add a `_parse_<name>_stdout()` function to normalise CLI output
|
|
45
|
+
4. Register the class in `base.py`'s `get_agent()` registry dict
|
|
46
|
+
5. Re-export from `__init__.py`
|
|
47
|
+
|
|
48
|
+
## Output parsing
|
|
49
|
+
|
|
50
|
+
Each agent's CLI emits a different JSON schema. The `_parse_*_stdout()` function
|
|
51
|
+
for each agent normalises the output into `(output_lines, files_changed)` which
|
|
52
|
+
is then wrapped in an `AgentResult`.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Installed CLI agent abstraction.
|
|
2
|
+
|
|
3
|
+
Provides a unified interface for CLI-based coding agents (Codex, Claude Code,
|
|
4
|
+
Gemini CLI) with auto-detection of whichever is available on the host.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datasmith.agents.installed.base import AgentResult, CodexResult, InstalledAgent, get_agent
|
|
8
|
+
from datasmith.agents.installed.claude import ClaudeAgent
|
|
9
|
+
from datasmith.agents.installed.codex import CodexAgent
|
|
10
|
+
from datasmith.agents.installed.gemini import GeminiAgent
|
|
11
|
+
from datasmith.agents.installed.none import NoneAgent
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"AgentResult",
|
|
15
|
+
"ClaudeAgent",
|
|
16
|
+
"CodexAgent",
|
|
17
|
+
"CodexResult",
|
|
18
|
+
"GeminiAgent",
|
|
19
|
+
"InstalledAgent",
|
|
20
|
+
"NoneAgent",
|
|
21
|
+
"get_agent",
|
|
22
|
+
]
|