fc-data 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. datasmith/__init__.py +330 -0
  2. datasmith/__init__.pyi +194 -0
  3. datasmith/agents/__init__.py +31 -0
  4. datasmith/agents/classifiers.py +272 -0
  5. datasmith/agents/codex.py +25 -0
  6. datasmith/agents/config.py +108 -0
  7. datasmith/agents/extractors.py +197 -0
  8. datasmith/agents/installed/README.md +52 -0
  9. datasmith/agents/installed/__init__.py +22 -0
  10. datasmith/agents/installed/base.py +240 -0
  11. datasmith/agents/installed/claude.py +134 -0
  12. datasmith/agents/installed/codex.py +91 -0
  13. datasmith/agents/installed/gemini.py +118 -0
  14. datasmith/agents/installed/none.py +27 -0
  15. datasmith/agents/sandbox.py +547 -0
  16. datasmith/agents/synthesizer.py +439 -0
  17. datasmith/agents/templates/AGENTS.md.j2 +150 -0
  18. datasmith/agents/templates/sandbox_verify.py +428 -0
  19. datasmith/docker/__init__.py +31 -0
  20. datasmith/docker/context.py +112 -0
  21. datasmith/docker/images.py +158 -0
  22. datasmith/docker/publish.py +56 -0
  23. datasmith/docker/templates/Dockerfile.base +26 -0
  24. datasmith/docker/templates/Dockerfile.pr +42 -0
  25. datasmith/docker/templates/Dockerfile.repo +11 -0
  26. datasmith/docker/templates/docker_build_base.sh +780 -0
  27. datasmith/docker/templates/docker_build_env.sh +309 -0
  28. datasmith/docker/templates/docker_build_final.sh +106 -0
  29. datasmith/docker/templates/docker_build_pkg.sh +99 -0
  30. datasmith/docker/templates/docker_build_run.sh +124 -0
  31. datasmith/docker/templates/entrypoint.sh +62 -0
  32. datasmith/docker/templates/parser.py +1405 -0
  33. datasmith/docker/templates/profile.sh +199 -0
  34. datasmith/docker/templates/pytest_runner.py +692 -0
  35. datasmith/docker/templates/run-tests.sh +197 -0
  36. datasmith/docker/verifiers.py +131 -0
  37. datasmith/filters.py +154 -0
  38. datasmith/github/__init__.py +22 -0
  39. datasmith/github/client.py +333 -0
  40. datasmith/github/hooks.py +50 -0
  41. datasmith/github/links.py +110 -0
  42. datasmith/github/models.py +206 -0
  43. datasmith/github/render.py +173 -0
  44. datasmith/github/search.py +66 -0
  45. datasmith/github/templates/comment.md.j2 +5 -0
  46. datasmith/github/templates/final.md.j2 +66 -0
  47. datasmith/github/templates/issues.md.j2 +21 -0
  48. datasmith/github/templates/repo.md.j2 +1 -0
  49. datasmith/preflight.py +162 -0
  50. datasmith/publish/__init__.py +13 -0
  51. datasmith/publish/huggingface.py +104 -0
  52. datasmith/publish/pipeline.py +60 -0
  53. datasmith/publish/records.py +91 -0
  54. datasmith/py.typed +1 -0
  55. datasmith/resolution/__init__.py +14 -0
  56. datasmith/resolution/blocklist.py +145 -0
  57. datasmith/resolution/cache.py +120 -0
  58. datasmith/resolution/constants.py +277 -0
  59. datasmith/resolution/dependency_resolver.py +174 -0
  60. datasmith/resolution/git_utils.py +378 -0
  61. datasmith/resolution/import_analyzer.py +66 -0
  62. datasmith/resolution/metadata_parser.py +412 -0
  63. datasmith/resolution/models.py +41 -0
  64. datasmith/resolution/orchestrator.py +522 -0
  65. datasmith/resolution/package_filters.py +312 -0
  66. datasmith/resolution/python_manager.py +110 -0
  67. datasmith/runners/__init__.py +15 -0
  68. datasmith/runners/base.py +112 -0
  69. datasmith/runners/classify_prs.py +48 -0
  70. datasmith/runners/render_problems.py +113 -0
  71. datasmith/runners/resolve_packages.py +66 -0
  72. datasmith/runners/scrape_commits.py +166 -0
  73. datasmith/runners/scrape_repos.py +44 -0
  74. datasmith/runners/synthesize_images.py +310 -0
  75. datasmith/update/__init__.py +5 -0
  76. datasmith/update/cli.py +169 -0
  77. datasmith/update/offline.py +173 -0
  78. datasmith/update/pipeline.py +497 -0
  79. datasmith/utils/__init__.py +18 -0
  80. datasmith/utils/core.py +67 -0
  81. datasmith/utils/db.py +156 -0
  82. datasmith/utils/tokens.py +65 -0
  83. fc_data-0.2.0.dist-info/METADATA +441 -0
  84. fc_data-0.2.0.dist-info/RECORD +87 -0
  85. fc_data-0.2.0.dist-info/WHEEL +4 -0
  86. fc_data-0.2.0.dist-info/entry_points.txt +2 -0
  87. fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,240 @@
1
+ """Abstract interface for CLI-based coding agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import shutil
8
+ import signal
9
+ import subprocess
10
+ import threading
11
+ import time
12
+ from abc import ABC, abstractmethod
13
+ from dataclasses import dataclass, field
14
+
15
+ from datasmith.utils import get_logger
16
+
17
+ logger = get_logger("agents.installed")
18
+
19
+
20
@dataclass
class AgentResult:
    """Normalised outcome of a single run of an installed CLI agent.

    Only ``success`` is mandatory; every other field falls back to an
    empty value so callers can fill in just what they know.
    """

    # Whether the run counts as successful.
    success: bool
    # Text distilled from the agent's output stream.
    output: str = ""
    # Unprocessed output as captured from the agent.
    raw_output: str = ""
    # Paths the agent reported as written or edited (fresh list per instance).
    files_changed: list[str] = field(default_factory=list)
    # Run time in seconds.
    duration_s: float = 0.0
    # Error text; empty when nothing went wrong.
    error: str = ""
30
+
31
+
32
+ # Backward-compat alias
33
+ CodexResult = AgentResult
34
+
35
+
36
class InstalledAgent(ABC):
    """Common base class for wrappers around command-line coding agents.

    A concrete subclass drives one CLI tool (Codex, Claude Code, Gemini CLI)
    and reports what happened as an ``AgentResult``.
    """

    @abstractmethod
    def name(self) -> str:
        """Return the human-readable name of this agent."""

    @abstractmethod
    def is_available(self) -> bool:
        """Return whether the agent's CLI binary can be found on PATH."""

    @abstractmethod
    def exec(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
    ) -> AgentResult:
        """Run *prompt* non-interactively and return the ``AgentResult``."""

    def exec_or_dry_run(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
        dry_run: bool = False,
    ) -> AgentResult:
        """Delegate to :meth:`exec`, or merely log the prompt when *dry_run* is set.

        In dry-run mode nothing is executed: the first 500 characters of the
        prompt are logged and a successful placeholder result is returned.
        """
        if not dry_run:
            return self.exec(prompt, timeout=timeout, workdir=workdir)

        logger.info("[DRY RUN] %s command for prompt (%d chars): %.500s", self.name(), len(prompt), prompt)
        placeholder = AgentResult(
            success=True,
            output="[dry run — no execution]",
            duration_s=0.0,
        )
        return placeholder

    @staticmethod
    def _which(binary: str) -> bool:
        """Return True when *binary* resolves to an executable on PATH."""
        resolved = shutil.which(binary)
        return resolved is not None
80
+
81
+
82
+ def _kill_process_group(proc: subprocess.Popen[str], sig: int = signal.SIGTERM) -> None:
83
+ """Send *sig* to the process group of *proc*, swallowing errors."""
84
+ with contextlib.suppress(ProcessLookupError, OSError):
85
+ os.killpg(os.getpgid(proc.pid), sig)
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Global subprocess registry — allows the SIGINT handler to reach agent
90
+ # processes that live in their own sessions (start_new_session=True) and
91
+ # therefore don't receive CTRL+C from the terminal.
92
+ # ---------------------------------------------------------------------------
93
+ _active_procs: set[subprocess.Popen[str]] = set()
94
+ _active_procs_lock = threading.Lock()
95
+
96
+
97
def _register_proc(proc: subprocess.Popen[str]) -> None:
    """Add *proc* to the module-wide set of tracked agent subprocesses."""
    _active_procs_lock.acquire()
    try:
        _active_procs.add(proc)
    finally:
        _active_procs_lock.release()
100
+
101
+
102
def _unregister_proc(proc: subprocess.Popen[str]) -> None:
    """Drop *proc* from the tracked set; a no-op when it is not present."""
    _active_procs_lock.acquire()
    try:
        _active_procs.discard(proc)
    finally:
        _active_procs_lock.release()
105
+
106
+
107
def terminate_all_agents(force: bool = False) -> None:
    """Signal the process group of every tracked agent subprocess.

    Intended for the CLI signal handler, so that threads blocked in
    ``proc.communicate()`` are released and the program can exit.

    Sends SIGTERM by default, or SIGKILL when *force* is true.
    """
    if force:
        sig = signal.SIGKILL
    else:
        sig = signal.SIGTERM

    # Work on a snapshot so concurrent register/unregister calls cannot
    # mutate the set while it is being walked.
    tracked = list(_active_procs)
    for proc in tracked:
        _kill_process_group(proc, sig)
119
+
120
+
121
def _terminate_and_wait(proc: subprocess.Popen[str]) -> None:
    """Stop *proc*'s process group: SIGTERM first, SIGKILL after 10 seconds.

    Returns only once the process has been reaped.
    """
    _kill_process_group(proc, signal.SIGTERM)
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        # Still alive after the grace period, so escalate below.
        pass
    else:
        return
    _kill_process_group(proc, signal.SIGKILL)
    proc.wait()
129
+
130
+
131
def run_agent_subprocess(
    cmd: list[str],
    *,
    timeout: int = 3600,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
    agent_name: str = "agent",
) -> tuple[int, str, str, float]:
    """Run an agent CLI command with process-group cleanup on interrupt or timeout.

    The child is started in its own session (``start_new_session=True``) and
    registered in the module-wide process registry for the duration of the call.

    Args:
        cmd: Argument vector to execute (no shell involved).
        timeout: Seconds to wait for the command to finish.
        cwd: Working directory for the child, or ``None`` to inherit.
        env: Environment for the child, or ``None`` to inherit.
        agent_name: Label used in the timeout log message.

    Returns:
        ``(returncode, stdout, stderr, duration_s)``.  On timeout the process
        is killed and whatever output was captured is returned with
        ``returncode=-1``.

    Raises:
        FileNotFoundError: If the binary is missing.
        KeyboardInterrupt: Re-raised after the child has been terminated.
    """
    # A monotonic clock keeps the reported duration immune to wall-clock
    # adjustments (NTP steps, manual changes) during long agent runs.
    started = time.monotonic()
    proc: subprocess.Popen[str] | None = None
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=cwd,
            env=env,
            start_new_session=True,
        )
        _register_proc(proc)
        stdout, stderr = proc.communicate(timeout=timeout)
        return proc.returncode, stdout, stderr, time.monotonic() - started
    except subprocess.TimeoutExpired as exc:
        logger.warning("%s timed out after %ds — capturing partial output", agent_name, timeout)
        partial_stdout, partial_stderr = _collect_partial_output(exc, proc)
        return -1, partial_stdout, partial_stderr, time.monotonic() - started
    except KeyboardInterrupt:
        if proc is not None:
            _terminate_and_wait(proc)
        raise
    finally:
        if proc is not None:
            _unregister_proc(proc)
            # Safety net for any exit path that left the child running.
            if proc.poll() is None:
                _kill_process_group(proc, signal.SIGKILL)
                proc.wait()
180
+
181
+
182
+ def _collect_partial_output(
183
+ exc: subprocess.TimeoutExpired,
184
+ proc: subprocess.Popen[str] | None,
185
+ ) -> tuple[str, str]:
186
+ """Extract whatever output was buffered before a timeout."""
187
+ partial_stdout = ""
188
+ partial_stderr = ""
189
+ if exc.stdout:
190
+ partial_stdout = exc.stdout if isinstance(exc.stdout, str) else exc.stdout.decode(errors="replace")
191
+ if exc.stderr:
192
+ partial_stderr = exc.stderr if isinstance(exc.stderr, str) else exc.stderr.decode(errors="replace")
193
+ if proc is not None:
194
+ _terminate_and_wait(proc)
195
+ try:
196
+ remaining_out, remaining_err = proc.communicate(timeout=5)
197
+ partial_stdout += remaining_out or ""
198
+ partial_stderr += remaining_err or ""
199
+ except Exception:
200
+ logger.debug("Failed to read remaining output after timeout", exc_info=True)
201
+ return partial_stdout, partial_stderr
202
+
203
+
204
+ # Registry of concrete agents in preference order.
205
+ # Populated by __init__.py after all subclasses are importable.
206
+ _AGENT_CLASSES: list[type[InstalledAgent]] = []
207
+
208
+
209
def get_agent(preference: list[str] | None = None) -> InstalledAgent:
    """Return an instance of the first agent that reports itself available.

    *preference* lists agent names (lowercase) in the order they should be
    tried; when omitted or empty, ``["claude", "codex", "gemini"]`` is used.
    Names that are not in the registry are skipped.

    Raises ``RuntimeError`` when no candidate is available.
    """
    # Imported here rather than at module level: the concrete agent modules
    # themselves import from this module.
    from datasmith.agents.installed.claude import ClaudeAgent
    from datasmith.agents.installed.codex import CodexAgent
    from datasmith.agents.installed.gemini import GeminiAgent
    from datasmith.agents.installed.none import NoneAgent

    registry: dict[str, type[InstalledAgent]] = dict(
        claude=ClaudeAgent,
        codex=CodexAgent,
        gemini=GeminiAgent,
        none=NoneAgent,
    )

    order = preference if preference else ["claude", "codex", "gemini"]
    for name in order:
        if name not in registry:
            continue
        agent = registry[name]()
        if not agent.is_available():
            continue
        logger.info("Auto-detected agent: %s", agent.name())
        return agent

    available = list(registry)
    raise RuntimeError(f"No installed CLI agent found. Tried: {order}. Install one of: {available}")
@@ -0,0 +1,134 @@
1
+ """Claude Code CLI agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+
8
+ from datasmith.agents.installed.base import AgentResult, InstalledAgent, run_agent_subprocess
9
+ from datasmith.utils import get_logger
10
+
11
+ logger = get_logger("agents.installed.claude")
12
+
13
+
14
+ def _extract_assistant_text(message: object) -> str:
15
+ """Extract text from a Claude assistant message payload."""
16
+ if isinstance(message, str):
17
+ return message
18
+ if isinstance(message, dict):
19
+ content = message.get("content", "")
20
+ if isinstance(content, str):
21
+ return content
22
+ if isinstance(content, list):
23
+ parts = []
24
+ for block in content:
25
+ if isinstance(block, dict) and block.get("type") == "text":
26
+ parts.append(block.get("text", ""))
27
+ return "\n".join(parts)
28
+ return ""
29
+
30
+
31
+ def _parse_claude_stdout(stdout: str) -> tuple[list[str], list[str]]:
32
+ """Parse Claude Code stream-json output into (output_lines, files_changed)."""
33
+ files_changed: list[str] = []
34
+ output_lines: list[str] = []
35
+
36
+ for line in stdout.splitlines():
37
+ line = line.strip()
38
+ if not line:
39
+ continue
40
+ try:
41
+ obj = json.loads(line)
42
+ if not isinstance(obj, dict):
43
+ continue
44
+
45
+ msg_type = obj.get("type", "")
46
+
47
+ if msg_type == "assistant" and "message" in obj:
48
+ text = _extract_assistant_text(obj["message"])
49
+ if text:
50
+ output_lines.append(text)
51
+ elif msg_type == "result":
52
+ result_text = obj.get("result", "")
53
+ if isinstance(result_text, str) and result_text:
54
+ output_lines.append(result_text)
55
+ elif msg_type == "tool_use":
56
+ _collect_file_change(obj, files_changed)
57
+
58
+ except json.JSONDecodeError:
59
+ output_lines.append(line)
60
+
61
+ return output_lines, files_changed
62
+
63
+
64
+ _FILE_TOOL_NAMES = {"Write", "Edit", "write_file", "edit_file"}
65
+
66
+
67
+ def _collect_file_change(obj: dict, files_changed: list[str]) -> None:
68
+ """Extract file path from a tool_use event if it's a file-editing tool."""
69
+ tool_name = obj.get("name", "")
70
+ tool_input = obj.get("input", {})
71
+ if tool_name in _FILE_TOOL_NAMES and isinstance(tool_input, dict):
72
+ path = tool_input.get("file_path") or tool_input.get("path", "")
73
+ if path:
74
+ files_changed.append(path)
75
+
76
+
77
class ClaudeAgent(InstalledAgent):
    """Agent backed by the ``claude`` (Claude Code) command-line tool."""

    def name(self) -> str:
        """Return the agent name, ``"claude"``."""
        return "claude"

    def is_available(self) -> bool:
        """Return whether a ``claude`` binary is on PATH."""
        return self._which("claude")

    def exec(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
    ) -> AgentResult:
        """Run *prompt* through ``claude -p`` and normalise the outcome.

        Success means a zero exit code; stderr is reported as the error only
        for a non-zero exit code.  A missing binary yields a failed result
        instead of an exception.
        """
        invocation = ["claude", "-p", prompt]
        flags = [
            "--dangerously-skip-permissions",
            "--output-format",
            "stream-json",
            "--no-session-persistence",
            "--verbose",
            "--model",
            "sonnet",
            "--effort",
            "medium",
        ]
        cmd = invocation + flags

        logger.debug("claude command: %s", " ".join(cmd))

        # Nesting guard: drop Claude Code env vars to avoid the
        # "cannot be launched inside another Claude Code session" error.
        nesting_vars = ("CLAUDE_CODE_ENTRYPOINT", "CLAUDECODE")
        env = {key: value for key, value in os.environ.items() if key not in nesting_vars}

        try:
            returncode, stdout, stderr, duration = run_agent_subprocess(
                cmd, timeout=timeout, cwd=workdir, env=env, agent_name="claude"
            )
        except FileNotFoundError:
            return AgentResult(
                success=False,
                duration_s=0.0,
                error="claude CLI not found. Install with: npm install -g @anthropic-ai/claude-code",
            )

        output_lines, files_changed = _parse_claude_stdout(stdout)
        succeeded = returncode == 0
        # Fall back to the raw stream when nothing could be extracted.
        rendered = "\n".join(output_lines) if output_lines else stdout
        return AgentResult(
            success=succeeded,
            output=rendered,
            raw_output=stdout,
            files_changed=files_changed,
            duration_s=duration,
            error="" if succeeded else stderr,
        )
@@ -0,0 +1,91 @@
1
+ """Codex CLI agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ from datasmith.agents.installed.base import AgentResult, InstalledAgent, run_agent_subprocess
8
+ from datasmith.utils import get_logger
9
+
10
+ logger = get_logger("agents.installed.codex")
11
+
12
+
13
+ def _parse_codex_stdout(stdout: str) -> tuple[list[str], list[str]]:
14
+ """Parse JSON stream from Codex stdout into (output_lines, files_changed)."""
15
+ files_changed: list[str] = []
16
+ output_lines: list[str] = []
17
+
18
+ for line in stdout.splitlines():
19
+ line = line.strip()
20
+ if not line:
21
+ continue
22
+ try:
23
+ obj = json.loads(line)
24
+ if isinstance(obj, dict):
25
+ if "file" in obj:
26
+ files_changed.append(obj["file"])
27
+ # codex >=0.114 item.completed format
28
+ item = obj.get("item")
29
+ if isinstance(item, dict) and item.get("type") == "agent_message":
30
+ text = item.get("text", "")
31
+ if text:
32
+ output_lines.append(text)
33
+ elif "output" in obj:
34
+ output_lines.append(obj["output"])
35
+ elif "message" in obj:
36
+ output_lines.append(obj["message"])
37
+ except json.JSONDecodeError:
38
+ output_lines.append(line)
39
+
40
+ return output_lines, files_changed
41
+
42
+
43
class CodexAgent(InstalledAgent):
    """Agent backed by the Codex CLI's ``codex exec`` subcommand."""

    def __init__(self, full_auto: bool = False, sandbox: str = "") -> None:
        """Remember the approval settings used when building the command.

        ``--full-auto --sandbox <sandbox>`` is passed only when *full_auto* is
        true and *sandbox* is non-empty; otherwise approvals and sandboxing
        are bypassed entirely.
        """
        self._full_auto = full_auto
        self._sandbox = sandbox

    def name(self) -> str:
        """Return the agent name, ``"codex"``."""
        return "codex"

    def is_available(self) -> bool:
        """Return whether a ``codex`` binary is on PATH."""
        return self._which("codex")

    def exec(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
    ) -> AgentResult:
        """Run *prompt* through ``codex exec`` and normalise the outcome.

        Success means a zero exit code; stderr is reported as the error only
        for a non-zero exit code.  A missing binary yields a failed result
        instead of an exception.
        """
        if self._full_auto and self._sandbox:
            approval_flags = ["--full-auto", "--sandbox", self._sandbox]
        else:
            approval_flags = ["--dangerously-bypass-approvals-and-sandbox"]

        cmd = [
            "codex",
            "exec",
            "--model",
            "gpt-5.4-mini",
            "-c",
            "model_reasoning_effort=medium",
            *approval_flags,
            "--json",
            "--ephemeral",
            prompt,
        ]

        logger.debug("codex command: %s", " ".join(cmd))

        try:
            returncode, stdout, stderr, duration = run_agent_subprocess(
                cmd, timeout=timeout, cwd=workdir, agent_name="codex"
            )
        except FileNotFoundError:
            return AgentResult(
                success=False,
                duration_s=0.0,
                error="codex CLI not found. Install with: npm install -g @openai/codex",
            )

        output_lines, files_changed = _parse_codex_stdout(stdout)
        succeeded = returncode == 0
        # Fall back to the raw stream when nothing could be extracted.
        rendered = "\n".join(output_lines) if output_lines else stdout
        return AgentResult(
            success=succeeded,
            output=rendered,
            raw_output=stdout,
            files_changed=files_changed,
            duration_s=duration,
            error="" if succeeded else stderr,
        )
@@ -0,0 +1,118 @@
1
+ """Gemini CLI agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ from datasmith.agents.installed.base import AgentResult, InstalledAgent, run_agent_subprocess
8
+ from datasmith.utils import get_logger
9
+
10
+ logger = get_logger("agents.installed.gemini")
11
+
12
+
13
+ _TEXT_TYPES = {"assistant", "response", "text"}
14
+ _FILE_TOOL_NAMES = {"write_file", "edit_file", "Write", "Edit", "create_file", "update_file"}
15
+
16
+
17
+ def _parse_gemini_stdout(stdout: str) -> tuple[list[str], list[str]]:
18
+ """Parse Gemini CLI stream-json output into (output_lines, files_changed)."""
19
+ files_changed: list[str] = []
20
+ output_lines: list[str] = []
21
+
22
+ for line in stdout.splitlines():
23
+ line = line.strip()
24
+ if not line:
25
+ continue
26
+ try:
27
+ obj = json.loads(line)
28
+ if not isinstance(obj, dict):
29
+ continue
30
+
31
+ msg_type = obj.get("type", "")
32
+
33
+ if msg_type in _TEXT_TYPES:
34
+ _append_text(obj, output_lines)
35
+ elif msg_type == "result":
36
+ _append_result(obj, output_lines)
37
+ elif msg_type in ("tool_use", "action"):
38
+ _collect_gemini_file(obj, files_changed)
39
+ elif "output" in obj:
40
+ output_lines.append(obj["output"])
41
+ elif "message" in obj:
42
+ output_lines.append(obj["message"])
43
+
44
+ except json.JSONDecodeError:
45
+ output_lines.append(line)
46
+
47
+ return output_lines, files_changed
48
+
49
+
50
+ def _append_text(obj: dict, output_lines: list[str]) -> None:
51
+ text = obj.get("text") or obj.get("message") or obj.get("content", "")
52
+ if isinstance(text, str) and text:
53
+ output_lines.append(text)
54
+
55
+
56
+ def _append_result(obj: dict, output_lines: list[str]) -> None:
57
+ result_text = obj.get("result") or obj.get("text", "")
58
+ if isinstance(result_text, str) and result_text:
59
+ output_lines.append(result_text)
60
+
61
+
62
+ def _collect_gemini_file(obj: dict, files_changed: list[str]) -> None:
63
+ tool_name = obj.get("name") or obj.get("tool", "")
64
+ tool_input = obj.get("input") or obj.get("args", {})
65
+ if isinstance(tool_input, dict) and tool_name in _FILE_TOOL_NAMES:
66
+ path = tool_input.get("file_path") or tool_input.get("path", "")
67
+ if path:
68
+ files_changed.append(path)
69
+
70
+
71
class GeminiAgent(InstalledAgent):
    """Agent backed by the ``gemini`` command-line tool."""

    def name(self) -> str:
        """Return the agent name, ``"gemini"``."""
        return "gemini"

    def is_available(self) -> bool:
        """Return whether a ``gemini`` binary is on PATH."""
        return self._which("gemini")

    def exec(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
    ) -> AgentResult:
        """Run *prompt* through ``gemini -p`` and normalise the outcome.

        Args:
            prompt: Prompt text passed to the CLI.
            timeout: Seconds to wait before the run is aborted.
            workdir: Working directory for the CLI, or ``None`` to inherit.

        Returns:
            An ``AgentResult``.  Success means a zero exit code; stderr is
            reported as the error only for a non-zero exit code.  A missing
            binary yields a failed result instead of an exception.
        """
        # NOTE(review): output is requested as a single ``json`` document
        # (``-o json``); confirm the stdout parser handles that shape.
        cmd = [
            "gemini",
            "-p",
            prompt,
            "--yolo",
            "-o",
            "json",
            "--model",
            "gemini-2.5-flash",
        ]

        logger.debug("gemini command: %s", " ".join(cmd))

        try:
            returncode, stdout, stderr, duration = run_agent_subprocess(
                cmd, timeout=timeout, cwd=workdir, agent_name="gemini"
            )
            output_lines, files_changed = _parse_gemini_stdout(stdout)

            return AgentResult(
                success=returncode == 0,
                # Fall back to the raw stream when nothing could be extracted.
                output="\n".join(output_lines) if output_lines else stdout,
                raw_output=stdout,
                files_changed=files_changed,
                duration_s=duration,
                error=stderr if returncode != 0 else "",
            )
        except FileNotFoundError:
            return AgentResult(
                success=False,
                duration_s=0.0,
                # The Gemini CLI is published under the @google npm scope; the
                # previous hint named a non-Google scope copied from the
                # Claude agent.
                error="gemini CLI not found. Install with: npm install -g @google/gemini-cli",
            )
@@ -0,0 +1,27 @@
1
+ """No-op agent that skips LLM generation, relying solely on similar-context matching."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datasmith.agents.installed.base import AgentResult, InstalledAgent
6
+
7
+
8
class NoneAgent(InstalledAgent):
    """Placeholder agent: always reports itself available, never runs anything."""

    def name(self) -> str:
        """Return the agent name, ``"none"``."""
        return "none"

    def is_available(self) -> bool:
        """Always true; there is no binary to look for."""
        return True

    def exec(
        self,
        prompt: str,
        timeout: int = 3600,
        workdir: str | None = None,
    ) -> AgentResult:
        """Ignore every argument and return a fixed unsuccessful result."""
        skipped = AgentResult(
            success=False,
            output="[none agent — no LLM execution]",
            error="NoneAgent does not execute",
        )
        return skipped