agenttester 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ """AgentTester: Multi-agent comparison tool.
2
+
3
+ Usable as a CLI (``agenttester run …``), a Docker container, or a Python
4
+ library::
5
+
6
+ from agenttester import Orchestrator, AgentConfig, load_config
7
+ """
8
+
9
+ from .agent_runner import AgentResult, run_agent
10
+ from .config import AgentConfig, load_config
11
+ from .git_manager import DiffStats, GitManager
12
+ from .orchestrator import Orchestrator
13
+
14
+ __all__ = [
15
+ "AgentConfig",
16
+ "AgentResult",
17
+ "DiffStats",
18
+ "GitManager",
19
+ "Orchestrator",
20
+ "load_config",
21
+ "run_agent",
22
+ ]
23
+
24
+ __version__ = "0.1.0"
@@ -0,0 +1,341 @@
1
+ """Run a single agent process in a worktree."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+ import shlex
8
+ import signal
9
+ import subprocess
10
+ import tempfile
11
+ import time
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
+ import rich.markup
16
+ from rich.console import Console
17
+
18
+ from .config import AgentConfig
19
+
20
+
21
@dataclass
class AgentResult:
    """Outcome of one agent invocation, as returned by the runners below."""

    # Name of the agent this result belongs to.
    agent_name: str
    # Exit status of the agent process; the runners in this module report -1
    # when the run timed out or raised.
    exit_code: int
    # Elapsed time in seconds (measured with time.monotonic by the runners).
    duration: float
    # Captured standard output, lines joined with "\n".
    stdout: str
    # Captured standard error, lines joined with "\n".
    stderr: str
    # Human-readable failure description, or None when nothing went wrong.
    error: str | None = None
31
+
32
+
33
+ # ── helpers for remote execution ──────────────────────────────────────
34
+
35
+
36
def _rsync_to_remote(local: Path, host: str, remote_dir: str) -> None:
    """Mirror the *local* directory onto ``host:remote_dir`` with rsync.

    Both paths get a trailing slash so the directory contents are synced, and
    ``--delete`` is passed so the destination loses files missing from the
    source. Output is captured rather than shown.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    source = f"{local}/"
    destination = f"{host}:{remote_dir}/"
    argv = ["rsync", "-az", "--delete", source, destination]
    subprocess.run(argv, check=True, capture_output=True)
49
+
50
+
51
def _rsync_from_remote(host: str, remote_dir: str, local: Path) -> None:
    """Mirror ``host:remote_dir`` back into the *local* directory with rsync.

    Both paths get a trailing slash so the directory contents are synced, and
    ``--delete`` is passed so the local copy loses files missing remotely.
    Output is captured rather than shown.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    source = f"{host}:{remote_dir}/"
    destination = f"{local}/"
    argv = ["rsync", "-az", "--delete", source, destination]
    subprocess.run(argv, check=True, capture_output=True)
64
+
65
+
66
+ def _build_ssh_command(
67
+ agent: AgentConfig,
68
+ remote_dir: str,
69
+ cmd: str,
70
+ ) -> str:
71
+ """Wrap *cmd* in an SSH invocation on the agent's host."""
72
+ env_exports = ""
73
+ if agent.env:
74
+ parts = " ".join(f"{k}={shlex.quote(v)}" for k, v in agent.env.items())
75
+ env_exports = f"export {parts} && "
76
+ inner = f"{env_exports}cd {shlex.quote(remote_dir)} && {cmd}"
77
+ return f"ssh {agent.host} {shlex.quote(inner)}"
78
+
79
+
80
+ # ── core entry point ──────────────────────────────────────────────────
81
+
82
+
83
+ def _prepare_command(agent: AgentConfig, prompt: str) -> tuple[str, Path | None, bool]:
84
+ """Substitute placeholders and decide stdin mode.
85
+
86
+ Returns (final_cmd, prompt_file_path | None, pipe_stdin).
87
+ """
88
+ cmd = agent.command
89
+ prompt_file_path: Path | None = None
90
+
91
+ if "{prompt_file}" in cmd:
92
+ fd, path_str = tempfile.mkstemp(suffix=".md", prefix="agenttester-prompt-")
93
+ prompt_file_path = Path(path_str)
94
+ with os.fdopen(fd, "w") as f:
95
+ f.write(prompt)
96
+ cmd = cmd.replace("{prompt_file}", path_str)
97
+
98
+ if "{prompt}" in cmd:
99
+ cmd = cmd.replace("{prompt}", shlex.quote(prompt))
100
+
101
+ has_placeholder = "{prompt}" in agent.command or "{prompt_file}" in agent.command
102
+ return cmd, prompt_file_path, not has_placeholder
103
+
104
+
105
async def run_agent(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Dispatch to the remote or local runner depending on ``agent.is_remote``.

    All arguments are forwarded unchanged to the selected runner, whose
    ``AgentResult`` is returned.
    """
    runner = _run_remote if agent.is_remote else _run_local
    return await runner(agent, worktree_path, prompt, console, color, output_lock)
119
+
120
+
121
async def _run_local(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Run an agent as a local subprocess.

    The command is executed through the shell inside *worktree_path*, in its
    own session, with ``agent.env`` layered over the current environment. If
    the command template has no prompt placeholder the prompt is written to
    the child's stdin. Output is streamed to *console* while it is collected.

    Returns:
        An ``AgentResult``. Timeouts and other failures are reported through
        ``exit_code=-1`` and ``error`` instead of being raised.
    """
    start = time.monotonic()
    cmd, prompt_file_path, pipe_stdin = _prepare_command(agent, prompt)

    full_env = os.environ.copy()
    full_env.update(agent.env)

    prefix = f"[{color}]\\[{agent.name}][/{color}]"
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    def _failure(message: str) -> AgentResult:
        """Build the result for a failed run from whatever was captured."""
        return AgentResult(
            agent_name=agent.name,
            exit_code=-1,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
            error=message,
        )

    proc: asyncio.subprocess.Process | None = None
    try:
        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdin=asyncio.subprocess.PIPE if pipe_stdin else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=worktree_path,
            env=full_env,
            start_new_session=True,
        )

        if pipe_stdin and proc.stdin:
            try:
                proc.stdin.write(prompt.encode())
                await proc.stdin.drain()
            except (BrokenPipeError, ConnectionResetError):
                # The child exited or closed stdin without reading the
                # prompt. That alone is not a failure: keep going and let
                # the exit code decide.
                pass
            finally:
                proc.stdin.close()

        await _stream_and_wait(
            proc,
            agent.timeout,
            stdout_lines,
            stderr_lines,
            console,
            prefix,
            output_lock,
        )

        return AgentResult(
            agent_name=agent.name,
            exit_code=proc.returncode or 0,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
        )

    except TimeoutError:
        _kill_proc_tree(proc)
        return _failure(f"Timed out after {agent.timeout}s")
    except Exception as e:
        # Top-level boundary for this agent: convert any failure into a
        # result so one broken agent does not abort the whole comparison.
        _kill_proc_tree(proc)
        return _failure(str(e))
    finally:
        if prompt_file_path:
            prompt_file_path.unlink(missing_ok=True)
198
+
199
+
200
async def _run_remote(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Rsync to remote, run agent via SSH, rsync results back.

    Steps: push *worktree_path* to ``agent.remote_workdir/agent.name`` on
    ``agent.host``, run the agent command there over ssh while streaming its
    output, then pull the remote directory back over the worktree.

    Returns:
        An ``AgentResult``. Timeouts, rsync failures, and other errors are
        reported through ``exit_code=-1`` and ``error`` instead of being
        raised.
    """
    start = time.monotonic()
    remote_dir = f"{agent.remote_workdir}/{agent.name}"
    prefix = f"[{color}]\\[{agent.name}][/{color}]"
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    proc: asyncio.subprocess.Process | None = None
    prompt_file_path: Path | None = None

    def _failure(message: str) -> AgentResult:
        """Build the result for a failed run from whatever was captured."""
        return AgentResult(
            agent_name=agent.name,
            exit_code=-1,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
            error=message,
        )

    try:
        # 1. Push worktree to remote
        async with output_lock:
            console.print(f" {prefix} [dim]syncing to {agent.host}:{remote_dir}[/dim]")
        await asyncio.to_thread(_rsync_to_remote, worktree_path, agent.host, remote_dir)

        # 2. Build and run command over SSH
        # NOTE(review): a {prompt_file} placeholder expands to a *local*
        # temp path that is not part of the synced worktree, so it looks
        # like that path will not exist on the remote host; confirm.
        cmd, prompt_file_path, pipe_stdin = _prepare_command(agent, prompt)
        ssh_cmd = _build_ssh_command(agent, remote_dir, cmd)

        proc = await asyncio.create_subprocess_shell(
            ssh_cmd,
            # Mirror the local runner: without a placeholder the prompt is
            # delivered on stdin, which ssh forwards to the remote command.
            stdin=asyncio.subprocess.PIPE if pipe_stdin else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            start_new_session=True,
        )

        if pipe_stdin and proc.stdin:
            try:
                proc.stdin.write(prompt.encode())
                await proc.stdin.drain()
            except (BrokenPipeError, ConnectionResetError):
                # The remote command did not read the prompt; let the exit
                # code decide whether the run failed.
                pass
            finally:
                proc.stdin.close()

        await _stream_and_wait(
            proc,
            agent.timeout,
            stdout_lines,
            stderr_lines,
            console,
            prefix,
            output_lock,
        )
        exit_code = proc.returncode or 0

        # 3. Pull results back
        async with output_lock:
            console.print(
                f" {prefix} [dim]syncing from {agent.host}:{remote_dir}[/dim]"
            )
        await asyncio.to_thread(
            _rsync_from_remote, agent.host, remote_dir, worktree_path
        )

        return AgentResult(
            agent_name=agent.name,
            exit_code=exit_code,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
        )

    except TimeoutError:
        _kill_proc_tree(proc)
        return _failure(f"Timed out after {agent.timeout}s")
    except subprocess.CalledProcessError as e:
        # rsync output is captured, so surface its stderr; the bare
        # exception text only carries the exit status.
        detail = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        return _failure(f"{e}: {detail}" if detail else str(e))
    except Exception as e:
        # Top-level boundary for this agent: convert any failure into a
        # result so one broken agent does not abort the whole comparison.
        _kill_proc_tree(proc)
        return _failure(str(e))
    finally:
        # Remove the temporary prompt file on every path, not only success.
        if prompt_file_path:
            prompt_file_path.unlink(missing_ok=True)
281
+
282
+
283
+ # ── shared streaming helper ───────────────────────────────────────────
284
+
285
+
286
async def _stream_and_wait(
    proc: asyncio.subprocess.Process,
    timeout: int,
    stdout_lines: list[str],
    stderr_lines: list[str],
    console: Console,
    prefix: str,
    output_lock: asyncio.Lock,
) -> None:
    """Stream stdout/stderr and wait, raising TimeoutError on expiry.

    Each decoded line is appended to *stdout_lines* / *stderr_lines* and
    echoed to *console* behind *prefix* (stderr dimmed). The whole operation,
    reading both streams AND waiting for the process to exit, is bounded
    by *timeout* seconds.

    Raises:
        TimeoutError: if the process has not finished within *timeout*; the
            process group is killed first.
    """

    async def _read(
        stream: asyncio.StreamReader | None,
        lines: list[str],
        is_err: bool,
    ) -> None:
        if stream is None:
            return
        # NOTE(review): StreamReader.readline raises ValueError when a single
        # line exceeds the reader's buffer limit; that is not handled here.
        while True:
            raw = await stream.readline()
            if not raw:
                break
            line = raw.decode("utf-8", errors="replace").rstrip()
            lines.append(line)
            # Escape Rich markup in agent output to avoid parse errors
            safe = rich.markup.escape(line)
            async with output_lock:
                if is_err:
                    console.print(f" {prefix} [dim]{safe}[/dim]")
                else:
                    console.print(f" {prefix} {safe}")

    async def _drain_and_wait() -> None:
        await asyncio.gather(
            _read(proc.stdout, stdout_lines, False),
            _read(proc.stderr, stderr_lines, True),
        )
        # Kept inside the timed region: a child may close its pipes and keep
        # running, and waiting for it must not be unbounded.
        await proc.wait()

    try:
        await asyncio.wait_for(_drain_and_wait(), timeout=timeout)
    except asyncio.TimeoutError as exc:
        _kill_proc_tree(proc)
        # Normalise to the builtin TimeoutError, which the callers catch.
        raise TimeoutError from exc
330
+
331
+
332
+ def _kill_proc_tree(
333
+ proc: asyncio.subprocess.Process | None,
334
+ ) -> None:
335
+ """Kill a process and its entire process group."""
336
+ if proc is None or proc.returncode is not None:
337
+ return
338
+ try:
339
+ os.killpg(proc.pid, signal.SIGKILL)
340
+ except (ProcessLookupError, PermissionError):
341
+ proc.kill()
agenttester/cli.py ADDED
@@ -0,0 +1,177 @@
1
+ """CLI entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import urllib.error
7
+ from pathlib import Path
8
+ from typing import Annotated
9
+
10
+ import typer
11
+ from rich.console import Console
12
+
13
+ from .config import load_config
14
+ from .orchestrator import Orchestrator
15
+ from .repl import run_repl
16
+ from .vllm import query as _vllm_query
17
+
18
+ app = typer.Typer(
19
+ name="agenttester",
20
+ help="Send a prompt to multiple coding agents in parallel and compare results.",
21
+ no_args_is_help=True,
22
+ )
23
+ console = Console()
24
+
25
+
26
+ def _parse_agent_names(raw: list[str]) -> list[str]:
27
+ """Flatten comma-separated and repeated --agents values."""
28
+ names: list[str] = []
29
+ for entry in raw:
30
+ names.extend(n.strip() for n in entry.split(",") if n.strip())
31
+ return names
32
+
33
+
34
@app.command()
def run(
    prompt: Annotated[
        str | None, typer.Argument(help="Prompt to send to each agent")
    ] = None,
    agents: Annotated[
        list[str] | None,
        typer.Option(
            "--agents",
            "-a",
            help="Agent names (comma-separated or repeated)",
        ),
    ] = None,
    prompt_file: Annotated[
        Path | None,
        typer.Option("--prompt-file", "-f", help="Read prompt from a file"),
    ] = None,
    keep_worktrees: Annotated[
        bool,
        typer.Option("--keep-worktrees", help="Keep worktrees after the run"),
    ] = False,
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
    timeout: Annotated[
        int | None,
        typer.Option(
            "--timeout",
            "-t",
            help="Override timeout for all agents (seconds)",
        ),
    ] = None,
    repo: Annotated[
        Path | None,
        typer.Option("--repo", "-r", help="Path to target git repo (default: cwd)"),
    ] = None,
) -> None:
    """Run agents in parallel on a prompt and compare results."""
    # Resolve prompt (a prompt file takes precedence over the argument)
    if prompt_file:
        if not prompt_file.exists():
            console.print(f"[red]Prompt file not found: {prompt_file}[/red]")
            raise typer.Exit(1)
        prompt_text = prompt_file.read_text(encoding="utf-8").strip()
        if not prompt_text:
            # An empty inline prompt is rejected below; treat an empty file
            # the same way instead of sending agents nothing.
            console.print(f"[red]Prompt file is empty: {prompt_file}[/red]")
            raise typer.Exit(1)
    elif prompt:
        prompt_text = prompt
    else:
        console.print("[red]Provide a prompt or --prompt-file[/red]")
        raise typer.Exit(1)

    # Resolve agents
    if not agents:
        console.print("[red]Specify at least one agent with --agents[/red]")
        raise typer.Exit(1)

    # De-duplicate: every agent name owns one worktree/branch per run.
    agent_names = list(dict.fromkeys(_parse_agent_names(agents)))
    if not agent_names:
        # e.g. `--agents ","` parses to nothing.
        console.print("[red]Specify at least one agent with --agents[/red]")
        raise typer.Exit(1)
    if len(agent_names) > 5:
        console.print("[red]Maximum 5 agents allowed[/red]")
        raise typer.Exit(1)

    # Load config and resolve agent objects
    all_agents = load_config(config)
    selected = []
    for name in agent_names:
        if name not in all_agents:
            console.print(
                f"[red]Unknown agent: {name}[/red]\n"
                f"Available: {', '.join(sorted(all_agents))}"
            )
            raise typer.Exit(1)
        agent_cfg = all_agents[name]
        if timeout is not None:
            agent_cfg.timeout = timeout
        selected.append(agent_cfg)

    # Run
    repo_path = (repo or Path.cwd()).resolve()
    # NOTE(review): Orchestrator construction sits outside the try below, so
    # anything it raises is not turned into a friendly message here.
    orchestrator = Orchestrator(repo_path, console)

    try:
        asyncio.run(
            orchestrator.run(prompt_text, selected, keep_worktrees=keep_worktrees)
        )
    except RuntimeError as e:
        console.print(f"[red]{e}[/red]")
        raise typer.Exit(1) from e
121
+
122
+
123
@app.command()
def query(
    endpoint: Annotated[
        str, typer.Argument(help="vLLM server endpoint (http://HOST:PORT)")
    ],
    model_id: Annotated[str, typer.Argument(help="Model ID served by the endpoint")],
    prompt: Annotated[str, typer.Argument(help="Prompt to send")],
    max_tokens: Annotated[
        int, typer.Option("--max-tokens", help="Maximum tokens to generate")
    ] = 2048,
) -> None:
    """Query a vLLM model server and print the response."""
    try:
        result = _vllm_query(
            endpoint, model_id, [{"role": "user", "content": prompt}], max_tokens
        )
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")
        # markup=False: the server's body is arbitrary text and must not be
        # parsed as Rich markup; the colour is applied through `style`.
        console.print(f"HTTP {e.code}: {body}", style="red", markup=False)
        raise typer.Exit(1) from e
    except OSError as e:
        console.print(str(e), style="red", markup=False)
        raise typer.Exit(1) from e
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Malformed JSON, an unexpected response shape, or an endpoint that
        # is not a usable URL: report it instead of dumping a traceback.
        console.print(
            f"Invalid endpoint or malformed response: {e!r}",
            style="red",
            markup=False,
        )
        raise typer.Exit(1) from e

    # Model output is printed verbatim; square brackets in it are not markup.
    console.print(result, markup=False)
147
+
148
+
149
@app.command()
def repl(
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
) -> None:
    """Start an interactive REPL across all vLLM model agents."""
    # run_repl is a coroutine function; drive it to completion on a fresh
    # event loop.
    session = run_repl(config)
    asyncio.run(session)
158
+
159
+
160
@app.command("agents")
def list_agents(
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
) -> None:
    """List available agents."""
    # Names that get the "(preset)" tag in the listing.
    preset_names = ("claude", "aider", "codex")
    known = load_config(config)

    console.print("[bold]Available agents:[/bold]\n")
    for name in sorted(known):
        agent = known[name]
        if name in preset_names:
            tag = "[dim](preset)[/dim]"
        else:
            tag = ""
        console.print(f" [bold]{name}[/bold] {tag}")
        console.print(f" command: [dim]{agent.command}[/dim]")
        console.print(f" host: {agent.host}")
        console.print(f" commit: {agent.commit_style} timeout: {agent.timeout}s")
        console.print()
agenttester/config.py ADDED
@@ -0,0 +1,65 @@
1
+ """Agent configuration and loading."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ import yaml
9
+
10
+ from .presets import PRESETS
11
+
12
# File names probed (in this order, relative to the current directory) when no
# explicit config path is given: plain names first, then dot-file variants.
CONFIG_CANDIDATES = [
    f"{prefix}agenttester.{extension}"
    for prefix in ("", ".")
    for extension in ("yaml", "yml")
]
18
+
19
+
20
@dataclass
class AgentConfig:
    """Settings describing how to launch one coding agent."""

    # Identifier used to select the agent.
    name: str
    # Shell command template for the agent.
    command: str
    # Host to run on; "localhost" means a local subprocess.
    host: str = "localhost"
    # Base directory on the remote host.
    remote_workdir: str = "/tmp/agenttester"
    commit_style: str = "auto"  # "auto" (agent commits) or "manual" (we commit)
    # Extra environment variables for the agent process.
    env: dict[str, str] = field(default_factory=dict)
    timeout: int = 600  # seconds

    @property
    def is_remote(self) -> bool:
        """Whether ``host`` is anything other than the literal "localhost"."""
        runs_locally = self.host == "localhost"
        return not runs_locally
36
+
37
+
38
def load_config(config_path: Path | None = None) -> dict[str, AgentConfig]:
    """Load agent configs from YAML, merged with built-in presets.

    The result starts with one ``AgentConfig`` per entry in ``PRESETS``.
    Agents from the YAML file's ``agents`` mapping are then added, replacing
    any preset with the same name.

    Args:
        config_path: YAML file to read. When ``None``, the first existing
            name in ``CONFIG_CANDIDATES`` (relative to the current
            directory) is used. A path that does not exist is ignored.

    Returns:
        Mapping of agent name to configuration.

    Raises:
        KeyError: if a YAML agent entry has no ``command``.
    """
    agents: dict[str, AgentConfig] = {
        name: AgentConfig(name=name, **preset) for name, preset in PRESETS.items()
    }

    if config_path is None:
        config_path = next(
            (p for p in map(Path, CONFIG_CANDIDATES) if p.exists()), None
        )

    if config_path is None or not config_path.exists():
        return agents

    with open(config_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    for name, agent_data in (data.get("agents") or {}).items():
        # YAML can yield `env: null` or non-string scalars (ints, bools).
        # Normalise to dict[str, str], since the environment is later merged
        # into a process environment and shell-quoted.
        raw_env = agent_data.get("env") or {}
        env = {
            str(key): "" if value is None else str(value)
            for key, value in raw_env.items()
        }
        agents[name] = AgentConfig(
            name=name,
            command=agent_data["command"],
            host=agent_data.get("host", "localhost"),
            remote_workdir=agent_data.get("remote_workdir", "/tmp/agenttester"),
            commit_style=agent_data.get("commit_style", "auto"),
            env=env,
            timeout=agent_data.get("timeout", 600),
        )

    return agents
@@ -0,0 +1,123 @@
1
+ """Git worktree and branch management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import re
8
+ import shlex
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+
12
+ import git
13
+ from git.exc import GitCommandError
14
+
15
+
16
@dataclass
class DiffStats:
    """Summary of a diff between the base ref and an agent's branch."""

    # Number of files touched.
    files_changed: int = 0
    # Number of inserted lines.
    insertions: int = 0
    # Number of deleted lines.
    deletions: int = 0
    # Paths of the files that differ.
    changed_files: list[str] = field(default_factory=list)
24
+
25
+
26
class GitManager:
    """Create, inspect, and remove per-agent git worktrees and branches."""

    def __init__(self, repo_path: Path) -> None:
        """Open the repository at *repo_path* and prepare its environment."""
        self.repo_path = repo_path
        # All worktrees live below <repo>/.agenttester/worktrees/<run>/<agent>.
        self.worktree_base = repo_path / ".agenttester" / "worktrees"
        self.repo = git.Repo(repo_path)
        self._apply_env(self.repo)

    @staticmethod
    def _apply_env(repo: git.Repo) -> None:
        """Pass the process environment on to git commands run via *repo*.

        Only variables whose names are valid identifiers are forwarded. When
        ``GIT_SSH_COMMAND`` is unset and ``~/.ssh/config`` exists, it is set
        so ssh uses that config file.
        """
        env: dict[str, str] = {}
        for key, value in os.environ.items():
            if key.isidentifier():
                env[key] = value

        if "GIT_SSH_COMMAND" not in env:
            ssh_config = Path.home() / ".ssh" / "config"
            if ssh_config.exists():
                quoted = shlex.quote(str(ssh_config))
                env["GIT_SSH_COMMAND"] = f"ssh -F {quoted}"

        repo.git.update_environment(**env)

    def has_commits(self) -> bool:
        """Return True when ``HEAD`` resolves, i.e. the repo has a commit."""
        try:
            self.repo.git.rev_parse("HEAD")
            return True
        except GitCommandError:
            return False

    def get_head_ref(self) -> str:
        """Return the SHA of the commit HEAD currently points at."""
        head_commit = self.repo.head.commit
        return head_commit.hexsha

    def create_worktree(self, agent_name: str, run_id: str) -> Path:
        """Add a worktree on a new ``agenttester/<run>/<agent>`` branch.

        Returns the path of the created worktree.
        """
        target = self.worktree_base / run_id / agent_name
        target.parent.mkdir(parents=True, exist_ok=True)
        branch_name = f"agenttester/{run_id}/{agent_name}"
        self.repo.git.worktree("add", "-b", branch_name, str(target))
        return target

    def commit_all(self, worktree_path: Path, agent_name: str) -> bool:
        """Stage everything in the worktree and commit it.

        Returns True if a commit was created, False when the index does not
        differ from HEAD after staging.
        """
        wt_repo = git.Repo(worktree_path)
        self._apply_env(wt_repo)
        wt_repo.git.add("-A")
        staged = wt_repo.index.diff("HEAD")
        if staged:
            wt_repo.index.commit(f"agenttester: {agent_name} changes")
            return True
        return False

    def get_diff_stats(self, run_id: str, agent_name: str, base_ref: str) -> DiffStats:
        """Diff *base_ref* against the agent's branch and summarise it.

        Returns an all-zero ``DiffStats`` when git reports an error.
        """
        branch = f"agenttester/{run_id}/{agent_name}"
        try:
            stat_line = self.repo.git.diff("--shortstat", base_ref, branch)

            def _count(pattern: str) -> int:
                # The shortstat line omits categories that are zero, so a
                # missing match simply means 0.
                found = re.search(pattern, stat_line)
                return int(found.group(1)) if found else 0

            files_changed = _count(r"(\d+) file")
            insertions = _count(r"(\d+) insertion")
            deletions = _count(r"(\d+) deletion")

            name_only = self.repo.git.diff("--name-only", base_ref, branch)
            changed_files = list(filter(None, name_only.strip().split("\n")))

            return DiffStats(
                files_changed=files_changed,
                insertions=insertions,
                deletions=deletions,
                changed_files=changed_files,
            )
        except GitCommandError:
            return DiffStats()

    def cleanup_worktree(self, run_id: str, agent_name: str) -> None:
        """Force-remove one agent's worktree if its directory exists."""
        target = self.worktree_base / run_id / agent_name
        if not target.exists():
            return
        self.repo.git.worktree("remove", str(target), "--force")

    def cleanup_run(self, run_id: str) -> None:
        """Remove all worktrees for a run. Branches are preserved."""
        run_dir = self.worktree_base / run_id
        if not run_dir.exists():
            return

        agent_dirs = [entry for entry in sorted(run_dir.iterdir()) if entry.is_dir()]
        for agent_dir in agent_dirs:
            # Best effort: a worktree git refuses to remove is skipped.
            with contextlib.suppress(GitCommandError):
                self.repo.git.worktree("remove", str(agent_dir), "--force")

        # Remove the now hopefully empty directories; rmdir fails (and is
        # ignored) when something is still inside.
        for directory in (run_dir, self.worktree_base):
            with contextlib.suppress(OSError):
                directory.rmdir()
@@ -0,0 +1,135 @@
1
+ """Orchestrate parallel agent runs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ from rich.console import Console
10
+
11
+ from .agent_runner import AgentResult, run_agent
12
+ from .config import AgentConfig
13
+ from .git_manager import GitManager
14
+ from .report import generate_report
15
+
16
# Rich colour names assigned to agents in order (cycled by index).
AGENT_COLORS = "cyan green yellow magenta blue".split()
# Upper bound on agents per run and on concurrently running agents.
MAX_CONCURRENT = 5
18
+
19
+
20
class Orchestrator:
    """Run multiple agents in parallel, each in its own worktree."""

    def __init__(self, repo_path: Path, console: Console) -> None:
        """Bind the orchestrator to a repository and an output console."""
        self.repo_path = repo_path
        self.git = GitManager(repo_path)
        self.console = console
        self.semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def run(
        self,
        prompt: str,
        agents: list[AgentConfig],
        *,
        keep_worktrees: bool = False,
    ) -> list[AgentResult]:
        """Execute a prompt across all agents and produce a comparison report.

        Creates one worktree per agent, runs the agents concurrently,
        auto-commits for agents with ``commit_style == "manual"`` that exited
        with 0, prints a summary, and writes a markdown report into the
        repository directory. Worktrees are removed afterwards unless
        *keep_worktrees* is set, including when the run itself fails.

        Returns:
            One ``AgentResult`` per agent, in the order of *agents*.

        Raises:
            RuntimeError: if the repository has no commits, more than
                ``MAX_CONCURRENT`` agents are given, or agent names repeat.
        """
        if not self.git.has_commits():
            msg = (
                "Repository has no commits. "
                "Create an initial commit before running agents."
            )
            raise RuntimeError(msg)

        if len(agents) > MAX_CONCURRENT:
            msg = f"Maximum {MAX_CONCURRENT} agents allowed, got {len(agents)}"
            raise RuntimeError(msg)

        names = [agent.name for agent in agents]
        if len(set(names)) != len(names):
            # Worktrees and branches are keyed by agent name; duplicates
            # would make two runs share one worktree.
            msg = f"Duplicate agent names are not allowed: {', '.join(names)}"
            raise RuntimeError(msg)

        run_id = uuid.uuid4().hex[:8]
        base_ref = self.git.get_head_ref()

        self.console.print(
            f"[bold]Starting run [cyan]{run_id}[/cyan] "
            f"with {len(agents)} agent(s) from [dim]{base_ref[:12]}[/dim][/bold]\n"
        )

        try:
            # Create worktrees
            worktrees: dict[str, Path] = {}
            for agent in agents:
                try:
                    wt = self.git.create_worktree(agent.name, run_id)
                    worktrees[agent.name] = wt
                    self.console.print(
                        f" [dim]worktree ready:[/dim] {agent.name} → {wt}"
                    )
                except Exception as e:
                    # One agent's setup failure must not stop the others; it
                    # is reported as a failed result below.
                    self.console.print(
                        f" [red]Failed to create worktree for {agent.name}: {e}[/red]"
                    )

            self.console.print()

            # Run agents concurrently
            output_lock = asyncio.Lock()

            async def _run_one(agent: AgentConfig, color: str) -> AgentResult:
                wt = worktrees.get(agent.name)
                if not wt:
                    return AgentResult(
                        agent.name, -1, 0.0, "", "", "Worktree creation failed"
                    )
                async with self.semaphore:
                    result = await run_agent(
                        agent, wt, prompt, self.console, color, output_lock
                    )
                    # Auto-commit for agents that don't commit themselves
                    if agent.commit_style == "manual" and result.exit_code == 0:
                        try:
                            committed = self.git.commit_all(wt, agent.name)
                            if committed:
                                # Same lock as the streamed agent output so
                                # messages do not interleave mid-line.
                                async with output_lock:
                                    self.console.print(
                                        f" [dim]Auto-committed changes for {agent.name}[/dim]"
                                    )
                        except Exception as e:
                            async with output_lock:
                                self.console.print(
                                    f" [yellow]Warning: auto-commit failed for "
                                    f"{agent.name}: {e}[/yellow]"
                                )
                    return result

            tasks = [
                _run_one(agent, AGENT_COLORS[i % len(AGENT_COLORS)])
                for i, agent in enumerate(agents)
            ]
            raw_results = await asyncio.gather(*tasks, return_exceptions=True)

            # Normalize results
            results: list[AgentResult] = []
            for agent, r in zip(agents, raw_results):
                if isinstance(r, BaseException):
                    results.append(AgentResult(agent.name, -1, 0.0, "", "", str(r)))
                else:
                    results.append(r)

            # Print summary
            self.console.print("\n[bold]Results:[/bold]")
            for r in results:
                icon = "✅" if r.exit_code == 0 else "❌"
                self.console.print(
                    f" {icon} [bold]{r.agent_name}[/bold] "
                    f"— {r.duration:.1f}s, exit {r.exit_code}"
                    + (f" ({r.error})" if r.error else "")
                )

            # Generate report
            report = generate_report(run_id, base_ref, prompt, results, self.git)
            report_path = self.repo_path / f"agenttester-report-{run_id}.md"
            # The report contains non-ASCII characters (status emoji), so the
            # encoding must not depend on the locale.
            report_path.write_text(report, encoding="utf-8")
            self.console.print(f"\n[bold]Report:[/bold] {report_path}")
        finally:
            # Cleanup runs on failure too, so aborted runs do not leave
            # worktrees behind.
            if keep_worktrees:
                self.console.print("[dim]Worktrees kept for inspection.[/dim]")
            else:
                self.git.cleanup_run(run_id)
                self.console.print("[dim]Worktrees removed. Branches preserved.[/dim]")

        return results
agenttester/presets.py ADDED
@@ -0,0 +1,25 @@
1
+ """Built-in agent command presets."""
2
+
3
+ from __future__ import annotations
4
+
5
# Built-in agents, keyed by agent name. Each value holds keyword arguments
# (command template, commit style, timeout in seconds) for one agent;
# "{prompt}" in a command marks where the prompt text is substituted.
PRESETS: dict[str, dict] = {
    "claude": dict(
        command='claude -p {prompt} --allowedTools "Bash,Read,Edit" --permission-mode acceptEdits',
        commit_style="auto",
        timeout=600,
    ),
    "aider": dict(
        command="aider --yes-always --no-auto-commits --message {prompt}",
        commit_style="manual",
        timeout=600,
    ),
    "codex": dict(
        command="codex exec --sandbox danger-full-access {prompt}",
        commit_style="auto",
        timeout=600,
    ),
}
agenttester/repl.py ADDED
@@ -0,0 +1,121 @@
1
+ """Interactive multi-model REPL with persistent conversation history."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import re
7
+ import urllib.error
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+
11
+ import yaml
12
+ from rich.console import Console
13
+ from rich.panel import Panel
14
+ from rich.prompt import Prompt
15
+
16
+ from .config import CONFIG_CANDIDATES
17
+ from .vllm import query as _vllm_query
18
+
19
+ _COMMAND_PATTERN = re.compile(
20
+ r"agenttester\s+query\s+(https?://\S+)\s+(\S+)\s+\{prompt\}"
21
+ )
22
+
23
+
24
@dataclass
class Model:
    """One vLLM-served model together with its running conversation."""

    # Agent name from the config file.
    name: str
    # Base URL of the server.
    endpoint: str
    # Model identifier sent with each request.
    model_id: str
    # Chat history as a list of {"role": ..., "content": ...} dicts.
    messages: list[dict] = field(default_factory=list)
30
+
31
+
32
def load_models(config_path: Path | None = None) -> dict[str, Model]:
    """Collect the agents from the config whose command is a vLLM query.

    An agent qualifies when its ``command`` matches ``_COMMAND_PATTERN``;
    the endpoint and model id are taken from the match.

    Args:
        config_path: YAML file to read. When ``None``, the first existing
            name in ``CONFIG_CANDIDATES`` is used.

    Returns:
        Mapping of agent name to ``Model``; empty when no config file is
        found or no agent qualifies.
    """
    if config_path is None:
        config_path = next(
            (p for p in map(Path, CONFIG_CANDIDATES) if p.exists()), None
        )

    if config_path is None or not config_path.exists():
        return {}

    with open(config_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    models: dict[str, Model] = {}
    for name, agent_data in (data.get("agents") or {}).items():
        # Skip entries that are not mappings or whose command is missing or
        # null instead of crashing on them; they cannot be vLLM agents.
        command = agent_data.get("command") if isinstance(agent_data, dict) else None
        if not isinstance(command, str):
            continue
        m = _COMMAND_PATTERN.search(command)
        if m:
            models[name] = Model(name=name, endpoint=m.group(1), model_id=m.group(2))

    return models
53
+
54
+
55
def _query_sync(model: Model, prompt: str, max_tokens: int = 2048) -> str:
    """Send *prompt* to *model* with its history and return the reply text.

    On success the user prompt and the assistant reply are both appended to
    ``model.messages``. On any failure the history is left exactly as it was
    and a string starting with ``[error]`` is returned instead of raising.
    """
    model.messages.append({"role": "user", "content": prompt})
    try:
        reply = _vllm_query(model.endpoint, model.model_id, model.messages, max_tokens)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")
        failure = f"[error] HTTP {e.code}: {body}"
    except OSError as e:
        failure = f"[error] {e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Malformed JSON or an unexpected response shape.
        failure = f"[error] malformed response: {e!r}"
    else:
        if isinstance(reply, str):
            model.messages.append({"role": "assistant", "content": reply})
            return reply
        failure = "[error] response contained no text content"

    # Roll back the user message so the history never holds a turn that got
    # no assistant reply.
    model.messages.pop()
    return failure
68
+
69
+
70
async def _query_all(models: dict[str, Model], prompt: str) -> dict[str, str]:
    """Query every model concurrently (one worker thread each).

    Returns a mapping of model name to reply; an exception raised by a
    query is replaced by its string form.
    """
    names = list(models)
    outcomes = await asyncio.gather(
        *(asyncio.to_thread(_query_sync, models[name], prompt) for name in names),
        return_exceptions=True,
    )

    replies: dict[str, str] = {}
    for name, outcome in zip(names, outcomes, strict=True):
        if isinstance(outcome, Exception):
            replies[name] = str(outcome)
        else:
            replies[name] = outcome
    return replies
80
+
81
+
82
async def run_repl(config_path: Path | None = None) -> None:
    """Run the interactive prompt loop over all configured vLLM models.

    Each entered line is sent to every model and the replies are shown in
    one panel per model. ``/reset`` clears every model's history; ``exit``,
    EOF, or Ctrl-C ends the session.
    """
    # Local import: Text renders a string literally, without markup parsing.
    from rich.text import Text

    console = Console()
    models = load_models(config_path)
    if not models:
        console.print("[red]No vLLM model agents found in config.[/red]")
        console.print(
            "Add agents using 'agenttester query' commands to your agenttester.yaml."
        )
        return

    console.print(f"[bold]Models:[/bold] {', '.join(models)}")
    console.print(
        "[dim]Commands: /reset (clear history), exit or Ctrl-C to quit[/dim]\n"
    )

    while True:
        try:
            prompt = Prompt.ask("[bold cyan]>[/bold cyan]")
        except (EOFError, KeyboardInterrupt):
            console.print("\n[dim]bye[/dim]")
            break

        prompt = prompt.strip()
        if not prompt:
            continue
        if prompt == "exit":
            break
        if prompt == "/reset":
            for model in models.values():
                model.messages.clear()
            console.print("[dim]Context cleared.[/dim]\n")
            continue

        console.print()
        responses = await _query_all(models, prompt)
        for name, reply in responses.items():
            # Replies and names are arbitrary text. Passed as plain strings
            # they would be parsed as Rich markup, so bracketed text such as
            # the "[error]" prefix would be consumed as a tag.
            console.print(
                Panel(
                    Text(str(reply)),
                    title=Text(str(name), style="bold"),
                    border_style="blue",
                )
            )
        console.print()
agenttester/report.py ADDED
@@ -0,0 +1,75 @@
1
+ """Generate markdown comparison reports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+
7
+ from .agent_runner import AgentResult
8
+ from .git_manager import GitManager
9
+
10
+
11
def generate_report(
    run_id: str,
    base_ref: str,
    prompt: str,
    results: list[AgentResult],
    git: GitManager,
) -> str:
    """Build a markdown report comparing agent results.

    The report has a header (base ref, UTC date, agent names), the prompt in
    a fenced block, a summary table with one row per agent, and one detail
    section per agent listing its branch, duration, exit code, error, and
    changed files.

    Args:
        run_id: Identifier of the run; also part of each branch name.
        base_ref: Commit the agents started from (shown truncated to 12
            characters).
        prompt: Prompt text that was sent to the agents.
        results: Per-agent results, in display order.
        git: Provider of ``get_diff_stats(run_id, agent_name, base_ref)``.

    Returns:
        The report as a single markdown string.
    """
    # Ask git once per agent; both the table and the detail sections use it.
    all_stats = [git.get_diff_stats(run_id, r.agent_name, base_ref) for r in results]

    # Use a fence longer than any backtick run in the prompt so a prompt that
    # itself contains ``` cannot terminate the block early.
    longest_run = current_run = 0
    for ch in prompt:
        current_run = current_run + 1 if ch == "`" else 0
        longest_run = max(longest_run, current_run)
    fence = "`" * max(3, longest_run + 1)

    lines: list[str] = [
        f"# AgentTester Report: {run_id}",
        "",
        f"**Base ref**: `{base_ref[:12]}`",
        f"**Date**: {datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
        f"**Agents**: {', '.join(r.agent_name for r in results)}",
        "",
        "## Prompt",
        "",
        fence,
        prompt,
        fence,
        "",
        "## Summary",
        "",
        "| Agent | Status | Duration | Files | Insertions | Deletions |",
        "|-------|--------|----------|-------|------------|-----------|",
    ]

    for r, stats in zip(results, all_stats):
        status = "✅" if r.exit_code == 0 else "❌"
        if r.error:
            # Keep the table row intact: collapse newlines and other
            # whitespace, and escape the column separator.
            cell = " ".join(str(r.error).split()).replace("|", "\\|")
            status += f" {cell}"
        lines.append(
            f"| {r.agent_name} | {status} | {r.duration:.1f}s "
            f"| {stats.files_changed} | +{stats.insertions} "
            f"| -{stats.deletions} |"
        )

    lines.append("")

    # Per-agent details
    for r, stats in zip(results, all_stats):
        lines.extend(
            [
                f"## {r.agent_name}",
                "",
                f"**Branch**: `agenttester/{run_id}/{r.agent_name}`",
                f"**Duration**: {r.duration:.1f}s",
                f"**Exit code**: {r.exit_code}",
            ]
        )

        if r.error:
            lines.append(f"**Error**: {r.error}")

        if stats.changed_files:
            lines.extend(["", "### Files Changed", ""])
            lines.extend(f"- `{f}`" for f in stats.changed_files)

        lines.append("")

    return "\n".join(lines)
agenttester/vllm.py ADDED
@@ -0,0 +1,35 @@
1
+ """HTTP client for vLLM OpenAI-compatible inference servers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import urllib.error
7
+ import urllib.request
8
+
9
+
10
def query(
    endpoint: str,
    model_id: str,
    messages: list[dict],
    max_tokens: int = 2048,
    timeout: int = 120,
) -> str:
    """POST a chat completion request and return the first choice's text.

    The request body is a JSON object with ``model``, ``messages`` and
    ``max_tokens``; it is sent to ``<endpoint>/v1/chat/completions`` after
    trailing slashes are stripped from *endpoint*.

    Args:
        endpoint: Base URL of the server.
        model_id: Value sent as the ``model`` field.
        messages: Chat messages sent as the ``messages`` field.
        max_tokens: Value sent as the ``max_tokens`` field.
        timeout: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        ``choices[0].message.content`` from the decoded JSON response.

    Raises:
        urllib.error.HTTPError or OSError on failure.
    """
    # Serialise the body first, then build the target URL and request.
    body = {
        "model": model_id,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    encoded_body = json.dumps(body).encode()

    base_url = endpoint.rstrip("/")
    request = urllib.request.Request(
        f"{base_url}/v1/chat/completions",
        data=encoded_body,
        headers={"Content-Type": "application/json"},
    )

    with urllib.request.urlopen(request, timeout=timeout) as response:
        reply = json.loads(response.read())

    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: agenttester
3
+ Version: 0.1.0
4
+ Summary: Send prompts to multiple coding agents in parallel and compare results
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: gitpython>=3.1
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: rich>=13.0
11
+ Requires-Dist: typer>=0.9
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
14
+ Requires-Dist: pytest>=8.0; extra == 'dev'
15
+ Requires-Dist: ruff>=0.4; extra == 'dev'
@@ -0,0 +1,15 @@
1
+ agenttester/__init__.py,sha256=auNtvZAfdLaH5OG43PwjKq-TuUFxAUj_EB7h51pVNGM,551
2
+ agenttester/agent_runner.py,sha256=h6etvHJ2lFHA5i5wQyFUpC2Pdlo3zYxcqODfkXMRiqI,9939
3
+ agenttester/cli.py,sha256=YoX-GvqXokk9Q0dXOO7S5-02ZZAVloAaL5BgnR8h6yA,5422
4
+ agenttester/config.py,sha256=1TvBiY6YqkKNPpZUOAfCWuvqemEJ7KiD4rWUHU9dRVU,1938
5
+ agenttester/git_manager.py,sha256=0brFkO0kaRtyL0FL-9vcOwoeas8sb5mZ1oGgIvvp_vg,4513
6
+ agenttester/orchestrator.py,sha256=1TyvjvY8FbeGJylbUSPRP_NCkaQGY1dqsjboiJLNYiE,4771
7
+ agenttester/presets.py,sha256=WAYXJDJD6iKY4ga8rCfeZ23MbB2UY20beQJ4NR5fie0,649
8
+ agenttester/repl.py,sha256=Lg3De4IwWVs6eJETQIGSArhOyBf_vmagrb6c7JWnrf4,3615
9
+ agenttester/report.py,sha256=C1CtErdsQYUh7c6Xxfg_FW9ZVEyTnbZ0i8aeiK-vXIg,2084
10
+ agenttester/vllm.py,sha256=h3abkbHWxqCW0evNOzG68PhTxzz4ore-rlGKAX8ug64,917
11
+ agenttester-0.1.0.dist-info/METADATA,sha256=rUBqZbXVWTjD_U_P_C9ocTac1htkNvuzgmDg4zQAxvY,470
12
+ agenttester-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
13
+ agenttester-0.1.0.dist-info/entry_points.txt,sha256=KUIZqIbLGFCwo_dG9bfLGTO52aFGeoIcAMmapqozvMM,52
14
+ agenttester-0.1.0.dist-info/licenses/LICENSE,sha256=t1iGtmPs5Z0ZhwTcbgnw4nUIDhoDPWlOLwSIw5GLJKo,1066
15
+ agenttester-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ agenttester = agenttester.cli:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sroomberg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.