agenttester 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenttester/__init__.py +24 -0
- agenttester/agent_runner.py +341 -0
- agenttester/cli.py +177 -0
- agenttester/config.py +65 -0
- agenttester/git_manager.py +123 -0
- agenttester/orchestrator.py +135 -0
- agenttester/presets.py +25 -0
- agenttester/repl.py +121 -0
- agenttester/report.py +75 -0
- agenttester/vllm.py +35 -0
- agenttester-0.1.0.dist-info/METADATA +15 -0
- agenttester-0.1.0.dist-info/RECORD +15 -0
- agenttester-0.1.0.dist-info/WHEEL +4 -0
- agenttester-0.1.0.dist-info/entry_points.txt +2 -0
- agenttester-0.1.0.dist-info/licenses/LICENSE +21 -0
agenttester/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""AgentTester: Multi-agent comparison tool.
|
|
2
|
+
|
|
3
|
+
Usable as a CLI (``agenttester run …``), a Docker container, or a Python
|
|
4
|
+
library::
|
|
5
|
+
|
|
6
|
+
from agenttester import Orchestrator, AgentConfig, load_config
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .agent_runner import AgentResult, run_agent
|
|
10
|
+
from .config import AgentConfig, load_config
|
|
11
|
+
from .git_manager import DiffStats, GitManager
|
|
12
|
+
from .orchestrator import Orchestrator
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"AgentConfig",
|
|
16
|
+
"AgentResult",
|
|
17
|
+
"DiffStats",
|
|
18
|
+
"GitManager",
|
|
19
|
+
"Orchestrator",
|
|
20
|
+
"load_config",
|
|
21
|
+
"run_agent",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
__version__ = "0.1.0"
|
agenttester/agent_runner.py
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
"""Run a single agent process in a worktree."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
import signal
|
|
9
|
+
import subprocess
|
|
10
|
+
import tempfile
|
|
11
|
+
import time
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import rich.markup
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
|
|
18
|
+
from .config import AgentConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class AgentResult:
    """Outcome of one agent invocation.

    The field order is part of the interface: instances may be built
    positionally as ``(agent_name, exit_code, duration, stdout, stderr, error)``.
    """

    # Name of the agent this result belongs to.
    agent_name: str
    # Exit status of the agent process; the runners in this module report -1
    # when the run timed out or failed before an exit status was available.
    exit_code: int
    # Elapsed time in seconds.
    duration: float
    # Captured standard output.
    stdout: str
    # Captured standard error.
    stderr: str
    # Human-readable failure description, or None when nothing went wrong.
    error: str | None = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── helpers for remote execution ──────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _rsync_to_remote(local: Path, host: str, remote_dir: str) -> None:
    """Mirror the contents of *local* onto ``host:remote_dir`` using rsync.

    The trailing slashes make rsync copy the directory's contents rather than
    the directory itself. rsync's output is captured instead of being echoed.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    source = f"{local}/"
    destination = f"{host}:{remote_dir}/"
    argv = ["rsync", "-az", "--delete", source, destination]
    subprocess.run(argv, capture_output=True, check=True)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _rsync_from_remote(host: str, remote_dir: str, local: Path) -> None:
    """Mirror the contents of ``host:remote_dir`` back into *local* using rsync.

    This is the reverse direction of ``_rsync_to_remote`` with the same rsync
    flags; rsync's output is captured instead of being echoed.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    remote_source = f"{host}:{remote_dir}/"
    local_target = f"{local}/"
    argv = ["rsync", "-az", "--delete", remote_source, local_target]
    subprocess.run(argv, capture_output=True, check=True)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _build_ssh_command(
|
|
67
|
+
agent: AgentConfig,
|
|
68
|
+
remote_dir: str,
|
|
69
|
+
cmd: str,
|
|
70
|
+
) -> str:
|
|
71
|
+
"""Wrap *cmd* in an SSH invocation on the agent's host."""
|
|
72
|
+
env_exports = ""
|
|
73
|
+
if agent.env:
|
|
74
|
+
parts = " ".join(f"{k}={shlex.quote(v)}" for k, v in agent.env.items())
|
|
75
|
+
env_exports = f"export {parts} && "
|
|
76
|
+
inner = f"{env_exports}cd {shlex.quote(remote_dir)} && {cmd}"
|
|
77
|
+
return f"ssh {agent.host} {shlex.quote(inner)}"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ── core entry point ──────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _prepare_command(agent: AgentConfig, prompt: str) -> tuple[str, Path | None, bool]:
|
|
84
|
+
"""Substitute placeholders and decide stdin mode.
|
|
85
|
+
|
|
86
|
+
Returns (final_cmd, prompt_file_path | None, pipe_stdin).
|
|
87
|
+
"""
|
|
88
|
+
cmd = agent.command
|
|
89
|
+
prompt_file_path: Path | None = None
|
|
90
|
+
|
|
91
|
+
if "{prompt_file}" in cmd:
|
|
92
|
+
fd, path_str = tempfile.mkstemp(suffix=".md", prefix="agenttester-prompt-")
|
|
93
|
+
prompt_file_path = Path(path_str)
|
|
94
|
+
with os.fdopen(fd, "w") as f:
|
|
95
|
+
f.write(prompt)
|
|
96
|
+
cmd = cmd.replace("{prompt_file}", path_str)
|
|
97
|
+
|
|
98
|
+
if "{prompt}" in cmd:
|
|
99
|
+
cmd = cmd.replace("{prompt}", shlex.quote(prompt))
|
|
100
|
+
|
|
101
|
+
has_placeholder = "{prompt}" in agent.command or "{prompt_file}" in agent.command
|
|
102
|
+
return cmd, prompt_file_path, not has_placeholder
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
async def run_agent(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Dispatch an agent run to the local or the remote runner.

    The choice is made by ``agent.is_remote``; all arguments are forwarded
    unchanged to the selected runner and its result is returned as-is.
    """
    runner = _run_remote if agent.is_remote else _run_local
    return await runner(agent, worktree_path, prompt, console, color, output_lock)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def _run_local(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Run an agent as a local subprocess.

    The command is executed through the shell with *worktree_path* as working
    directory and ``agent.env`` layered over the current environment. When the
    command template has no prompt placeholder, the prompt is written to the
    child's standard input. Output is streamed to *console* while it is
    collected.

    Returns:
        An ``AgentResult``. Timeouts and other failures are reported through
        ``exit_code=-1`` and the ``error`` field rather than raised.
    """
    start = time.monotonic()
    cmd, prompt_file_path, pipe_stdin = _prepare_command(agent, prompt)

    full_env = os.environ.copy()
    full_env.update(agent.env)

    # The escaped bracket keeps Rich from parsing "[name]" as a markup tag.
    prefix = f"[{color}]\\[{agent.name}][/{color}]"
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    def _result(exit_code: int, error: str | None = None) -> AgentResult:
        """Build the result from whatever output has been collected so far."""
        return AgentResult(
            agent_name=agent.name,
            exit_code=exit_code,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
            error=error,
        )

    proc: asyncio.subprocess.Process | None = None
    try:
        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdin=asyncio.subprocess.PIPE if pipe_stdin else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=worktree_path,
            env=full_env,
            # Own session/process group so _kill_proc_tree can signal the
            # whole group.
            start_new_session=True,
        )

        if pipe_stdin and proc.stdin:
            try:
                proc.stdin.write(prompt.encode())
                await proc.stdin.drain()
            except (BrokenPipeError, ConnectionResetError):
                # The child closed stdin (or exited) without reading the whole
                # prompt. That is not a failure by itself: keep going so its
                # output and real exit status are still reported.
                pass
            finally:
                proc.stdin.close()

        await _stream_and_wait(
            proc,
            agent.timeout,
            stdout_lines,
            stderr_lines,
            console,
            prefix,
            output_lock,
        )
        return _result(proc.returncode or 0)

    except TimeoutError:
        _kill_proc_tree(proc)
        return _result(-1, f"Timed out after {agent.timeout}s")
    except Exception as e:
        _kill_proc_tree(proc)
        return _result(-1, str(e))
    finally:
        if prompt_file_path:
            prompt_file_path.unlink(missing_ok=True)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
async def _run_remote(
    agent: AgentConfig,
    worktree_path: Path,
    prompt: str,
    console: Console,
    color: str,
    output_lock: asyncio.Lock,
) -> AgentResult:
    """Rsync to remote, run agent via SSH, rsync results back.

    Steps: push *worktree_path* to ``<agent.remote_workdir>/<agent.name>`` on
    ``agent.host``, run the agent command there over ssh while streaming its
    output, then pull the remote directory back into *worktree_path*.

    Returns:
        An ``AgentResult``. Timeouts and other failures (including rsync
        errors) are reported through ``exit_code=-1`` and the ``error`` field
        rather than raised.
    """
    start = time.monotonic()
    remote_dir = f"{agent.remote_workdir}/{agent.name}"
    # The escaped bracket keeps Rich from parsing "[name]" as a markup tag.
    prefix = f"[{color}]\\[{agent.name}][/{color}]"
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []
    # Initialised before the try block so the finally clause can always clean
    # up, even when a failure happens before the command is prepared.
    prompt_file_path: Path | None = None
    proc: asyncio.subprocess.Process | None = None

    def _result(exit_code: int, error: str | None = None) -> AgentResult:
        """Build the result from whatever output has been collected so far."""
        return AgentResult(
            agent_name=agent.name,
            exit_code=exit_code,
            duration=time.monotonic() - start,
            stdout="\n".join(stdout_lines),
            stderr="\n".join(stderr_lines),
            error=error,
        )

    try:
        # 1. Push worktree to remote
        async with output_lock:
            console.print(f" {prefix} [dim]syncing to {agent.host}:{remote_dir}[/dim]")
        await asyncio.to_thread(_rsync_to_remote, worktree_path, agent.host, remote_dir)

        # 2. Build and run command over SSH
        # NOTE(review): a {prompt_file} placeholder is replaced with a path to
        # a temp file created on THIS machine; it looks like that file is not
        # copied to the remote host — confirm before relying on it remotely.
        cmd, prompt_file_path, pipe_stdin = _prepare_command(agent, prompt)
        ssh_cmd = _build_ssh_command(agent, remote_dir, cmd)

        proc = await asyncio.create_subprocess_shell(
            ssh_cmd,
            # Without a placeholder the prompt must travel via stdin, exactly
            # as in the local runner; ssh forwards it to the remote command.
            stdin=asyncio.subprocess.PIPE if pipe_stdin else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            start_new_session=True,
        )

        if pipe_stdin and proc.stdin:
            try:
                proc.stdin.write(prompt.encode())
                await proc.stdin.drain()
            except (BrokenPipeError, ConnectionResetError):
                # The command closed stdin without reading the whole prompt;
                # keep going so its output and exit status are still reported.
                pass
            finally:
                proc.stdin.close()

        await _stream_and_wait(
            proc,
            agent.timeout,
            stdout_lines,
            stderr_lines,
            console,
            prefix,
            output_lock,
        )
        exit_code = proc.returncode or 0

        # 3. Pull results back
        async with output_lock:
            console.print(
                f" {prefix} [dim]syncing from {agent.host}:{remote_dir}[/dim]"
            )
        await asyncio.to_thread(
            _rsync_from_remote, agent.host, remote_dir, worktree_path
        )

        return _result(exit_code)

    except TimeoutError:
        # Only the local ssh client can be signalled from here.
        _kill_proc_tree(proc)
        return _result(-1, f"Timed out after {agent.timeout}s")
    except Exception as e:
        # No-op when the ssh process already finished (e.g. rsync failed).
        _kill_proc_tree(proc)
        return _result(-1, str(e))
    finally:
        # Remove the temp prompt file on every path, not just on success.
        if prompt_file_path:
            prompt_file_path.unlink(missing_ok=True)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# ── shared streaming helper ───────────────────────────────────────────
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
async def _stream_and_wait(
    proc: asyncio.subprocess.Process,
    timeout: int,
    stdout_lines: list[str],
    stderr_lines: list[str],
    console: Console,
    prefix: str,
    output_lock: asyncio.Lock,
) -> None:
    """Stream stdout/stderr and wait, raising TimeoutError on expiry.

    Every decoded line is appended to *stdout_lines* / *stderr_lines* and
    echoed to *console* behind *prefix* (stderr dimmed). *output_lock*
    serialises printing between concurrently running agents.

    Args:
        proc: Process whose pipes are read until EOF.
        timeout: Overall budget in seconds for draining the pipes AND for the
            process to exit.

    Raises:
        TimeoutError: if the budget is exceeded; the process group is killed
            first.
    """

    async def _read(
        stream: asyncio.StreamReader | None,
        lines: list[str],
        is_err: bool,
    ) -> None:
        if stream is None:
            return
        while True:
            try:
                raw = await stream.readline()
            except ValueError:
                # readline() raises ValueError when a line exceeds the
                # reader's buffer limit; the oversized data has already been
                # dropped from the buffer. Record a marker instead of
                # aborting the whole run.
                raw = b"<over-long output line dropped>\n"
            if not raw:
                break
            line = raw.decode("utf-8", errors="replace").rstrip()
            lines.append(line)
            # Escape Rich markup in agent output to avoid parse errors
            safe = rich.markup.escape(line)
            async with output_lock:
                if is_err:
                    console.print(f" {prefix} [dim]{safe}[/dim]")
                else:
                    console.print(f" {prefix} {safe}")

    async def _drain_then_wait() -> None:
        await asyncio.gather(
            _read(proc.stdout, stdout_lines, False),
            _read(proc.stderr, stderr_lines, True),
        )
        # Inside the timed section on purpose: a process that closes its
        # pipes but keeps running must not block forever.
        await proc.wait()

    try:
        await asyncio.wait_for(_drain_then_wait(), timeout=timeout)
    except asyncio.TimeoutError as exc:
        _kill_proc_tree(proc)
        # Reap the killed process so it does not linger as a zombie. The wait
        # is bounded (5 s) in case the kill could not be delivered.
        try:
            await asyncio.wait_for(proc.wait(), timeout=5)
        except asyncio.TimeoutError:
            pass
        raise TimeoutError from exc
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _kill_proc_tree(
|
|
333
|
+
proc: asyncio.subprocess.Process | None,
|
|
334
|
+
) -> None:
|
|
335
|
+
"""Kill a process and its entire process group."""
|
|
336
|
+
if proc is None or proc.returncode is not None:
|
|
337
|
+
return
|
|
338
|
+
try:
|
|
339
|
+
os.killpg(proc.pid, signal.SIGKILL)
|
|
340
|
+
except (ProcessLookupError, PermissionError):
|
|
341
|
+
proc.kill()
|
agenttester/cli.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""CLI entry point."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import urllib.error
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Annotated
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from .config import load_config
|
|
14
|
+
from .orchestrator import Orchestrator
|
|
15
|
+
from .repl import run_repl
|
|
16
|
+
from .vllm import query as _vllm_query
|
|
17
|
+
|
|
18
|
+
app = typer.Typer(
|
|
19
|
+
name="agenttester",
|
|
20
|
+
help="Send a prompt to multiple coding agents in parallel and compare results.",
|
|
21
|
+
no_args_is_help=True,
|
|
22
|
+
)
|
|
23
|
+
console = Console()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _parse_agent_names(raw: list[str]) -> list[str]:
|
|
27
|
+
"""Flatten comma-separated and repeated --agents values."""
|
|
28
|
+
names: list[str] = []
|
|
29
|
+
for entry in raw:
|
|
30
|
+
names.extend(n.strip() for n in entry.split(",") if n.strip())
|
|
31
|
+
return names
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@app.command()
def run(
    prompt: Annotated[
        str | None, typer.Argument(help="Prompt to send to each agent")
    ] = None,
    agents: Annotated[
        list[str] | None,
        typer.Option(
            "--agents",
            "-a",
            help="Agent names (comma-separated or repeated)",
        ),
    ] = None,
    prompt_file: Annotated[
        Path | None,
        typer.Option("--prompt-file", "-f", help="Read prompt from a file"),
    ] = None,
    keep_worktrees: Annotated[
        bool,
        typer.Option("--keep-worktrees", help="Keep worktrees after the run"),
    ] = False,
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
    timeout: Annotated[
        int | None,
        typer.Option(
            "--timeout",
            "-t",
            help="Override timeout for all agents (seconds)",
        ),
    ] = None,
    repo: Annotated[
        Path | None,
        typer.Option("--repo", "-r", help="Path to target git repo (default: cwd)"),
    ] = None,
) -> None:
    """Run agents in parallel on a prompt and compare results."""
    # The docstring above is kept short on purpose: Typer shows it as the
    # command's --help text. Every validation failure below prints a message
    # and exits with status 1.

    # Local import: this module only imports Console from rich at file level.
    from rich.markup import escape

    # Resolve prompt (--prompt-file takes precedence over the argument).
    if prompt_file:
        # is_file() also rejects directories, which read_text() cannot read.
        if not prompt_file.is_file():
            console.print(f"[red]Prompt file not found: {prompt_file}[/red]")
            raise typer.Exit(1)
        # Explicit UTF-8 so the prompt does not depend on the process locale.
        prompt_text = prompt_file.read_text(encoding="utf-8").strip()
        if not prompt_text:
            # Same policy as an empty positional prompt: nothing to send.
            console.print(f"[red]Prompt file is empty: {prompt_file}[/red]")
            raise typer.Exit(1)
    elif prompt:
        prompt_text = prompt
    else:
        console.print("[red]Provide a prompt or --prompt-file[/red]")
        raise typer.Exit(1)

    # Resolve agents
    if not agents:
        console.print("[red]Specify at least one agent with --agents[/red]")
        raise typer.Exit(1)

    agent_names = _parse_agent_names(agents)
    if len(agent_names) > 5:
        console.print("[red]Maximum 5 agents allowed[/red]")
        raise typer.Exit(1)

    # Load config and resolve agent objects
    all_agents = load_config(config)
    selected = []
    for name in agent_names:
        if name not in all_agents:
            console.print(
                f"[red]Unknown agent: {name}[/red]\n"
                f"Available: {', '.join(sorted(all_agents))}"
            )
            raise typer.Exit(1)
        agent_cfg = all_agents[name]
        if timeout is not None:
            agent_cfg.timeout = timeout
        selected.append(agent_cfg)

    # Run
    repo_path = (repo or Path.cwd()).resolve()
    orchestrator = Orchestrator(repo_path, console)

    try:
        asyncio.run(
            orchestrator.run(prompt_text, selected, keep_worktrees=keep_worktrees)
        )
    except RuntimeError as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        console.print(f"[red]{escape(str(e))}[/red]")
        raise typer.Exit(1) from e
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@app.command()
def query(
    endpoint: Annotated[
        str, typer.Argument(help="vLLM server endpoint (http://HOST:PORT)")
    ],
    model_id: Annotated[str, typer.Argument(help="Model ID served by the endpoint")],
    prompt: Annotated[str, typer.Argument(help="Prompt to send")],
    max_tokens: Annotated[
        int, typer.Option("--max-tokens", help="Maximum tokens to generate")
    ] = 2048,
) -> None:
    """Query a vLLM model server and print the response."""
    # The docstring above is kept short on purpose: Typer shows it as the
    # command's --help text. HTTP and OS-level errors are printed in red and
    # turned into exit status 1.

    # Local import: this module only imports Console from rich at file level.
    from rich.markup import escape

    try:
        result = _vllm_query(
            endpoint, model_id, [{"role": "user", "content": prompt}], max_tokens
        )
        # markup=False: the response is arbitrary model text and must be
        # printed literally, not parsed as Rich markup (where "[...]" would be
        # treated as a tag and may be swallowed or raise a markup error).
        console.print(result, markup=False)
    except urllib.error.HTTPError as e:
        # HTTPError must be handled before OSError, of which it is a subclass.
        body = e.read().decode(errors="replace")
        console.print(f"[red]HTTP {e.code}: {escape(body)}[/red]")
        raise typer.Exit(1) from e
    except OSError as e:
        console.print(f"[red]{escape(str(e))}[/red]")
        raise typer.Exit(1) from e
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@app.command()
def repl(
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
) -> None:
    """Start an interactive REPL across all vLLM model agents."""
    # Build the REPL coroutine first, then drive it to completion on a fresh
    # event loop; the config path (possibly None) is passed through untouched.
    session = run_repl(config)
    asyncio.run(session)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@app.command("agents")
def list_agents(
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to config YAML file"),
    ] = None,
) -> None:
    """List available agents."""
    # The docstring above is kept short on purpose: Typer shows it as the
    # command's --help text.

    # Use the preset table itself instead of a hand-maintained copy of its
    # keys, so the "(preset)" tag stays correct when presets change.
    from .presets import PRESETS

    all_agents = load_config(config)
    console.print("[bold]Available agents:[/bold]\n")
    for name, agent in sorted(all_agents.items()):
        tag = "[dim](preset)[/dim]" if name in PRESETS else ""
        console.print(f" [bold]{name}[/bold] {tag}")
        console.print(f" command: [dim]{agent.command}[/dim]")
        console.print(f" host: {agent.host}")
        console.print(f" commit: {agent.commit_style} timeout: {agent.timeout}s")
        console.print()
|
agenttester/config.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Agent configuration and loading."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from .presets import PRESETS
|
|
11
|
+
|
|
12
|
+
CONFIG_CANDIDATES = [
|
|
13
|
+
"agenttester.yaml",
|
|
14
|
+
"agenttester.yml",
|
|
15
|
+
".agenttester.yaml",
|
|
16
|
+
".agenttester.yml",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class AgentConfig:
    """Settings describing how one coding agent is launched."""

    # Identifier of the agent.
    name: str
    # Shell command template used to start the agent.
    command: str
    # Host the agent runs on; "localhost" means a local run.
    host: str = "localhost"
    # Base directory used on the remote host.
    remote_workdir: str = "/tmp/agenttester"
    # "auto" (agent commits) or "manual" (we commit)
    commit_style: str = "auto"
    # Extra environment variables for the agent process.
    env: dict[str, str] = field(default_factory=dict)
    # Time limit in seconds.
    timeout: int = 600

    @property
    def is_remote(self) -> bool:
        """Whether ``host`` is anything other than the literal "localhost"."""
        return not self.host == "localhost"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_config(config_path: Path | None = None) -> dict[str, AgentConfig]:
    """Load agent configs from YAML, merged with built-in presets.

    Args:
        config_path: YAML file to read. When None, the first existing file
            from ``CONFIG_CANDIDATES`` (relative to the current working
            directory) is used. A path that does not exist is ignored.

    Returns:
        Mapping of agent name to config. It always contains the built-in
        presets; an agent defined in the YAML ``agents`` section replaces a
        preset of the same name entirely.

    Raises:
        KeyError: if an agent entry in the file has no ``command``.
    """
    agents: dict[str, AgentConfig] = {
        name: AgentConfig(name=name, **preset) for name, preset in PRESETS.items()
    }

    if config_path is None:
        # First candidate that exists, or None when there is none.
        candidates = (Path(candidate) for candidate in CONFIG_CANDIDATES)
        config_path = next((p for p in candidates if p.exists()), None)

    if config_path is None or not config_path.exists():
        return agents

    # YAML files are UTF-8 by convention; do not depend on the locale.
    with open(config_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    for name, agent_data in (data.get("agents") or {}).items():
        # "env:" may be present but empty (None), and YAML parses unquoted
        # values such as 8080 or true into non-string types. Environment
        # variables must be strings, so normalise here once.
        raw_env = agent_data.get("env") or {}
        env = {
            str(key): "" if value is None else str(value)
            for key, value in raw_env.items()
        }
        agents[name] = AgentConfig(
            name=name,
            command=agent_data["command"],
            host=agent_data.get("host", "localhost"),
            remote_workdir=agent_data.get("remote_workdir", "/tmp/agenttester"),
            commit_style=agent_data.get("commit_style", "auto"),
            env=env,
            timeout=agent_data.get("timeout", 600),
        )

    return agents
|
agenttester/git_manager.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Git worktree and branch management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import shlex
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import git
|
|
13
|
+
from git.exc import GitCommandError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class DiffStats:
    """Summary of what changed between a base ref and an agent's branch.

    All fields default to "nothing changed", so ``DiffStats()`` is a valid
    empty result.
    """

    # Number of files touched.
    files_changed: int = 0
    # Number of added lines.
    insertions: int = 0
    # Number of removed lines.
    deletions: int = 0
    # Paths of the touched files.
    changed_files: list[str] = field(default_factory=list)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GitManager:
    """Create, inspect and remove the git worktrees used by agent runs.

    Each agent of a run gets the branch ``agenttester/<run_id>/<agent_name>``
    checked out in ``<repo>/.agenttester/worktrees/<run_id>/<agent_name>``.
    """

    def __init__(self, repo_path: Path) -> None:
        self.repo_path = repo_path
        self.worktree_base = repo_path / ".agenttester" / "worktrees"
        self.repo = git.Repo(repo_path)
        self._apply_env(self.repo)

    @staticmethod
    def _apply_env(repo: git.Repo) -> None:
        """Pass the current process environment on to *repo*'s git commands.

        Only variables whose names are valid identifiers are forwarded. When
        ``GIT_SSH_COMMAND`` is not set and ``~/.ssh/config`` exists, git is
        pointed at that SSH config file explicitly.
        """
        env: dict[str, str] = {}
        for key, value in os.environ.items():
            if key.isidentifier():
                env[key] = value

        if "GIT_SSH_COMMAND" not in env:
            ssh_config = Path.home() / ".ssh" / "config"
            if ssh_config.exists():
                quoted = shlex.quote(str(ssh_config))
                env["GIT_SSH_COMMAND"] = f"ssh -F {quoted}"

        repo.git.update_environment(**env)

    def has_commits(self) -> bool:
        """Return True when ``HEAD`` resolves, i.e. at least one commit exists."""
        try:
            self.repo.git.rev_parse("HEAD")
        except GitCommandError:
            return False
        else:
            return True

    def get_head_ref(self) -> str:
        """Return the SHA of the commit ``HEAD`` currently points at."""
        head_commit = self.repo.head.commit
        return head_commit.hexsha

    def create_worktree(self, agent_name: str, run_id: str) -> Path:
        """Add a worktree on a fresh branch for one agent and return its path."""
        target = self.worktree_base / run_id / agent_name
        target.parent.mkdir(parents=True, exist_ok=True)
        branch_name = f"agenttester/{run_id}/{agent_name}"
        self.repo.git.worktree("add", "-b", branch_name, str(target))
        return target

    def commit_all(self, worktree_path: Path, agent_name: str) -> bool:
        """Stage everything in the worktree and commit it.

        Returns True when a commit was made, False when there was nothing to
        commit after staging.
        """
        wt_repo = git.Repo(worktree_path)
        self._apply_env(wt_repo)
        wt_repo.git.add("-A")
        staged = wt_repo.index.diff("HEAD")
        if staged:
            wt_repo.index.commit(f"agenttester: {agent_name} changes")
            return True
        return False

    def get_diff_stats(self, run_id: str, agent_name: str, base_ref: str) -> DiffStats:
        """Compare *base_ref* with the agent's branch.

        Returns an empty ``DiffStats`` when git fails (for example because the
        branch does not exist).
        """
        branch = f"agenttester/{run_id}/{agent_name}"
        # One pattern per counter found in ``git diff --shortstat`` output.
        patterns = (
            ("files_changed", r"(\d+) file"),
            ("insertions", r"(\d+) insertion"),
            ("deletions", r"(\d+) deletion"),
        )
        try:
            stat_line = self.repo.git.diff("--shortstat", base_ref, branch)

            counts = {field_name: 0 for field_name, _ in patterns}
            if stat_line:
                for field_name, pattern in patterns:
                    found = re.search(pattern, stat_line)
                    if found:
                        counts[field_name] = int(found.group(1))

            name_only = self.repo.git.diff("--name-only", base_ref, branch)
            paths = [path for path in name_only.strip().split("\n") if path]

            return DiffStats(
                files_changed=counts["files_changed"],
                insertions=counts["insertions"],
                deletions=counts["deletions"],
                changed_files=paths,
            )
        except GitCommandError:
            return DiffStats()

    def cleanup_worktree(self, run_id: str, agent_name: str) -> None:
        """Force-remove one agent's worktree if its directory exists."""
        target = self.worktree_base / run_id / agent_name
        if not target.exists():
            return
        self.repo.git.worktree("remove", str(target), "--force")

    def cleanup_run(self, run_id: str) -> None:
        """Remove every worktree of a run; the branches are left in place.

        Failures to remove an individual worktree are ignored. Afterwards the
        run directory and the worktree base directory are removed when they
        are empty.
        """
        run_dir = self.worktree_base / run_id
        if not run_dir.exists():
            return

        for agent_dir in sorted(run_dir.iterdir()):
            if not agent_dir.is_dir():
                continue
            try:
                self.repo.git.worktree("remove", str(agent_dir), "--force")
            except GitCommandError:
                pass

        # rmdir only succeeds on empty directories; anything else is left.
        for directory in (run_dir, self.worktree_base):
            try:
                directory.rmdir()
            except OSError:
                pass
|
agenttester/orchestrator.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Orchestrate parallel agent runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import uuid
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from .agent_runner import AgentResult, run_agent
|
|
12
|
+
from .config import AgentConfig
|
|
13
|
+
from .git_manager import GitManager
|
|
14
|
+
from .report import generate_report
|
|
15
|
+
|
|
16
|
+
AGENT_COLORS = ["cyan", "green", "yellow", "magenta", "blue"]
|
|
17
|
+
MAX_CONCURRENT = 5
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Orchestrator:
    """Run multiple agents in parallel, each in its own worktree."""

    def __init__(self, repo_path: Path, console: Console) -> None:
        self.repo_path = repo_path
        self.git = GitManager(repo_path)
        self.console = console
        self.semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def run(
        self,
        prompt: str,
        agents: list[AgentConfig],
        *,
        keep_worktrees: bool = False,
    ) -> list[AgentResult]:
        """Execute a prompt across all agents and produce a comparison report.

        Creates one worktree per agent, runs the agents concurrently,
        auto-commits for agents whose ``commit_style`` is "manual", prints a
        summary, writes ``agenttester-report-<run_id>.md`` into the repository
        directory and finally removes the worktrees unless *keep_worktrees*
        is set.

        Returns:
            One ``AgentResult`` per agent, in the order of *agents*. A failed
            agent is represented by a result with ``exit_code=-1`` and an
            ``error`` message rather than by an exception.

        Raises:
            RuntimeError: if the repository has no commits, more than
                ``MAX_CONCURRENT`` agents are given, or two agents share a
                name.
        """
        # Local import: this module only imports Console from rich at file
        # level. Used so exception text is printed literally, not as markup.
        from rich.markup import escape

        if not self.git.has_commits():
            msg = (
                "Repository has no commits. "
                "Create an initial commit before running agents."
            )
            raise RuntimeError(msg)

        if len(agents) > MAX_CONCURRENT:
            msg = f"Maximum {MAX_CONCURRENT} agents allowed, got {len(agents)}"
            raise RuntimeError(msg)

        # Worktrees and branches are keyed by agent name, so two agents with
        # the same name would end up editing the same worktree concurrently.
        seen: set[str] = set()
        duplicates: list[str] = []
        for agent in agents:
            if agent.name in seen and agent.name not in duplicates:
                duplicates.append(agent.name)
            seen.add(agent.name)
        if duplicates:
            msg = f"Duplicate agent names are not allowed: {', '.join(duplicates)}"
            raise RuntimeError(msg)

        run_id = uuid.uuid4().hex[:8]
        base_ref = self.git.get_head_ref()

        self.console.print(
            f"[bold]Starting run [cyan]{run_id}[/cyan] "
            f"with {len(agents)} agent(s) from [dim]{base_ref[:12]}[/dim][/bold]\n"
        )

        # Create worktrees
        worktrees: dict[str, Path] = {}
        for agent in agents:
            try:
                wt = self.git.create_worktree(agent.name, run_id)
                worktrees[agent.name] = wt
                self.console.print(f" [dim]worktree ready:[/dim] {agent.name} → {wt}")
            except Exception as e:
                # Deliberately non-fatal: the agent is reported as failed by
                # _run_one below and the remaining agents still run.
                self.console.print(
                    f" [red]Failed to create worktree for {agent.name}: "
                    f"{escape(str(e))}[/red]"
                )

        self.console.print()

        # Run agents concurrently
        output_lock = asyncio.Lock()

        async def _run_one(agent: AgentConfig, color: str) -> AgentResult:
            wt = worktrees.get(agent.name)
            if not wt:
                return AgentResult(
                    agent.name, -1, 0.0, "", "", "Worktree creation failed"
                )
            async with self.semaphore:
                result = await run_agent(
                    agent, wt, prompt, self.console, color, output_lock
                )
                # Auto-commit for agents that don't commit themselves
                if agent.commit_style == "manual" and result.exit_code == 0:
                    try:
                        committed = self.git.commit_all(wt, agent.name)
                        if committed:
                            self.console.print(
                                f" [dim]Auto-committed changes for {agent.name}[/dim]"
                            )
                    except Exception as e:
                        self.console.print(
                            f" [yellow]Warning: auto-commit failed for "
                            f"{agent.name}: {escape(str(e))}[/yellow]"
                        )
                return result

        tasks = [
            _run_one(agent, AGENT_COLORS[i % len(AGENT_COLORS)])
            for i, agent in enumerate(agents)
        ]
        raw_results = await asyncio.gather(*tasks, return_exceptions=True)

        # Normalize results: an exception escaping a task becomes a failed
        # AgentResult for the agent at the same position.
        results: list[AgentResult] = []
        for agent, outcome in zip(agents, raw_results):
            if isinstance(outcome, BaseException):
                results.append(AgentResult(agent.name, -1, 0.0, "", "", str(outcome)))
            else:
                results.append(outcome)

        # Print summary
        self.console.print("\n[bold]Results:[/bold]")
        for r in results:
            icon = "✅" if r.exit_code == 0 else "❌"
            self.console.print(
                f" {icon} [bold]{r.agent_name}[/bold] "
                f"— {r.duration:.1f}s, exit {r.exit_code}"
                + (f" ({escape(r.error)})" if r.error else "")
            )

        try:
            # Generate report
            report = generate_report(run_id, base_ref, prompt, results, self.git)
            report_path = self.repo_path / f"agenttester-report-{run_id}.md"
            # Explicit UTF-8: the report embeds the prompt and must not depend
            # on the process locale.
            report_path.write_text(report, encoding="utf-8")
            self.console.print(f"\n[bold]Report:[/bold] {report_path}")
        finally:
            # Cleanup runs even when report generation or writing fails, so a
            # failed report does not leave worktrees behind.
            if keep_worktrees:
                self.console.print("[dim]Worktrees kept for inspection.[/dim]")
            else:
                self.git.cleanup_run(run_id)
                self.console.print("[dim]Worktrees removed. Branches preserved.[/dim]")

        return results
|
agenttester/presets.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Built-in agent command presets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
# Built-in agent presets, keyed by agent name.
#
# Every entry carries the same three keys:
#   command      - command-line template containing the literal ``{prompt}``
#                  placeholder
#   commit_style - "auto" or "manual"
#   timeout      - numeric limit (presumably seconds; confirm against the runner)
PRESETS: dict[str, dict] = {
    "claude": dict(
        command=" ".join(
            (
                "claude -p {prompt}",
                '--allowedTools "Bash,Read,Edit"',
                "--permission-mode acceptEdits",
            )
        ),
        commit_style="auto",
        timeout=600,
    ),
    "aider": dict(
        command="aider --yes-always --no-auto-commits --message {prompt}",
        commit_style="manual",
        timeout=600,
    ),
    "codex": dict(
        command="codex exec --sandbox danger-full-access {prompt}",
        commit_style="auto",
        timeout=600,
    ),
}
|
agenttester/repl.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Interactive multi-model REPL with persistent conversation history."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import re
|
|
7
|
+
import urllib.error
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.panel import Panel
|
|
14
|
+
from rich.prompt import Prompt
|
|
15
|
+
|
|
16
|
+
from .config import CONFIG_CANDIDATES
|
|
17
|
+
from .vllm import query as _vllm_query
|
|
18
|
+
|
|
19
|
+
_COMMAND_PATTERN = re.compile(
|
|
20
|
+
r"agenttester\s+query\s+(https?://\S+)\s+(\S+)\s+\{prompt\}"
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class Model:
    """One chat model together with its running conversation history."""

    # Label the model is listed under (the key of its config entry).
    name: str
    # Base URL of the inference server.
    endpoint: str
    # Model identifier sent with every request.
    model_id: str
    # Chat history as role/content dicts; each instance gets its own list.
    messages: list[dict] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_models(config_path: Path | None = None) -> dict[str, Model]:
    """Collect the agents in a config file that query a vLLM endpoint.

    An agent qualifies when its ``command`` matches ``_COMMAND_PATTERN``
    (``agenttester query <endpoint> <model> {prompt}``).

    Args:
        config_path: Config file to read. When ``None``, the first existing
            entry of ``CONFIG_CANDIDATES`` is used. Plain strings are
            accepted as well as ``Path`` objects.

    Returns:
        A mapping of agent name to ``Model`` with an empty history. The
        mapping is empty when no config file exists or when the file has no
        usable ``agents`` mapping.
    """
    if config_path is None:
        for candidate in CONFIG_CANDIDATES:
            p = Path(candidate)
            if p.exists():
                config_path = p
                break

    if config_path is None:
        return {}
    config_path = Path(config_path)
    if not config_path.exists():
        return {}

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(config_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    # A config whose root (or ``agents`` value) is a list or scalar has
    # nothing we can use; treat it like a config without agents.
    agents = data.get("agents") if isinstance(data, dict) else None
    if not isinstance(agents, dict):
        return {}

    models: dict[str, Model] = {}
    for name, agent_data in agents.items():
        # Entries such as ``name: null`` or a bare string have no command.
        if not isinstance(agent_data, dict):
            continue
        command = agent_data.get("command")
        if not isinstance(command, str):
            continue
        m = _COMMAND_PATTERN.search(command)
        if m:
            # YAML keys may be ints or other scalars; names are used as text.
            label = str(name)
            models[label] = Model(
                name=label, endpoint=m.group(1), model_id=m.group(2)
            )

    return models
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _query_sync(model: Model, prompt: str, max_tokens: int = 2048) -> str:
    """Send ``prompt`` to ``model`` and record the exchange in its history.

    The user turn and the assistant reply are added to ``model.messages``
    together, and only after the request succeeds. A failed request of any
    kind therefore leaves the history exactly as it was, so the next prompt
    never follows a dangling, unanswered user message.

    Args:
        model: Target model; its ``messages`` list is extended on success.
        prompt: Text of the new user turn.
        max_tokens: Completion limit forwarded to the server.

    Returns:
        The reply text, or an ``"[error] ..."`` string when the request
        fails with ``urllib.error.HTTPError`` or another ``OSError``.

    Raises:
        Exception: Anything else raised by the underlying client propagates
            unchanged (history is still left untouched).
    """
    user_turn = {"role": "user", "content": prompt}
    # Send a copy of the history plus the new turn; commit only on success.
    pending = [*model.messages, user_turn]
    try:
        reply = _vllm_query(model.endpoint, model.model_id, pending, max_tokens)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")
        return f"[error] HTTP {e.code}: {body}"
    except OSError as e:
        return f"[error] {e}"
    model.messages.extend((user_turn, {"role": "assistant", "content": reply}))
    return reply
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def _query_all(models: dict[str, Model], prompt: str) -> dict[str, str]:
|
|
71
|
+
tasks = {
|
|
72
|
+
name: asyncio.to_thread(_query_sync, model, prompt)
|
|
73
|
+
for name, model in models.items()
|
|
74
|
+
}
|
|
75
|
+
results = await asyncio.gather(*tasks.values(), return_exceptions=True)
|
|
76
|
+
return {
|
|
77
|
+
name: str(r) if isinstance(r, Exception) else r
|
|
78
|
+
for name, r in zip(tasks.keys(), results, strict=True)
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
async def run_repl(config_path: Path | None = None) -> None:
    """Run the interactive prompt loop against every configured model.

    Each entered line is sent to all models returned by ``load_models`` and
    every reply is shown in its own panel. ``/reset`` clears all histories;
    ``exit``, end-of-input or Ctrl-C ends the loop.

    Args:
        config_path: Config file forwarded to ``load_models``.
    """
    # Local imports: ``rich`` is already a dependency of this module, and
    # keeping them here makes this function self-contained.
    from rich.markup import escape
    from rich.text import Text

    console = Console()
    models = load_models(config_path)
    if not models:
        console.print("[red]No vLLM model agents found in config.[/red]")
        console.print(
            "Add agents using 'agenttester query' commands to your agenttester.yaml."
        )
        return

    # Names come from user config; escape them so stray brackets are shown
    # literally instead of being parsed as markup.
    model_names = escape(", ".join(map(str, models)))
    console.print(f"[bold]Models:[/bold] {model_names}")
    console.print(
        "[dim]Commands: /reset (clear history), exit or Ctrl-C to quit[/dim]\n"
    )

    while True:
        try:
            prompt = Prompt.ask("[bold cyan]>[/bold cyan]")
        except (EOFError, KeyboardInterrupt):
            console.print("\n[dim]bye[/dim]")
            break

        prompt = prompt.strip()
        if not prompt:
            continue
        if prompt == "exit":
            break
        if prompt == "/reset":
            for model in models.values():
                model.messages.clear()
            console.print("[dim]Context cleared.[/dim]\n")
            continue

        console.print()
        responses = await _query_all(models, prompt)
        for name, reply in responses.items():
            # Model output is arbitrary text. Wrapping it in Text stops Rich
            # from parsing it as markup, which would otherwise swallow
            # "[...]" sequences or raise MarkupError on e.g. "[/x]".
            console.print(
                Panel(
                    Text(str(reply)),
                    title=f"[bold]{escape(str(name))}[/bold]",
                    border_style="blue",
                )
            )
        console.print()
|
agenttester/report.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Generate markdown comparison reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
|
|
7
|
+
from .agent_runner import AgentResult
|
|
8
|
+
from .git_manager import GitManager
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def generate_report(
    run_id: str,
    base_ref: str,
    prompt: str,
    results: list[AgentResult],
    git: GitManager,
) -> str:
    """Build a markdown report comparing agent results.

    The report has a header, the prompt in a fenced block, a summary table
    with one row per agent, and a detail section per agent.

    Args:
        run_id: Run identifier; used in the title and in branch names.
        base_ref: Ref the diff stats are computed against; only its first
            12 characters are printed.
        prompt: Prompt text, embedded verbatim inside a code fence.
        results: One result per agent, listed in the given order.
        git: Object providing ``get_diff_stats(run_id, agent_name, base_ref)``.

    Returns:
        The complete report as one newline-joined string.
    """
    # Query git once per agent and reuse the stats in both sections.
    per_agent = [
        (r, git.get_diff_stats(run_id, r.agent_name, base_ref)) for r in results
    ]

    # Use a fence longer than any backtick run inside the prompt so the
    # prompt cannot close the code block early. Three backticks when the
    # prompt contains none.
    longest_run = current_run = 0
    for ch in prompt:
        current_run = current_run + 1 if ch == "`" else 0
        longest_run = max(longest_run, current_run)
    fence = "`" * max(3, longest_run + 1)

    lines: list[str] = [
        f"# AgentTester Report: {run_id}",
        "",
        f"**Base ref**: `{base_ref[:12]}`",
        f"**Date**: {datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
        f"**Agents**: {', '.join(r.agent_name for r in results)}",
        "",
        "## Prompt",
        "",
        fence,
        prompt,
        fence,
        "",
        "## Summary",
        "",
        "| Agent | Status | Duration | Files | Insertions | Deletions |",
        "|-------|--------|----------|-------|------------|-----------|",
    ]

    for r, stats in per_agent:
        status = "✅" if r.exit_code == 0 else "❌"
        if r.error:
            # A table cell must stay on one line and must not contain a raw
            # pipe, or the row gains extra columns.
            cell_error = " ".join(str(r.error).split()).replace("|", "\\|")
            status += f" {cell_error}"
        lines.append(
            f"| {r.agent_name} | {status} | {r.duration:.1f}s "
            f"| {stats.files_changed} | +{stats.insertions} "
            f"| -{stats.deletions} |"
        )

    lines.append("")

    # Per-agent details
    for r, stats in per_agent:
        lines.extend(
            [
                f"## {r.agent_name}",
                "",
                f"**Branch**: `agenttester/{run_id}/{r.agent_name}`",
                f"**Duration**: {r.duration:.1f}s",
                f"**Exit code**: {r.exit_code}",
            ]
        )

        if r.error:
            lines.append(f"**Error**: {r.error}")

        if stats.changed_files:
            lines.extend(["", "### Files Changed", ""])
            lines.extend(f"- `{f}`" for f in stats.changed_files)

        lines.append("")

    return "\n".join(lines)
|
agenttester/vllm.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""HTTP client for vLLM OpenAI-compatible inference servers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import urllib.error
|
|
7
|
+
import urllib.request
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def query(
    endpoint: str,
    model_id: str,
    messages: list[dict],
    max_tokens: int = 2048,
    timeout: int = 120,
) -> str:
    """Send a chat completion request and return the response text.

    Args:
        endpoint: Base URL of the server. Both ``http://host:8000`` and the
            OpenAI-style ``http://host:8000/v1`` are accepted, with or
            without a trailing slash.
        model_id: Value of the ``model`` field in the request.
        messages: Chat history sent as the ``messages`` field.
        max_tokens: Value of the ``max_tokens`` field.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The ``content`` of the first choice's message. A ``null`` content is
        returned as an empty string so the result is always a ``str``.

    Raises:
        urllib.error.HTTPError: The server answered with an error status.
        OSError: Any other transport failure, or a response body that is not
            valid JSON or lacks ``choices[0].message.content``.
    """
    payload = json.dumps(
        {
            "model": model_id,
            "messages": messages,
            "max_tokens": max_tokens,
        }
    ).encode()

    # Avoid producing ".../v1/v1/chat/completions" when the caller already
    # included the API version in the endpoint.
    base_url = endpoint.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url += "/v1"

    req = urllib.request.Request(
        f"{base_url}/chat/completions",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read()

    # Convert parse and shape errors to OSError so callers that handle the
    # documented failure types also cover malformed responses.
    try:
        content = json.loads(raw)["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise OSError(
            f"Malformed chat completion response from {base_url}: {exc!r}"
        ) from exc
    return "" if content is None else content
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agenttester
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Send prompts to multiple coding agents in parallel and compare results
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: gitpython>=3.1
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Requires-Dist: rich>=13.0
|
|
11
|
+
Requires-Dist: typer>=0.9
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
agenttester/__init__.py,sha256=auNtvZAfdLaH5OG43PwjKq-TuUFxAUj_EB7h51pVNGM,551
|
|
2
|
+
agenttester/agent_runner.py,sha256=h6etvHJ2lFHA5i5wQyFUpC2Pdlo3zYxcqODfkXMRiqI,9939
|
|
3
|
+
agenttester/cli.py,sha256=YoX-GvqXokk9Q0dXOO7S5-02ZZAVloAaL5BgnR8h6yA,5422
|
|
4
|
+
agenttester/config.py,sha256=1TvBiY6YqkKNPpZUOAfCWuvqemEJ7KiD4rWUHU9dRVU,1938
|
|
5
|
+
agenttester/git_manager.py,sha256=0brFkO0kaRtyL0FL-9vcOwoeas8sb5mZ1oGgIvvp_vg,4513
|
|
6
|
+
agenttester/orchestrator.py,sha256=1TyvjvY8FbeGJylbUSPRP_NCkaQGY1dqsjboiJLNYiE,4771
|
|
7
|
+
agenttester/presets.py,sha256=WAYXJDJD6iKY4ga8rCfeZ23MbB2UY20beQJ4NR5fie0,649
|
|
8
|
+
agenttester/repl.py,sha256=Lg3De4IwWVs6eJETQIGSArhOyBf_vmagrb6c7JWnrf4,3615
|
|
9
|
+
agenttester/report.py,sha256=C1CtErdsQYUh7c6Xxfg_FW9ZVEyTnbZ0i8aeiK-vXIg,2084
|
|
10
|
+
agenttester/vllm.py,sha256=h3abkbHWxqCW0evNOzG68PhTxzz4ore-rlGKAX8ug64,917
|
|
11
|
+
agenttester-0.1.0.dist-info/METADATA,sha256=rUBqZbXVWTjD_U_P_C9ocTac1htkNvuzgmDg4zQAxvY,470
|
|
12
|
+
agenttester-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
13
|
+
agenttester-0.1.0.dist-info/entry_points.txt,sha256=KUIZqIbLGFCwo_dG9bfLGTO52aFGeoIcAMmapqozvMM,52
|
|
14
|
+
agenttester-0.1.0.dist-info/licenses/LICENSE,sha256=t1iGtmPs5Z0ZhwTcbgnw4nUIDhoDPWlOLwSIw5GLJKo,1066
|
|
15
|
+
agenttester-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 sroomberg
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|