agenttester 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenttester-0.1.0/.dockerignore +9 -0
- agenttester-0.1.0/.github/workflows/ci.yml +19 -0
- agenttester-0.1.0/.github/workflows/publish.yml +27 -0
- agenttester-0.1.0/.gitignore +8 -0
- agenttester-0.1.0/Dockerfile +22 -0
- agenttester-0.1.0/LICENSE +21 -0
- agenttester-0.1.0/PKG-INFO +15 -0
- agenttester-0.1.0/README.md +115 -0
- agenttester-0.1.0/config.example.yaml +68 -0
- agenttester-0.1.0/deploy/Pulumi.dev.yaml +7 -0
- agenttester-0.1.0/deploy/Pulumi.yaml +6 -0
- agenttester-0.1.0/deploy/README.md +97 -0
- agenttester-0.1.0/deploy/__main__.py +144 -0
- agenttester-0.1.0/deploy/requirements.txt +2 -0
- agenttester-0.1.0/deploy/user_data.sh +25 -0
- agenttester-0.1.0/docker-compose.yaml +14 -0
- agenttester-0.1.0/pyproject.toml +40 -0
- agenttester-0.1.0/src/agenttester/__init__.py +24 -0
- agenttester-0.1.0/src/agenttester/agent_runner.py +341 -0
- agenttester-0.1.0/src/agenttester/cli.py +177 -0
- agenttester-0.1.0/src/agenttester/config.py +65 -0
- agenttester-0.1.0/src/agenttester/git_manager.py +123 -0
- agenttester-0.1.0/src/agenttester/orchestrator.py +135 -0
- agenttester-0.1.0/src/agenttester/presets.py +25 -0
- agenttester-0.1.0/src/agenttester/repl.py +121 -0
- agenttester-0.1.0/src/agenttester/report.py +75 -0
- agenttester-0.1.0/src/agenttester/vllm.py +35 -0
- agenttester-0.1.0/tests/conftest.py +45 -0
- agenttester-0.1.0/tests/test_agent_runner.py +292 -0
- agenttester-0.1.0/tests/test_cli.py +79 -0
- agenttester-0.1.0/tests/test_config.py +98 -0
- agenttester-0.1.0/tests/test_git_manager.py +148 -0
- agenttester-0.1.0/tests/test_repl.py +179 -0
- agenttester-0.1.0/tests/test_report.py +82 -0
- agenttester-0.1.0/tests/test_vllm.py +89 -0
- agenttester-0.1.0/uv.lock +395 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["**"]
|
|
6
|
+
pull_request:
|
|
7
|
+
workflow_call:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: astral-sh/setup-uv@v4
|
|
15
|
+
- run: uv venv
|
|
16
|
+
- run: uv pip install -e ".[dev]"
|
|
17
|
+
- run: uv run ruff check src/ tests/
|
|
18
|
+
- run: uv run ruff format --check src/ tests/
|
|
19
|
+
- run: uv run pytest
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
id-token: write # trusted publishing (no API token needed)
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:
|
|
13
|
+
uses: ./.github/workflows/ci.yml
|
|
14
|
+
|
|
15
|
+
publish:
|
|
16
|
+
needs: test
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
environment: pypi
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: astral-sh/setup-uv@v4
|
|
22
|
+
|
|
23
|
+
- name: Build package
|
|
24
|
+
run: uv build
|
|
25
|
+
|
|
26
|
+
- name: Publish to PyPI
|
|
27
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Image that installs agenttester and runs it against a repository mounted
# at /repo. No USER is set, so everything below runs as root.
FROM python:3.12-slim

# uv is taken from its published image and used further down to install the
# package into the system interpreter.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Runtime tools; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        openssh-client \
        rsync \
    && rm -rf /var/lib/apt/lists/*

# Commit identity for commits made inside the container.
# safe.directory: /repo is a bind mount that is normally owned by the host
# user, not by root. Without this entry git refuses to operate on it with
# "detected dubious ownership in repository".
RUN git config --global user.email "agenttester@container" \
    && git config --global user.name "agenttester" \
    && git config --global --add safe.directory '*'

WORKDIR /app
COPY . /app
RUN uv pip install --system --no-cache .

# Mount the target repo at /repo
VOLUME ["/repo"]
WORKDIR /repo

ENTRYPOINT ["agenttester"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 sroomberg
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agenttester
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Send prompts to multiple coding agents in parallel and compare results
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: gitpython>=3.1
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Requires-Dist: rich>=13.0
|
|
11
|
+
Requires-Dist: typer>=0.9
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# AgentTester
|
|
2
|
+
|
|
3
|
+
> **⚠️ Experimental** — This project is under active development. APIs, config format, and CLI flags may change without notice.
|
|
4
|
+
|
|
5
|
+
Send a single prompt to multiple coding agents running in parallel and compare the results. Each agent works in its own [git worktree](https://git-scm.com/docs/git-worktree) on a separate branch so they never interfere with each other.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# List built-in agents
|
|
17
|
+
agenttester agents
|
|
18
|
+
|
|
19
|
+
# Run two agents on the same prompt
|
|
20
|
+
agenttester run "Add unit tests for the auth module" --agents claude,aider
|
|
21
|
+
|
|
22
|
+
# Use a prompt file
|
|
23
|
+
agenttester run --prompt-file task.md --agents claude,codex,aider
|
|
24
|
+
|
|
25
|
+
# Keep worktrees for manual inspection
|
|
26
|
+
agenttester run "Refactor logging" --agents claude,aider --keep-worktrees
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## How It Works
|
|
30
|
+
|
|
31
|
+
1. You provide a prompt and select agents
|
|
32
|
+
2. AgentTester creates a git worktree + branch for each agent from the current HEAD
|
|
33
|
+
3. All agents run concurrently (up to 5), each in its own worktree
|
|
34
|
+
4. Agent output streams to the terminal with colored prefixes
|
|
35
|
+
5. A markdown comparison report is generated with diff stats and timing
|
|
36
|
+
6. Worktrees are cleaned up (branches are preserved for `git diff`)
|
|
37
|
+
|
|
38
|
+
Branches are named `agenttester/<run-id>/<agent-name>` so you can compare results:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
git diff agenttester/a3f2c1d0/claude agenttester/a3f2c1d0/aider
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Configuration
|
|
45
|
+
|
|
46
|
+
Copy `config.example.yaml` to `agenttester.yaml` in your target repo to customize agents. Built-in presets are available for `claude`, `aider`, and `codex`.
|
|
47
|
+
|
|
48
|
+
### Command Placeholders
|
|
49
|
+
|
|
50
|
+
- `{prompt}` — replaced with the shell-escaped prompt text
|
|
51
|
+
- `{prompt_file}` — replaced with a path to a temp file containing the prompt
|
|
52
|
+
- If neither placeholder is present, the prompt is piped to the agent via stdin
|
|
53
|
+
|
|
54
|
+
### Agent Settings
|
|
55
|
+
|
|
56
|
+
| Field | Description | Default |
|
|
57
|
+
|-------|-------------|---------|
|
|
58
|
+
| `command` | Shell command template | (required) |
|
|
59
|
+
| `commit_style` | `auto` (agent commits) or `manual` (agenttester commits) | `auto` |
|
|
60
|
+
| `timeout` | Max seconds before the agent is killed | `600` |
|
|
61
|
+
| `env` | Extra environment variables (key-value map) | `{}` |
|
|
62
|
+
|
|
63
|
+
## Interactive Model REPL
|
|
64
|
+
|
|
65
|
+
For comparing responses from vLLM model servers interactively, with persistent
|
|
66
|
+
conversation history within a session:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
agenttester repl # auto-discovers agenttester.yaml
|
|
70
|
+
agenttester repl --config custom.yaml # explicit config path
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The REPL discovers any agent in your config whose command uses `query_model.py`,
|
|
74
|
+
fans out each prompt to all of them in parallel, and maintains separate
|
|
75
|
+
conversation history per model. Use `/reset` to clear history or `exit` to quit.
|
|
76
|
+
|
|
77
|
+
See `config.example.yaml` for example vLLM agent entries.
|
|
78
|
+
|
|
79
|
+
## Development
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
uv pip install -e ".[dev]"
|
|
83
|
+
ruff check src/
|
|
84
|
+
ruff format src/
|
|
85
|
+
pytest
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Docker
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Run against the current directory
|
|
92
|
+
docker compose run --rm agenttester run "Fix the bug" --agents claude
|
|
93
|
+
|
|
94
|
+
# Run against a different repo
|
|
95
|
+
REPO_PATH=/path/to/repo docker compose run --rm agenttester run "Add tests" --agents claude,aider
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Library Usage
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
import asyncio
|
|
102
|
+
from pathlib import Path
|
|
103
|
+
from rich.console import Console
|
|
104
|
+
from agenttester import Orchestrator, load_config
|
|
105
|
+
|
|
106
|
+
async def main():
|
|
107
|
+
agents = load_config()
|
|
108
|
+
selected = [agents["claude"], agents["aider"]]
|
|
109
|
+
orch = Orchestrator(Path(".").resolve(), Console())
|
|
110
|
+
results = await orch.run("Add unit tests", selected)
|
|
111
|
+
for r in results:
|
|
112
|
+
print(f"{r.agent_name}: exit={r.exit_code} duration={r.duration:.1f}s")
|
|
113
|
+
|
|
114
|
+
asyncio.run(main())
|
|
115
|
+
```
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# AgentTester configuration
|
|
2
|
+
# Copy to agenttester.yaml in your target repo.
|
|
3
|
+
# Entries here override built-in presets (claude, aider, codex).
|
|
4
|
+
#
|
|
5
|
+
# host: "localhost" (default) runs locally; set to "user@server" for SSH.
|
|
6
|
+
|
|
7
|
+
agents:
|
|
8
|
+
claude:
|
|
9
|
+
command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit" --permission-mode acceptEdits'
|
|
10
|
+
host: localhost
|
|
11
|
+
commit_style: auto # agent commits its own changes
|
|
12
|
+
timeout: 600
|
|
13
|
+
|
|
14
|
+
aider:
|
|
15
|
+
command: "aider --yes-always --no-auto-commits --message {prompt}"
|
|
16
|
+
host: localhost
|
|
17
|
+
commit_style: manual # agenttester commits changes after the agent exits
|
|
18
|
+
timeout: 600
|
|
19
|
+
|
|
20
|
+
codex:
|
|
21
|
+
command: "codex exec --sandbox danger-full-access {prompt}"
|
|
22
|
+
host: localhost
|
|
23
|
+
commit_style: auto
|
|
24
|
+
timeout: 600
|
|
25
|
+
|
|
26
|
+
# Example: agent running on a remote GPU box
|
|
27
|
+
# remote-claude:
|
|
28
|
+
# command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
|
|
29
|
+
# host: user@gpu-server
|
|
30
|
+
# remote_workdir: /home/user/agenttester
|
|
31
|
+
# commit_style: auto
|
|
32
|
+
# timeout: 900
|
|
33
|
+
|
|
34
|
+
# Example: custom agent with API key forwarding
|
|
35
|
+
# my-agent:
|
|
36
|
+
# command: "my-agent --input {prompt_file} --auto"
|
|
37
|
+
# host: localhost
|
|
38
|
+
# commit_style: manual
|
|
39
|
+
# timeout: 300
|
|
40
|
+
# env:
|
|
41
|
+
# MY_AGENT_API_KEY: "..."
|
|
42
|
+
|
|
43
|
+
# Example: agent that reads prompt from stdin (no placeholders)
|
|
44
|
+
# stdin-agent:
|
|
45
|
+
# command: "some-agent --non-interactive"
|
|
46
|
+
# commit_style: manual
|
|
47
|
+
|
|
48
|
+
# --- vLLM model servers on a remote EC2 host ---
|
|
49
|
+
# Deploy the host with: python setup/aws_single_host.py (in MultiAgentTrainer)
|
|
50
|
+
# Then replace HOST_IP with the public IP printed by that script.
|
|
51
|
+
|
|
52
|
+
# llama3:
|
|
53
|
+
# command: 'agenttester query http://HOST_IP:8001 meta-llama/Meta-Llama-3-8B-Instruct {prompt}'
|
|
54
|
+
# host: localhost
|
|
55
|
+
# commit_style: manual
|
|
56
|
+
# timeout: 120
|
|
57
|
+
|
|
58
|
+
# mistral:
|
|
59
|
+
# command: 'agenttester query http://HOST_IP:8002 mistralai/Mistral-7B-Instruct-v0.2 {prompt}'
|
|
60
|
+
# host: localhost
|
|
61
|
+
# commit_style: manual
|
|
62
|
+
# timeout: 120
|
|
63
|
+
|
|
64
|
+
# qwen:
|
|
65
|
+
# command: 'agenttester query http://HOST_IP:8003 Qwen/Qwen2.5-7B-Instruct {prompt}'
|
|
66
|
+
# host: localhost
|
|
67
|
+
# commit_style: manual
|
|
68
|
+
# timeout: 120
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# deploy — AWS infrastructure for agenttester
|
|
2
|
+
|
|
3
|
+
Pulumi project that provisions EC2 instances pre-configured to run coding agents (Claude Code, Aider, Codex) over SSH.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- [Pulumi CLI](https://www.pulumi.com/docs/install/)
|
|
8
|
+
- AWS credentials configured (`aws configure` or env vars)
|
|
9
|
+
- An SSH key pair (default: `~/.ssh/id_ed25519`)
|
|
10
|
+
|
|
11
|
+
## Quick start
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
cd deploy
|
|
15
|
+
pulumi stack init dev
|
|
16
|
+
|
|
17
|
+
# Required: restrict SSH access to your IP
|
|
18
|
+
pulumi config set --path 'allowed_ssh_cidrs[0]' "$(curl -s ifconfig.me)/32"
|
|
19
|
+
|
|
20
|
+
pulumi up
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Configuration
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# REQUIRED: CIDR blocks allowed to SSH into the instances
|
|
27
|
+
pulumi config set --path 'allowed_ssh_cidrs[0]' "203.0.113.10/32"
|
|
28
|
+
pulumi config set --path 'allowed_ssh_cidrs[1]' "10.0.0.0/8" # e.g. VPN range
|
|
29
|
+
|
|
30
|
+
# Instance type (default: t3.large)
|
|
31
|
+
pulumi config set instance_type t3.xlarge
|
|
32
|
+
|
|
33
|
+
# Number of instances (default: 1)
|
|
34
|
+
pulumi config set instance_count 3
|
|
35
|
+
|
|
36
|
+
# SSH public key path (default: ~/.ssh/id_ed25519.pub)
|
|
37
|
+
pulumi config set ssh_public_key_path ~/.ssh/my_key.pub
|
|
38
|
+
|
|
39
|
+
# AWS region
|
|
40
|
+
pulumi config set aws:region us-west-2
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Outputs
|
|
44
|
+
|
|
45
|
+
After `pulumi up`, the stack exports:
|
|
46
|
+
|
|
47
|
+
- **`instance_ids`** — EC2 instance IDs of the instances
- **`public_ips`** — IP addresses of the instances
|
|
48
|
+
- **`ssh_hosts`** — Ready-to-use `ubuntu@<ip>` strings
|
|
49
|
+
- **`agenttester_yaml_snippet`** — Paste directly into your `agenttester.yaml`
|
|
50
|
+
|
|
51
|
+
Example:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Get the config snippet
|
|
55
|
+
pulumi stack output agenttester_yaml_snippet
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Then add it to your target repo's `agenttester.yaml`:
|
|
59
|
+
|
|
60
|
+
```yaml
|
|
61
|
+
agents:
|
|
62
|
+
remote-agent-0:
|
|
63
|
+
command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
|
|
64
|
+
host: ubuntu@3.14.159.26
|
|
65
|
+
remote_workdir: /tmp/agenttester
|
|
66
|
+
commit_style: auto
|
|
67
|
+
timeout: 600
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## What gets provisioned
|
|
71
|
+
|
|
72
|
+
- **EC2 instances** (Ubuntu 22.04) with 50 GB gp3 root volume
|
|
73
|
+
- **Security group** allowing SSH inbound from `allowed_ssh_cidrs` only, all outbound
|
|
74
|
+
- **IAM role + instance profile** with SSM access (for debugging via Session Manager)
|
|
75
|
+
- **Key pair** from your local SSH public key
|
|
76
|
+
- **User data** that installs git, rsync, Node.js 20, Claude Code, Codex CLI, and Aider
|
|
77
|
+
|
|
78
|
+
## API keys
|
|
79
|
+
|
|
80
|
+
The instances don't store API keys. Forward them via the `env` field in `agenttester.yaml`:
|
|
81
|
+
|
|
82
|
+
```yaml
|
|
83
|
+
agents:
|
|
84
|
+
remote-claude:
|
|
85
|
+
command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
|
|
86
|
+
host: ubuntu@3.14.159.26
|
|
87
|
+
env:
|
|
88
|
+
ANTHROPIC_API_KEY: "sk-ant-..."
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Or set them on the instance via SSM Parameter Store / Secrets Manager and source them in your agent command.
|
|
92
|
+
|
|
93
|
+
## Teardown
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
pulumi destroy
|
|
97
|
+
```
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Pulumi program to deploy agenttester remote-execution infrastructure on AWS."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pulumi
|
|
6
|
+
import pulumi_aws as aws
|
|
7
|
+
|
|
8
|
+
# ── config ────────────────────────────────────────────────────────────
# Every setting is read from the Pulumi stack configuration. Only
# `allowed_ssh_cidrs` is mandatory (it is read with `require_object`); the
# other keys fall back to the defaults written on each line.

config = pulumi.Config()
instance_type = config.get("instance_type") or "t3.large"
# NOTE: because of `or 1`, an explicit value of 0 is also replaced by 1.
instance_count = config.get_int("instance_count") or 1
ssh_pub_key_path = config.get("ssh_public_key_path") or "~/.ssh/id_ed25519.pub"
allowed_ssh_cidrs = config.require_object("allowed_ssh_cidrs")

# Read at program start, so a missing key file raises before any resource
# below is declared. `expanduser` resolves the leading "~" of the default path.
ssh_pub_key = Path(ssh_pub_key_path).expanduser().read_text().strip()

# ── AMI (latest Ubuntu 22.04) ─────────────────────────────────────────
# `most_recent=True` picks the newest image matching both filters.

ami = aws.ec2.get_ami(
    most_recent=True,
    owners=["099720109477"],  # Canonical
    filters=[
        aws.ec2.GetAmiFilterArgs(
            name="name",
            values=["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"],
        ),
        aws.ec2.GetAmiFilterArgs(
            name="virtualization-type",
            values=["hvm"],
        ),
    ],
)

# ── key pair ──────────────────────────────────────────────────────────
# Registers the local public key read above; instances reference it by name.

key_pair = aws.ec2.KeyPair(
    "agenttester-key",
    public_key=ssh_pub_key,
)

# ── security group (SSH only) ─────────────────────────────────────────
# Inbound: TCP port 22 from the configured CIDR blocks only.
# Outbound: unrestricted (protocol "-1" with ports 0-0 to 0.0.0.0/0).

sg = aws.ec2.SecurityGroup(
    "agenttester-sg",
    description="Allow SSH for agenttester remote agents",
    ingress=[
        aws.ec2.SecurityGroupIngressArgs(
            protocol="tcp",
            from_port=22,
            to_port=22,
            cidr_blocks=allowed_ssh_cidrs,
            description="SSH",
        ),
    ],
    egress=[
        aws.ec2.SecurityGroupEgressArgs(
            protocol="-1",
            from_port=0,
            to_port=0,
            cidr_blocks=["0.0.0.0/0"],
            description="All outbound",
        ),
    ],
)

# ── IAM role + instance profile ───────────────────────────────────────

# Trust policy: allows the EC2 service principal to assume the role.
assume_role_policy = """{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Principal": {"Service": "ec2.amazonaws.com"},
"Action": "sts:AssumeRole"
}]
}"""

role = aws.iam.Role(
    "agenttester-role",
    assume_role_policy=assume_role_policy,
    tags={"Project": "agenttester"},
)

# SSM managed policy so you can debug via Session Manager if needed
aws.iam.RolePolicyAttachment(
    "agenttester-ssm",
    role=role.name,
    policy_arn="arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
)

# The instance profile is what the instances below attach to carry the role.
instance_profile = aws.iam.InstanceProfile(
    "agenttester-profile",
    role=role.name,
)

# ── user data ─────────────────────────────────────────────────────────
# The script is loaded from `user_data.sh` in the same directory as this
# file and passed unchanged to every instance.

user_data_script = (Path(__file__).parent / "user_data.sh").read_text()

# ── EC2 instances ─────────────────────────────────────────────────────
# Creates `instance_count` identical instances, named and tagged
# `agenttester-agent-<index>`, and collects them for the stack outputs.

instances = []
for i in range(instance_count):
    inst = aws.ec2.Instance(
        f"agenttester-agent-{i}",
        ami=ami.id,
        instance_type=instance_type,
        key_name=key_pair.key_name,
        vpc_security_group_ids=[sg.id],
        iam_instance_profile=instance_profile.name,
        user_data=user_data_script,
        root_block_device=aws.ec2.InstanceRootBlockDeviceArgs(
            volume_size=50,
            volume_type="gp3",
        ),
        tags={
            "Name": f"agenttester-agent-{i}",
            "Project": "agenttester",
        },
    )
    instances.append(inst)
|
|
122
|
+
|
|
123
|
+
# ── outputs ───────────────────────────────────────────────────────────


def _render_agent_snippet(ips):
    """Return ``agenttester.yaml`` agent entries for the given instance IPs.

    One ``remote-agent-<index>`` entry is produced per address, in order.
    Entry names are indented two spaces and their fields four, so the text
    nests correctly when pasted directly beneath a top-level ``agents:`` key.

    Args:
        ips: Resolved public IP addresses, one per instance.

    Returns:
        The entries joined by newlines, without a trailing newline.
    """
    return "\n".join(
        f"  remote-agent-{i}:\n"
        # `{{prompt}}` is escaped so the literal `{prompt}` placeholder
        # reaches the generated config.
        f'    command: \'claude -p {{prompt}} --allowedTools "Bash,Read,Edit"\'\n'
        f"    host: ubuntu@{ip}\n"
        "    remote_workdir: /tmp/agenttester\n"
        "    commit_style: auto\n"
        "    timeout: 600"
        for i, ip in enumerate(ips)
    )


pulumi.export("instance_ids", [inst.id for inst in instances])
pulumi.export("public_ips", [inst.public_ip for inst in instances])
pulumi.export(
    "ssh_hosts",
    [inst.public_ip.apply(lambda ip: f"ubuntu@{ip}") for inst in instances],
)
# The IPs are Pulumi outputs that resolve during deployment, so the snippet is
# rendered inside `apply` once all of them are known.
pulumi.export(
    "agenttester_yaml_snippet",
    pulumi.Output.all(*[inst.public_ip for inst in instances]).apply(
        _render_agent_snippet
    ),
)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/bin/bash
# Provisioning script passed as EC2 user data by deploy/__main__.py.
# Installs the coding-agent CLIs and prepares the remote workspace directory.
# Any failing command aborts the script (set -e), and every command is echoed
# (set -x) to make the boot log useful for debugging.
set -euxo pipefail

# There is no terminal at boot; stop apt from waiting on interactive prompts.
export DEBIAN_FRONTEND=noninteractive

# ── system packages ───────────────────────────────────────────────────
apt-get update
apt-get install -y git rsync curl unzip python3-pip python3-venv

# ── Node.js 20 (for Claude Code and Codex CLI) ───────────────────────
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
apt-get install -y nodejs

# ── Claude Code ───────────────────────────────────────────────────────
npm install -g @anthropic-ai/claude-code

# ── OpenAI Codex CLI ──────────────────────────────────────────────────
npm install -g @openai/codex

# ── Aider ─────────────────────────────────────────────────────────────
# The pip packaged with Ubuntu 22.04 does not know --break-system-packages
# (the option was added in pip 23.0.1), so passing it fails and, under
# `set -e`, aborts the rest of this script. Install into a dedicated virtual
# environment instead and link the executable onto the default PATH.
python3 -m venv /opt/aider
/opt/aider/bin/pip install --upgrade pip
/opt/aider/bin/pip install aider-chat
ln -sf /opt/aider/bin/aider /usr/local/bin/aider

# ── workspace directory ───────────────────────────────────────────────
# Matches the `remote_workdir` used in the generated agenttester.yaml snippet.
mkdir -p /tmp/agenttester
chown ubuntu:ubuntu /tmp/agenttester

# Completion marker: only written when every step above succeeded.
echo "agenttester agent host ready" > /var/log/agenttester-setup.log
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
services:
|
|
2
|
+
agenttester:
|
|
3
|
+
build: .
|
|
4
|
+
volumes:
|
|
5
|
+
- ${REPO_PATH:-.}:/repo
|
|
6
|
+
# Forward SSH agent for remote execution
|
|
7
|
+
- ${SSH_AUTH_SOCK:-/dev/null}:/ssh-agent
|
|
8
|
+
# Share host SSH config for remote hosts
|
|
9
|
+
- ~/.ssh:/root/.ssh:ro
|
|
10
|
+
environment:
|
|
11
|
+
- SSH_AUTH_SOCK=/ssh-agent
|
|
12
|
+
# Pass API keys through to agents
|
|
13
|
+
- ANTHROPIC_API_KEY
|
|
14
|
+
- OPENAI_API_KEY
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agenttester"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Send prompts to multiple coding agents in parallel and compare results"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"typer>=0.9",
|
|
13
|
+
"pyyaml>=6.0",
|
|
14
|
+
"rich>=13.0",
|
|
15
|
+
"gitpython>=3.1",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
dev = [
|
|
20
|
+
"ruff>=0.4",
|
|
21
|
+
"pytest>=8.0",
|
|
22
|
+
"pytest-asyncio>=0.23",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
agenttester = "agenttester.cli:app"
|
|
27
|
+
|
|
28
|
+
[tool.ruff]
|
|
29
|
+
target-version = "py310"
|
|
30
|
+
line-length = 88
|
|
31
|
+
|
|
32
|
+
[tool.ruff.lint]
|
|
33
|
+
select = ["E", "F", "I", "N", "UP", "B", "SIM", "RUF"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff.lint.isort]
|
|
36
|
+
known-first-party = ["agenttester"]
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
asyncio_mode = "auto"
|
|
40
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""AgentTester: Multi-agent comparison tool.
|
|
2
|
+
|
|
3
|
+
Usable as a CLI (``agenttester run …``), a Docker container, or a Python
|
|
4
|
+
library::
|
|
5
|
+
|
|
6
|
+
from agenttester import Orchestrator, AgentConfig, load_config
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .agent_runner import AgentResult, run_agent
|
|
10
|
+
from .config import AgentConfig, load_config
|
|
11
|
+
from .git_manager import DiffStats, GitManager
|
|
12
|
+
from .orchestrator import Orchestrator
|
|
13
|
+
|
|
14
|
+
# Names re-exported at package level; each one is imported above from its
# defining submodule.
__all__ = [
    "AgentConfig",
    "AgentResult",
    "DiffStats",
    "GitManager",
    "Orchestrator",
    "load_config",
    "run_agent",
]

# The same version string is declared as `version` in pyproject.toml;
# update both together when releasing.
__version__ = "0.1.0"
|