agenttester 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. agenttester-0.1.0/.dockerignore +9 -0
  2. agenttester-0.1.0/.github/workflows/ci.yml +19 -0
  3. agenttester-0.1.0/.github/workflows/publish.yml +27 -0
  4. agenttester-0.1.0/.gitignore +8 -0
  5. agenttester-0.1.0/Dockerfile +22 -0
  6. agenttester-0.1.0/LICENSE +21 -0
  7. agenttester-0.1.0/PKG-INFO +15 -0
  8. agenttester-0.1.0/README.md +115 -0
  9. agenttester-0.1.0/config.example.yaml +68 -0
  10. agenttester-0.1.0/deploy/Pulumi.dev.yaml +7 -0
  11. agenttester-0.1.0/deploy/Pulumi.yaml +6 -0
  12. agenttester-0.1.0/deploy/README.md +97 -0
  13. agenttester-0.1.0/deploy/__main__.py +144 -0
  14. agenttester-0.1.0/deploy/requirements.txt +2 -0
  15. agenttester-0.1.0/deploy/user_data.sh +25 -0
  16. agenttester-0.1.0/docker-compose.yaml +14 -0
  17. agenttester-0.1.0/pyproject.toml +40 -0
  18. agenttester-0.1.0/src/agenttester/__init__.py +24 -0
  19. agenttester-0.1.0/src/agenttester/agent_runner.py +341 -0
  20. agenttester-0.1.0/src/agenttester/cli.py +177 -0
  21. agenttester-0.1.0/src/agenttester/config.py +65 -0
  22. agenttester-0.1.0/src/agenttester/git_manager.py +123 -0
  23. agenttester-0.1.0/src/agenttester/orchestrator.py +135 -0
  24. agenttester-0.1.0/src/agenttester/presets.py +25 -0
  25. agenttester-0.1.0/src/agenttester/repl.py +121 -0
  26. agenttester-0.1.0/src/agenttester/report.py +75 -0
  27. agenttester-0.1.0/src/agenttester/vllm.py +35 -0
  28. agenttester-0.1.0/tests/conftest.py +45 -0
  29. agenttester-0.1.0/tests/test_agent_runner.py +292 -0
  30. agenttester-0.1.0/tests/test_cli.py +79 -0
  31. agenttester-0.1.0/tests/test_config.py +98 -0
  32. agenttester-0.1.0/tests/test_git_manager.py +148 -0
  33. agenttester-0.1.0/tests/test_repl.py +179 -0
  34. agenttester-0.1.0/tests/test_report.py +82 -0
  35. agenttester-0.1.0/tests/test_vllm.py +89 -0
  36. agenttester-0.1.0/uv.lock +395 -0
@@ -0,0 +1,9 @@
1
+ .git
2
+ .agenttester
3
+ _plans
4
+ __pycache__
5
+ *.egg-info
6
+ dist
7
+ .venv
8
+ *.pyc
9
+ agenttester-report-*.md
@@ -0,0 +1,19 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["**"]
6
+ pull_request:
7
+ workflow_call:
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: astral-sh/setup-uv@v4
15
+ - run: uv venv
16
+ - run: uv pip install -e ".[dev]"
17
+ - run: uv run ruff check src/ tests/
18
+ - run: uv run ruff format --check src/ tests/
19
+ - run: uv run pytest
@@ -0,0 +1,27 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ id-token: write # trusted publishing (no API token needed)
10
+
11
+ jobs:
12
+ test:
13
+ uses: ./.github/workflows/ci.yml
14
+
15
+ publish:
16
+ needs: test
17
+ runs-on: ubuntu-latest
18
+ environment: pypi
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: astral-sh/setup-uv@v4
22
+
23
+ - name: Build package
24
+ run: uv build
25
+
26
+ - name: Publish to PyPI
27
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,8 @@
1
+ _plans/
2
+ .agenttester/
3
+ agenttester-report-*.md
4
+ __pycache__/
5
+ *.egg-info/
6
+ dist/
7
+ *.pyc
8
+ .venv/
@@ -0,0 +1,22 @@
1
+ FROM python:3.12-slim
2
+
3
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
4
+
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ git \
7
+ openssh-client \
8
+ rsync \
9
+ && rm -rf /var/lib/apt/lists/*
10
+
11
+ RUN git config --global user.email "agenttester@container" \
12
+ && git config --global user.name "agenttester"
13
+
14
+ WORKDIR /app
15
+ COPY . /app
16
+ RUN uv pip install --system --no-cache .
17
+
18
+ # Mount the target repo at /repo
19
+ VOLUME ["/repo"]
20
+ WORKDIR /repo
21
+
22
+ ENTRYPOINT ["agenttester"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sroomberg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: agenttester
3
+ Version: 0.1.0
4
+ Summary: Send prompts to multiple coding agents in parallel and compare results
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: gitpython>=3.1
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: rich>=13.0
11
+ Requires-Dist: typer>=0.9
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
14
+ Requires-Dist: pytest>=8.0; extra == 'dev'
15
+ Requires-Dist: ruff>=0.4; extra == 'dev'
@@ -0,0 +1,115 @@
1
+ # AgentTester
2
+
3
+ > **⚠️ Experimental** — This project is under active development. APIs, config format, and CLI flags may change without notice.
4
+
5
+ Send a single prompt to multiple coding agents running in parallel and compare the results. Each agent works in its own [git worktree](https://git-scm.com/docs/git-worktree) on a separate branch so they never interfere with each other.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ uv pip install -e ".[dev]"
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # List built-in agents
17
+ agenttester agents
18
+
19
+ # Run two agents on the same prompt
20
+ agenttester run "Add unit tests for the auth module" --agents claude,aider
21
+
22
+ # Use a prompt file
23
+ agenttester run --prompt-file task.md --agents claude,codex,aider
24
+
25
+ # Keep worktrees for manual inspection
26
+ agenttester run "Refactor logging" --agents claude,aider --keep-worktrees
27
+ ```
28
+
29
+ ## How It Works
30
+
31
+ 1. You provide a prompt and select agents
32
+ 2. AgentTester creates a git worktree + branch for each agent from the current HEAD
33
+ 3. All agents run concurrently (up to 5), each in its own worktree
34
+ 4. Agent output streams to the terminal with colored prefixes
35
+ 5. A markdown comparison report is generated with diff stats and timing
36
+ 6. Worktrees are cleaned up (branches are preserved for `git diff`)
37
+
38
+ Branches are named `agenttester/<run-id>/<agent-name>` so you can compare results:
39
+
40
+ ```bash
41
+ git diff agenttester/a3f2c1d0/claude agenttester/a3f2c1d0/aider
42
+ ```
43
+
44
+ ## Configuration
45
+
46
+ Copy `config.example.yaml` to `agenttester.yaml` in your target repo to customize agents. Built-in presets are available for `claude`, `aider`, and `codex`.
47
+
48
+ ### Command Placeholders
49
+
50
+ - `{prompt}` — replaced with the shell-escaped prompt text
51
+ - `{prompt_file}` — replaced with a path to a temp file containing the prompt
52
+ - If neither placeholder is present, the prompt is piped to the agent via stdin
53
+
54
+ ### Agent Settings
55
+
56
+ | Field | Description | Default |
57
+ |-------|-------------|---------|
58
+ | `command` | Shell command template | (required) |
59
+ | `commit_style` | `auto` (agent commits) or `manual` (agenttester commits) | `auto` |
60
+ | `timeout` | Max seconds before the agent is killed | `600` |
61
+ | `env` | Extra environment variables (key-value map) | `{}` |
62
+
63
+ ## Interactive Model REPL
64
+
65
+ For comparing responses from vLLM model servers interactively, with persistent
66
+ conversation history within a session:
67
+
68
+ ```bash
69
+ agenttester repl # auto-discovers agenttester.yaml
70
+ agenttester repl --config custom.yaml # explicit config path
71
+ ```
72
+
73
+ The REPL discovers any agent in your config whose command uses `query_model.py`,
74
+ fans out each prompt to all of them in parallel, and maintains separate
75
+ conversation history per model. Use `/reset` to clear history or `exit` to quit.
76
+
77
+ See `config.example.yaml` for example vLLM agent entries.
78
+
79
+ ## Development
80
+
81
+ ```bash
82
+ uv pip install -e ".[dev]"
83
+ ruff check src/ tests/
84
+ ruff format src/ tests/
85
+ pytest
86
+ ```
87
+
88
+ ## Docker
89
+
90
+ ```bash
91
+ # Run against the current directory
92
+ docker compose run --rm agenttester run "Fix the bug" --agents claude
93
+
94
+ # Run against a different repo
95
+ REPO_PATH=/path/to/repo docker compose run --rm agenttester run "Add tests" --agents claude,aider
96
+ ```
97
+
98
+ ## Library Usage
99
+
100
+ ```python
101
+ import asyncio
102
+ from pathlib import Path
103
+ from rich.console import Console
104
+ from agenttester import Orchestrator, load_config
105
+
106
+ async def main():
107
+ agents = load_config()
108
+ selected = [agents["claude"], agents["aider"]]
109
+ orch = Orchestrator(Path(".").resolve(), Console())
110
+ results = await orch.run("Add unit tests", selected)
111
+ for r in results:
112
+ print(f"{r.agent_name}: exit={r.exit_code} duration={r.duration:.1f}s")
113
+
114
+ asyncio.run(main())
115
+ ```
@@ -0,0 +1,68 @@
1
+ # AgentTester configuration
2
+ # Copy to agenttester.yaml in your target repo.
3
+ # Entries here override built-in presets (claude, aider, codex).
4
+ #
5
+ # host: "localhost" (default) runs locally; set to "user@server" for SSH.
6
+
7
+ agents:
8
+ claude:
9
+ command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit" --permission-mode acceptEdits'
10
+ host: localhost
11
+ commit_style: auto # agent commits its own changes
12
+ timeout: 600
13
+
14
+ aider:
15
+ command: "aider --yes-always --no-auto-commits --message {prompt}"
16
+ host: localhost
17
+ commit_style: manual # agenttester commits changes after the agent exits
18
+ timeout: 600
19
+
20
+ codex:
21
+ command: "codex exec --sandbox danger-full-access {prompt}"
22
+ host: localhost
23
+ commit_style: auto
24
+ timeout: 600
25
+
26
+ # Example: agent running on a remote GPU box
27
+ # remote-claude:
28
+ # command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
29
+ # host: user@gpu-server
30
+ # remote_workdir: /home/user/agenttester
31
+ # commit_style: auto
32
+ # timeout: 900
33
+
34
+ # Example: custom agent with API key forwarding
35
+ # my-agent:
36
+ # command: "my-agent --input {prompt_file} --auto"
37
+ # host: localhost
38
+ # commit_style: manual
39
+ # timeout: 300
40
+ # env:
41
+ # MY_AGENT_API_KEY: "..."
42
+
43
+ # Example: agent that reads prompt from stdin (no placeholders)
44
+ # stdin-agent:
45
+ # command: "some-agent --non-interactive"
46
+ # commit_style: manual
47
+
48
+ # --- vLLM model servers on a remote EC2 host ---
49
+ # Deploy the host with: python setup/aws_single_host.py (in MultiAgentTrainer)
50
+ # Then replace HOST_IP with the public IP printed by that script.
51
+
52
+ # llama3:
53
+ # command: 'agenttester query http://HOST_IP:8001 meta-llama/Meta-Llama-3-8B-Instruct {prompt}'
54
+ # host: localhost
55
+ # commit_style: manual
56
+ # timeout: 120
57
+
58
+ # mistral:
59
+ # command: 'agenttester query http://HOST_IP:8002 mistralai/Mistral-7B-Instruct-v0.2 {prompt}'
60
+ # host: localhost
61
+ # commit_style: manual
62
+ # timeout: 120
63
+
64
+ # qwen:
65
+ # command: 'agenttester query http://HOST_IP:8003 Qwen/Qwen2.5-7B-Instruct {prompt}'
66
+ # host: localhost
67
+ # commit_style: manual
68
+ # timeout: 120
@@ -0,0 +1,7 @@
1
+ config:
2
+ aws:region: us-east-1
3
+ agenttester-infra:instance_type: t3.large
4
+ agenttester-infra:instance_count: "1"
5
+ agenttester-infra:ssh_public_key_path: ~/.ssh/id_ed25519.pub
6
+ agenttester-infra:allowed_ssh_cidrs:
7
+ - "203.0.113.0/32" # CHANGEME: your IP
@@ -0,0 +1,6 @@
1
+ name: agenttester-infra
2
+ runtime:
3
+ name: python
4
+ options:
5
+ virtualenv: venv
6
+ description: AWS infrastructure for agenttester remote agent execution
@@ -0,0 +1,97 @@
1
+ # deploy — AWS infrastructure for agenttester
2
+
3
+ Pulumi project that provisions EC2 instances pre-configured to run coding agents (Claude Code, Aider, Codex) over SSH.
4
+
5
+ ## Prerequisites
6
+
7
+ - [Pulumi CLI](https://www.pulumi.com/docs/install/)
8
+ - AWS credentials configured (`aws configure` or env vars)
9
+ - An SSH key pair (default: `~/.ssh/id_ed25519`)
10
+
11
+ ## Quick start
12
+
13
+ ```bash
14
+ cd deploy
15
+ pulumi stack init dev
16
+
17
+ # Required: restrict SSH access to your IP
18
+ pulumi config set --path 'allowed_ssh_cidrs[0]' "$(curl -s ifconfig.me)/32"
19
+
20
+ pulumi up
21
+ ```
22
+
23
+ ## Configuration
24
+
25
+ ```bash
26
+ # REQUIRED: CIDR blocks allowed to SSH into the instances
27
+ pulumi config set --path 'allowed_ssh_cidrs[0]' "203.0.113.10/32"
28
+ pulumi config set --path 'allowed_ssh_cidrs[1]' "10.0.0.0/8" # e.g. VPN range
29
+
30
+ # Instance type (default: t3.large)
31
+ pulumi config set instance_type t3.xlarge
32
+
33
+ # Number of instances (default: 1)
34
+ pulumi config set instance_count 3
35
+
36
+ # SSH public key path (default: ~/.ssh/id_ed25519.pub)
37
+ pulumi config set ssh_public_key_path ~/.ssh/my_key.pub
38
+
39
+ # AWS region
40
+ pulumi config set aws:region us-west-2
41
+ ```
42
+
43
+ ## Outputs
44
+
45
+ After `pulumi up`, the stack exports:
46
+
47
+ - **`public_ips`** — IP addresses of the instances
48
+ - **`ssh_hosts`** — Ready-to-use `ubuntu@<ip>` strings
49
+ - **`agenttester_yaml_snippet`** — Agent entries to paste under the `agents:` key of your `agenttester.yaml`
50
+
51
+ Example:
52
+
53
+ ```bash
54
+ # Get the config snippet
55
+ pulumi stack output agenttester_yaml_snippet
56
+ ```
57
+
58
+ Then add it to your target repo's `agenttester.yaml`:
59
+
60
+ ```yaml
61
+ agents:
62
+ remote-agent-0:
63
+ command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
64
+ host: ubuntu@3.14.159.26
65
+ remote_workdir: /tmp/agenttester
66
+ commit_style: auto
67
+ timeout: 600
68
+ ```
69
+
70
+ ## What gets provisioned
71
+
72
+ - **EC2 instances** (Ubuntu 22.04) with 50 GB gp3 root volume
73
+ - **Security group** allowing SSH inbound from `allowed_ssh_cidrs` only, all outbound
74
+ - **IAM role + instance profile** with SSM access (for debugging via Session Manager)
75
+ - **Key pair** from your local SSH public key
76
+ - **User data** that installs git, rsync, Node.js 20, Claude Code, Codex CLI, and Aider
77
+
78
+ ## API keys
79
+
80
+ The instances don't store API keys. Forward them via the `env` field in `agenttester.yaml`:
81
+
82
+ ```yaml
83
+ agents:
84
+ remote-claude:
85
+ command: 'claude -p {prompt} --allowedTools "Bash,Read,Edit"'
86
+ host: ubuntu@3.14.159.26
87
+ env:
88
+ ANTHROPIC_API_KEY: "sk-ant-..."
89
+ ```
90
+
91
+ Or set them on the instance via SSM Parameter Store / Secrets Manager and source them in your agent command.
92
+
93
+ ## Teardown
94
+
95
+ ```bash
96
+ pulumi destroy
97
+ ```
@@ -0,0 +1,144 @@
1
+ """Pulumi program to deploy agenttester remote-execution infrastructure on AWS."""
2
+
3
+ from pathlib import Path
4
+
5
+ import pulumi
6
+ import pulumi_aws as aws
7
+
8
+ # ── config ────────────────────────────────────────────────────────────
9
+
10
+ config = pulumi.Config()
11
+ instance_type = config.get("instance_type") or "t3.large"
12
+ instance_count = config.get_int("instance_count") or 1
13
+ ssh_pub_key_path = config.get("ssh_public_key_path") or "~/.ssh/id_ed25519.pub"
14
+ allowed_ssh_cidrs = config.require_object("allowed_ssh_cidrs")
15
+
16
+ ssh_pub_key = Path(ssh_pub_key_path).expanduser().read_text().strip()
17
+
18
+ # ── AMI (latest Ubuntu 22.04) ─────────────────────────────────────────
19
+
20
+ ami = aws.ec2.get_ami(
21
+ most_recent=True,
22
+ owners=["099720109477"], # Canonical
23
+ filters=[
24
+ aws.ec2.GetAmiFilterArgs(
25
+ name="name",
26
+ values=["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"],
27
+ ),
28
+ aws.ec2.GetAmiFilterArgs(
29
+ name="virtualization-type",
30
+ values=["hvm"],
31
+ ),
32
+ ],
33
+ )
34
+
35
+ # ── key pair ──────────────────────────────────────────────────────────
36
+
37
+ key_pair = aws.ec2.KeyPair(
38
+ "agenttester-key",
39
+ public_key=ssh_pub_key,
40
+ )
41
+
42
+ # ── security group (SSH only) ─────────────────────────────────────────
43
+
44
+ sg = aws.ec2.SecurityGroup(
45
+ "agenttester-sg",
46
+ description="Allow SSH for agenttester remote agents",
47
+ ingress=[
48
+ aws.ec2.SecurityGroupIngressArgs(
49
+ protocol="tcp",
50
+ from_port=22,
51
+ to_port=22,
52
+ cidr_blocks=allowed_ssh_cidrs,
53
+ description="SSH",
54
+ ),
55
+ ],
56
+ egress=[
57
+ aws.ec2.SecurityGroupEgressArgs(
58
+ protocol="-1",
59
+ from_port=0,
60
+ to_port=0,
61
+ cidr_blocks=["0.0.0.0/0"],
62
+ description="All outbound",
63
+ ),
64
+ ],
65
+ )
66
+
67
+ # ── IAM role + instance profile ───────────────────────────────────────
68
+
69
+ assume_role_policy = """{
70
+ "Version": "2012-10-17",
71
+ "Statement": [{
72
+ "Effect": "Allow",
73
+ "Principal": {"Service": "ec2.amazonaws.com"},
74
+ "Action": "sts:AssumeRole"
75
+ }]
76
+ }"""
77
+
78
+ role = aws.iam.Role(
79
+ "agenttester-role",
80
+ assume_role_policy=assume_role_policy,
81
+ tags={"Project": "agenttester"},
82
+ )
83
+
84
+ # SSM managed policy so you can debug via Session Manager if needed
85
+ aws.iam.RolePolicyAttachment(
86
+ "agenttester-ssm",
87
+ role=role.name,
88
+ policy_arn="arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
89
+ )
90
+
91
+ instance_profile = aws.iam.InstanceProfile(
92
+ "agenttester-profile",
93
+ role=role.name,
94
+ )
95
+
96
+ # ── user data ─────────────────────────────────────────────────────────
97
+
98
+ user_data_script = (Path(__file__).parent / "user_data.sh").read_text()
99
+
100
+ # ── EC2 instances ─────────────────────────────────────────────────────
101
+
102
+ instances = []
103
+ for i in range(instance_count):
104
+ inst = aws.ec2.Instance(
105
+ f"agenttester-agent-{i}",
106
+ ami=ami.id,
107
+ instance_type=instance_type,
108
+ key_name=key_pair.key_name,
109
+ vpc_security_group_ids=[sg.id],
110
+ iam_instance_profile=instance_profile.name,
111
+ user_data=user_data_script,
112
+ root_block_device=aws.ec2.InstanceRootBlockDeviceArgs(
113
+ volume_size=50,
114
+ volume_type="gp3",
115
+ ),
116
+ tags={
117
+ "Name": f"agenttester-agent-{i}",
118
+ "Project": "agenttester",
119
+ },
120
+ )
121
+ instances.append(inst)
122
+
123
+ # ── outputs ───────────────────────────────────────────────────────────
124
+
125
+ pulumi.export("instance_ids", [inst.id for inst in instances])
126
+ pulumi.export("public_ips", [inst.public_ip for inst in instances])
127
+ pulumi.export(
128
+ "ssh_hosts",
129
+ [inst.public_ip.apply(lambda ip: f"ubuntu@{ip}") for inst in instances],
130
+ )
131
+ pulumi.export(
132
+ "agenttester_yaml_snippet",
133
+ pulumi.Output.all(*[inst.public_ip for inst in instances]).apply(
134
+ lambda ips: "\n".join(
135
+ f" remote-agent-{i}:\n"
136
+ f' command: \'claude -p {{prompt}} --allowedTools "Bash,Read,Edit"\'\n'
137
+ f" host: ubuntu@{ip}\n"
138
+ f" remote_workdir: /tmp/agenttester\n"
139
+ f" commit_style: auto\n"
140
+ f" timeout: 600"
141
+ for i, ip in enumerate(ips)
142
+ )
143
+ ),
144
+ )
@@ -0,0 +1,2 @@
1
+ pulumi>=3.0.0,<4.0.0
2
+ pulumi-aws>=6.0.0,<7.0.0
@@ -0,0 +1,25 @@
1
+ #!/bin/bash
2
+ set -euxo pipefail
3
+
4
+ # ── system packages ───────────────────────────────────────────────────
5
+ apt-get update
6
+ apt-get install -y git rsync curl unzip python3-pip python3-venv
7
+
8
+ # ── Node.js 20 (for Claude Code and Codex CLI) ───────────────────────
9
+ curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
10
+ apt-get install -y nodejs
11
+
12
+ # ── Claude Code ───────────────────────────────────────────────────────
13
+ npm install -g @anthropic-ai/claude-code
14
+
15
+ # ── OpenAI Codex CLI ──────────────────────────────────────────────────
16
+ npm install -g @openai/codex
17
+
18
+ # ── Aider ─────────────────────────────────────────────────────────────
19
+ pip3 install --break-system-packages aider-chat || pip3 install aider-chat  # pip < 23.0.1 (Ubuntu 22.04's apt pip) rejects the flag; fall back so `set -e` does not abort provisioning
20
+
21
+ # ── workspace directory ───────────────────────────────────────────────
22
+ mkdir -p /tmp/agenttester
23
+ chown ubuntu:ubuntu /tmp/agenttester
24
+
25
+ echo "agenttester agent host ready" > /var/log/agenttester-setup.log
@@ -0,0 +1,14 @@
1
+ services:
2
+ agenttester:
3
+ build: .
4
+ volumes:
5
+ - ${REPO_PATH:-.}:/repo
6
+ # Forward SSH agent for remote execution
7
+ - ${SSH_AUTH_SOCK:-/dev/null}:/ssh-agent
8
+ # Share host SSH config for remote hosts
9
+ - ~/.ssh:/root/.ssh:ro
10
+ environment:
11
+ - SSH_AUTH_SOCK=/ssh-agent
12
+ # Pass API keys through to agents
13
+ - ANTHROPIC_API_KEY
14
+ - OPENAI_API_KEY
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agenttester"
7
+ version = "0.1.0"
8
+ description = "Send prompts to multiple coding agents in parallel and compare results"
9
+ requires-python = ">=3.10"
10
+ license = "MIT"
11
+ dependencies = [
12
+ "typer>=0.9",
13
+ "pyyaml>=6.0",
14
+ "rich>=13.0",
15
+ "gitpython>=3.1",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "ruff>=0.4",
21
+ "pytest>=8.0",
22
+ "pytest-asyncio>=0.23",
23
+ ]
24
+
25
+ [project.scripts]
26
+ agenttester = "agenttester.cli:app"
27
+
28
+ [tool.ruff]
29
+ target-version = "py310"
30
+ line-length = 88
31
+
32
+ [tool.ruff.lint]
33
+ select = ["E", "F", "I", "N", "UP", "B", "SIM", "RUF"]
34
+
35
+ [tool.ruff.lint.isort]
36
+ known-first-party = ["agenttester"]
37
+
38
+ [tool.pytest.ini_options]
39
+ asyncio_mode = "auto"
40
+ testpaths = ["tests"]
@@ -0,0 +1,24 @@
1
+ """AgentTester: Multi-agent comparison tool.
2
+
3
+ Usable as a CLI (``agenttester run …``), a Docker container, or a Python
4
+ library::
5
+
6
+ from agenttester import Orchestrator, AgentConfig, load_config
7
+ """
8
+
9
+ from .agent_runner import AgentResult, run_agent
10
+ from .config import AgentConfig, load_config
11
+ from .git_manager import DiffStats, GitManager
12
+ from .orchestrator import Orchestrator
13
+
14
+ __all__ = [
15
+ "AgentConfig",
16
+ "AgentResult",
17
+ "DiffStats",
18
+ "GitManager",
19
+ "Orchestrator",
20
+ "load_config",
21
+ "run_agent",
22
+ ]
23
+
24
+ __version__ = "0.1.0"