coderace 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderace-0.2.0/CHANGELOG.md +34 -0
- {coderace-0.1.0 → coderace-0.2.0}/PKG-INFO +79 -10
- {coderace-0.1.0 → coderace-0.2.0}/README.md +78 -9
- coderace-0.2.0/all-day-build-contract-v0.2.md +137 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/__init__.py +1 -1
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/__init__.py +3 -0
- coderace-0.2.0/coderace/adapters/opencode.py +18 -0
- coderace-0.2.0/coderace/cli.py +509 -0
- coderace-0.2.0/coderace/html_report.py +134 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/reporter.py +60 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/scorer.py +10 -15
- coderace-0.2.0/coderace/stats.py +97 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/task.py +13 -0
- coderace-0.2.0/coderace/types.py +130 -0
- coderace-0.2.0/examples/add-type-hints.yaml +31 -0
- coderace-0.2.0/examples/example-task.yaml +23 -0
- coderace-0.2.0/examples/fix-edge-case.yaml +37 -0
- coderace-0.2.0/examples/write-tests.yaml +37 -0
- {coderace-0.1.0 → coderace-0.2.0}/pyproject.toml +4 -1
- {coderace-0.1.0 → coderace-0.2.0}/tests/test_adapters.py +12 -1
- {coderace-0.1.0 → coderace-0.2.0}/tests/test_cli.py +2 -1
- coderace-0.2.0/tests/test_examples.py +65 -0
- coderace-0.2.0/tests/test_html_report.py +95 -0
- coderace-0.2.0/tests/test_scorer.py +80 -0
- coderace-0.2.0/tests/test_stats.py +76 -0
- coderace-0.2.0/uv.lock +349 -0
- coderace-0.1.0/coderace/cli.py +0 -306
- coderace-0.1.0/coderace/types.py +0 -73
- coderace-0.1.0/tests/test_scorer.py +0 -46
- {coderace-0.1.0 → coderace-0.2.0}/.github/workflows/publish.yml +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/.gitignore +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/LICENSE +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/aider.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/base.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/claude.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/codex.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/adapters/gemini.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/coderace/git_ops.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/tests/__init__.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/tests/conftest.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/tests/test_git_ops.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/tests/test_reporter.py +0 -0
- {coderace-0.1.0 → coderace-0.2.0}/tests/test_task.py +0 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.2.0] - 2026-02-23
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **OpenCode adapter** - OpenCode (terminal-first open-source coding agent) is now a supported agent (`opencode` in task YAML)
|
|
8
|
+
- **Custom scoring weights** - Override default weights in task YAML via `scoring:` section; weights are auto-normalized; supports short aliases (`tests`, `exit`, `lint`, `time`, `lines`)
|
|
9
|
+
- **HTML reports** - Self-contained single-file HTML report auto-generated on every run at `.coderace/<task>-results.html`; also `coderace results --html report.html` for manual export; sortable columns, dark theme
|
|
10
|
+
- **Statistical mode** - `coderace run task.yaml --runs N` for multi-run comparison; shows mean ± stddev for score, time, and lines changed; saves per-run and aggregated JSON
|
|
11
|
+
- **Example tasks** - `examples/` directory with 3 ready-to-use templates: `add-type-hints.yaml`, `fix-edge-case.yaml`, `write-tests.yaml`
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- `coderace init` template now includes OpenCode in default agent list
|
|
16
|
+
- `coderace init` template includes commented scoring example
|
|
17
|
+
- README: "Try it now" section, statistical mode docs, HTML report docs, custom scoring docs, updated agent table
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
|
|
21
|
+
- `opencode` now accepted as a valid agent name in task validation
|
|
22
|
+
|
|
23
|
+
## [0.1.0] - 2026-02-22
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- Initial release
|
|
28
|
+
- CLI: `init`, `run`, `results`, `version` commands
|
|
29
|
+
- 4 agent adapters: Claude Code, Codex, Aider, Gemini CLI
|
|
30
|
+
- Sequential and parallel (git worktrees) run modes
|
|
31
|
+
- Composite scoring: tests (40%), exit (20%), lint (15%), time (15%), lines (10%)
|
|
32
|
+
- JSON results output
|
|
33
|
+
- Rich terminal table output
|
|
34
|
+
- `coderace run --parallel` using git worktrees
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderace
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Race coding agents against each other on real tasks
|
|
5
5
|
Project-URL: Homepage, https://github.com/mikiships/coderace
|
|
6
6
|
Project-URL: Repository, https://github.com/mikiships/coderace
|
|
@@ -30,9 +30,11 @@ Description-Content-Type: text/markdown
|
|
|
30
30
|
|
|
31
31
|
# coderace
|
|
32
32
|
|
|
33
|
-
Race coding agents against each other on real tasks in your repo.
|
|
33
|
+
Stop reading blog comparisons. Race coding agents against each other on real tasks in *your* repo with *your* code.
|
|
34
34
|
|
|
35
|
-
|
|
35
|
+
Every week there's a new "Claude Code vs Codex vs Cursor" post. They test on toy problems with cherry-picked examples. coderace gives you automated, reproducible, scored comparisons on the tasks you actually care about.
|
|
36
|
+
|
|
37
|
+
Define a task. Run it against Claude Code, Codex, Aider, Gemini CLI, and OpenCode. Get a scored comparison table.
|
|
36
38
|
|
|
37
39
|
## Install
|
|
38
40
|
|
|
@@ -108,16 +110,71 @@ Terminal table with Rich formatting:
|
|
|
108
110
|
└──────┴────────┴───────┴───────┴──────┴──────┴──────────┴───────┘
|
|
109
111
|
```
|
|
110
112
|
|
|
111
|
-
Results also saved as JSON in `.coderace/<task>-results.json`.
|
|
113
|
+
Results also saved as JSON in `.coderace/<task>-results.json` and as a self-contained HTML report in `.coderace/<task>-results.html`.
|
|
114
|
+
|
|
115
|
+
## Try It Now
|
|
116
|
+
|
|
117
|
+
The `examples/` directory has ready-to-use task templates:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Race agents on adding type hints to your project
|
|
121
|
+
coderace run examples/add-type-hints.yaml
|
|
122
|
+
|
|
123
|
+
# Race agents on fixing an edge case bug
|
|
124
|
+
coderace run examples/fix-edge-case.yaml
|
|
125
|
+
|
|
126
|
+
# Race agents on writing new tests
|
|
127
|
+
coderace run examples/write-tests.yaml
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Edit the `repo` and `description` fields to point at your actual project and describe your real task.
|
|
131
|
+
|
|
132
|
+
## Statistical Mode
|
|
133
|
+
|
|
134
|
+
Run each agent multiple times and get mean ± stddev:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
coderace run task.yaml --runs 5
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Useful for tasks with variable outcomes (LLM nondeterminism is real).
|
|
141
|
+
|
|
142
|
+
## HTML Reports
|
|
143
|
+
|
|
144
|
+
Export results as a shareable single-file HTML report:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Auto-generated on every run at .coderace/<task>-results.html
|
|
148
|
+
# Or export manually:
|
|
149
|
+
coderace results task.yaml --html report.html
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The HTML report has sortable columns and a dark theme. Drop it in a blog post or Slack.
|
|
153
|
+
|
|
154
|
+
## Custom Scoring
|
|
155
|
+
|
|
156
|
+
Override the default weights in your task YAML:
|
|
157
|
+
|
|
158
|
+
```yaml
|
|
159
|
+
scoring:
|
|
160
|
+
tests: 60 # tests passing (default 40)
|
|
161
|
+
exit: 20 # clean exit (default 20)
|
|
162
|
+
lint: 10 # lint clean (default 15)
|
|
163
|
+
time: 5 # wall time (default 15)
|
|
164
|
+
lines: 5 # lines changed (default 10)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Weights are normalized automatically, so they don't need to sum to 100.
|
|
112
168
|
|
|
113
169
|
## Supported Agents
|
|
114
170
|
|
|
115
|
-
| Agent | CLI |
|
|
116
|
-
|
|
117
|
-
| Claude Code | `claude` |
|
|
118
|
-
| Codex | `codex` |
|
|
119
|
-
| Aider | `aider` |
|
|
120
|
-
| Gemini CLI | `gemini` |
|
|
171
|
+
| Agent | CLI | Notes |
|
|
172
|
+
|-------|-----|-------|
|
|
173
|
+
| Claude Code | `claude` | Anthropic's coding agent |
|
|
174
|
+
| Codex | `codex` | OpenAI Codex CLI |
|
|
175
|
+
| Aider | `aider` | Git-integrated AI coding |
|
|
176
|
+
| Gemini CLI | `gemini` | Google's Gemini CLI |
|
|
177
|
+
| OpenCode | `opencode` | Open-source terminal agent |
|
|
121
178
|
|
|
122
179
|
Each agent must be installed and authenticated separately.
|
|
123
180
|
|
|
@@ -131,6 +188,18 @@ coderace run task.yaml --parallel
|
|
|
131
188
|
|
|
132
189
|
Sequential mode (default) runs agents one at a time on the same repo.
|
|
133
190
|
|
|
191
|
+
## Why coderace?
|
|
192
|
+
|
|
193
|
+
**Blog posts compare models. coderace compares agents on your work.**
|
|
194
|
+
|
|
195
|
+
- Run on your actual codebase, not HumanEval
|
|
196
|
+
- Automated scoring: tests, lint, time, lines changed
|
|
197
|
+
- Parallel mode with git worktrees (no interference between agents)
|
|
198
|
+
- JSON output for CI integration and tracking over time
|
|
199
|
+
- Works with any agent that has a CLI
|
|
200
|
+
|
|
201
|
+
The goal isn't "which model is best." It's "which agent solves my specific problem best."
|
|
202
|
+
|
|
134
203
|
## Requirements
|
|
135
204
|
|
|
136
205
|
- Python 3.10+
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# coderace
|
|
2
2
|
|
|
3
|
-
Race coding agents against each other on real tasks in your repo.
|
|
3
|
+
Stop reading blog comparisons. Race coding agents against each other on real tasks in *your* repo with *your* code.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Every week there's a new "Claude Code vs Codex vs Cursor" post. They test on toy problems with cherry-picked examples. coderace gives you automated, reproducible, scored comparisons on the tasks you actually care about.
|
|
6
|
+
|
|
7
|
+
Define a task. Run it against Claude Code, Codex, Aider, Gemini CLI, and OpenCode. Get a scored comparison table.
|
|
6
8
|
|
|
7
9
|
## Install
|
|
8
10
|
|
|
@@ -78,16 +80,71 @@ Terminal table with Rich formatting:
|
|
|
78
80
|
└──────┴────────┴───────┴───────┴──────┴──────┴──────────┴───────┘
|
|
79
81
|
```
|
|
80
82
|
|
|
81
|
-
Results also saved as JSON in `.coderace/<task>-results.json`.
|
|
83
|
+
Results also saved as JSON in `.coderace/<task>-results.json` and as a self-contained HTML report in `.coderace/<task>-results.html`.
|
|
84
|
+
|
|
85
|
+
## Try It Now
|
|
86
|
+
|
|
87
|
+
The `examples/` directory has ready-to-use task templates:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# Race agents on adding type hints to your project
|
|
91
|
+
coderace run examples/add-type-hints.yaml
|
|
92
|
+
|
|
93
|
+
# Race agents on fixing an edge case bug
|
|
94
|
+
coderace run examples/fix-edge-case.yaml
|
|
95
|
+
|
|
96
|
+
# Race agents on writing new tests
|
|
97
|
+
coderace run examples/write-tests.yaml
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Edit the `repo` and `description` fields to point at your actual project and describe your real task.
|
|
101
|
+
|
|
102
|
+
## Statistical Mode
|
|
103
|
+
|
|
104
|
+
Run each agent multiple times and get mean ± stddev:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
coderace run task.yaml --runs 5
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Useful for tasks with variable outcomes (LLM nondeterminism is real).
|
|
111
|
+
|
|
112
|
+
## HTML Reports
|
|
113
|
+
|
|
114
|
+
Export results as a shareable single-file HTML report:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Auto-generated on every run at .coderace/<task>-results.html
|
|
118
|
+
# Or export manually:
|
|
119
|
+
coderace results task.yaml --html report.html
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
The HTML report has sortable columns and a dark theme. Drop it in a blog post or Slack.
|
|
123
|
+
|
|
124
|
+
## Custom Scoring
|
|
125
|
+
|
|
126
|
+
Override the default weights in your task YAML:
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
scoring:
|
|
130
|
+
tests: 60 # tests passing (default 40)
|
|
131
|
+
exit: 20 # clean exit (default 20)
|
|
132
|
+
lint: 10 # lint clean (default 15)
|
|
133
|
+
time: 5 # wall time (default 15)
|
|
134
|
+
lines: 5 # lines changed (default 10)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Weights are normalized automatically, so they don't need to sum to 100.
|
|
82
138
|
|
|
83
139
|
## Supported Agents
|
|
84
140
|
|
|
85
|
-
| Agent | CLI |
|
|
86
|
-
|
|
87
|
-
| Claude Code | `claude` |
|
|
88
|
-
| Codex | `codex` |
|
|
89
|
-
| Aider | `aider` |
|
|
90
|
-
| Gemini CLI | `gemini` |
|
|
141
|
+
| Agent | CLI | Notes |
|
|
142
|
+
|-------|-----|-------|
|
|
143
|
+
| Claude Code | `claude` | Anthropic's coding agent |
|
|
144
|
+
| Codex | `codex` | OpenAI Codex CLI |
|
|
145
|
+
| Aider | `aider` | Git-integrated AI coding |
|
|
146
|
+
| Gemini CLI | `gemini` | Google's Gemini CLI |
|
|
147
|
+
| OpenCode | `opencode` | Open-source terminal agent |
|
|
91
148
|
|
|
92
149
|
Each agent must be installed and authenticated separately.
|
|
93
150
|
|
|
@@ -101,6 +158,18 @@ coderace run task.yaml --parallel
|
|
|
101
158
|
|
|
102
159
|
Sequential mode (default) runs agents one at a time on the same repo.
|
|
103
160
|
|
|
161
|
+
## Why coderace?
|
|
162
|
+
|
|
163
|
+
**Blog posts compare models. coderace compares agents on your work.**
|
|
164
|
+
|
|
165
|
+
- Run on your actual codebase, not HumanEval
|
|
166
|
+
- Automated scoring: tests, lint, time, lines changed
|
|
167
|
+
- Parallel mode with git worktrees (no interference between agents)
|
|
168
|
+
- JSON output for CI integration and tracking over time
|
|
169
|
+
- Works with any agent that has a CLI
|
|
170
|
+
|
|
171
|
+
The goal isn't "which model is best." It's "which agent solves my specific problem best."
|
|
172
|
+
|
|
104
173
|
## Requirements
|
|
105
174
|
|
|
106
175
|
- Python 3.10+
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# All-Day Build Contract: coderace v0.2.0
|
|
2
|
+
|
|
3
|
+
Status: In Progress
|
|
4
|
+
Date: 2026-02-23
|
|
5
|
+
Owner: Codex/sub-agent execution pass
|
|
6
|
+
Scope type: Deliverable-gated (no hour promises)
|
|
7
|
+
|
|
8
|
+
## 1. Objective
|
|
9
|
+
|
|
10
|
+
Ship coderace v0.2.0 with five new features that make comparison results shareable, statistically meaningful, and broader in agent coverage. The "Claude Code vs Codex" comparison trend is peaking this week. OpenCode (60k-star open-source alternative) just got a major benchmark review. Adding OpenCode as the 5th agent + HTML reports makes coderace the go-to tool for this moment.
|
|
11
|
+
|
|
12
|
+
This contract is considered complete only when every deliverable and validation gate below is satisfied.
|
|
13
|
+
|
|
14
|
+
## 2. Non-Negotiable Build Rules
|
|
15
|
+
|
|
16
|
+
1. No time-based completion claims.
|
|
17
|
+
2. Completion is allowed only when all checklist items are checked.
|
|
18
|
+
3. Full test suite must pass at the end (existing 39 tests + new tests).
|
|
19
|
+
4. New features must ship with docs and report addendum updates in the same pass.
|
|
20
|
+
5. CLI outputs must be deterministic and schema-backed where specified.
|
|
21
|
+
6. Never modify files outside the project directory.
|
|
22
|
+
7. Commit after each completed deliverable (not at the end).
|
|
23
|
+
8. If stuck on same issue for 3 attempts, stop and write a blocker report.
|
|
24
|
+
9. Do NOT refactor, restyle, or "improve" code outside the deliverables.
|
|
25
|
+
10. Read existing tests and docs before writing new code.
|
|
26
|
+
|
|
27
|
+
## 3. Feature Deliverables
|
|
28
|
+
|
|
29
|
+
### D1. OpenCode Adapter (5th CLI agent)
|
|
30
|
+
|
|
31
|
+
Add OpenCode CLI as a supported agent. OpenCode is a terminal-first open-source coding assistant with 60k+ GitHub stars. It's invoked as `opencode` with similar patterns to other CLI agents.
|
|
32
|
+
|
|
33
|
+
Required files:
|
|
34
|
+
- `coderace/adapters/opencode.py`
|
|
35
|
+
- `tests/test_opencode_adapter.py`
|
|
36
|
+
|
|
37
|
+
- [ ] Implement OpenCode adapter following existing adapter pattern (see claude.py, codex.py, aider.py, gemini.py)
|
|
38
|
+
- [ ] OpenCode invocation: `opencode` CLI with task prompt via stdin or --prompt flag (research actual CLI interface)
|
|
39
|
+
- [ ] If OpenCode CLI is not installed, adapter should detect and report clearly
|
|
40
|
+
- [ ] Register adapter in adapter registry
|
|
41
|
+
- [ ] Add `opencode` to task YAML agents list support
|
|
42
|
+
- [ ] Tests: unit tests for adapter (mock CLI invocation), integration with run pipeline
|
|
43
|
+
- [ ] Update README: add OpenCode to supported agents list and example YAML
|
|
44
|
+
|
|
45
|
+
### D2. Custom Scoring Weights
|
|
46
|
+
|
|
47
|
+
Allow users to override default scoring weights in the task YAML. Currently hardcoded (40/20/15/15/10). Users should be able to tune.
|
|
48
|
+
|
|
49
|
+
Required files:
|
|
50
|
+
- Modify `coderace/scoring.py` (or wherever scoring lives)
|
|
51
|
+
- `tests/test_custom_scoring.py`
|
|
52
|
+
|
|
53
|
+
- [ ] Add optional `scoring` section to task YAML schema:
|
|
54
|
+
```yaml
|
|
55
|
+
scoring:
|
|
56
|
+
tests: 50
|
|
57
|
+
exit: 20
|
|
58
|
+
lint: 10
|
|
59
|
+
time: 10
|
|
60
|
+
lines: 10
|
|
61
|
+
```
|
|
62
|
+
- [ ] Weights are normalized (sum to 100) automatically
|
|
63
|
+
- [ ] If `scoring` section omitted, use current defaults
|
|
64
|
+
- [ ] Validate: all weights >= 0, no unknown keys
|
|
65
|
+
- [ ] Tests: custom weights, partial override, invalid weights, normalization
|
|
66
|
+
|
|
67
|
+
### D3. HTML Report Output
|
|
68
|
+
|
|
69
|
+
Generate a self-contained HTML report from race results. This makes results shareable on blogs, tweets, and team Slack.
|
|
70
|
+
|
|
71
|
+
Required files:
|
|
72
|
+
- `coderace/report.py`
|
|
73
|
+
- `tests/test_report.py`
|
|
74
|
+
|
|
75
|
+
- [ ] `coderace results task.yaml --html report.html` generates a single-file HTML report
|
|
76
|
+
- [ ] Report includes: task name, date, agent scores table, scoring weights used, timing breakdown
|
|
77
|
+
- [ ] Styled with inline CSS (no external dependencies, single file)
|
|
78
|
+
- [ ] Table is sortable by clicking column headers (vanilla JS, inline)
|
|
79
|
+
- [ ] Include a "Generated by coderace" footer with version
|
|
80
|
+
- [ ] Tests: HTML generation, content validation, file output
|
|
81
|
+
|
|
82
|
+
### D4. Statistical Mode (multiple runs)
|
|
83
|
+
|
|
84
|
+
Run the same task N times and report mean/stddev for each metric. Real benchmarking needs statistical significance.
|
|
85
|
+
|
|
86
|
+
Required files:
|
|
87
|
+
- Modify `coderace/runner.py` (or equivalent)
|
|
88
|
+
- `coderace/stats.py`
|
|
89
|
+
- `tests/test_stats.py`
|
|
90
|
+
|
|
91
|
+
- [ ] `coderace run task.yaml --runs 5` runs each agent 5 times
|
|
92
|
+
- [ ] Results show mean ± stddev for score, time, and lines changed
|
|
93
|
+
- [ ] Rich table adapts to show statistical columns
|
|
94
|
+
- [ ] JSON output includes per-run data + aggregates
|
|
95
|
+
- [ ] HTML report (D3) also supports statistical view
|
|
96
|
+
- [ ] Tests: multi-run aggregation, edge cases (1 run = no stddev), JSON schema
|
|
97
|
+
|
|
98
|
+
### D5. Example Benchmark Tasks
|
|
99
|
+
|
|
100
|
+
Ship example tasks that work out of the box on any Python project. Users shouldn't have to write YAML from scratch to try coderace.
|
|
101
|
+
|
|
102
|
+
Required files:
|
|
103
|
+
- `examples/add-type-hints.yaml`
|
|
104
|
+
- `examples/fix-edge-case.yaml`
|
|
105
|
+
- `examples/write-tests.yaml`
|
|
106
|
+
- Update README with examples section
|
|
107
|
+
|
|
108
|
+
- [ ] 3 example task YAMLs that target common patterns (type hints, edge cases, test coverage)
|
|
109
|
+
- [ ] Each example has a description explaining what it tests
|
|
110
|
+
- [ ] Examples reference a small bundled test fixture (or clearly document how to point at user's repo)
|
|
111
|
+
- [ ] README section: "Try it now" with copy-paste commands
|
|
112
|
+
- [ ] Tests: validate example YAML files parse correctly
|
|
113
|
+
|
|
114
|
+
## 4. Test Requirements
|
|
115
|
+
|
|
116
|
+
- [ ] Unit tests for each deliverable (specified above)
|
|
117
|
+
- [ ] All 39 existing tests must still pass
|
|
118
|
+
- [ ] Integration test: run full pipeline with mock agents including OpenCode
|
|
119
|
+
- [ ] Edge cases: empty results, single agent, all agents fail, custom weights sum to 0
|
|
120
|
+
|
|
121
|
+
## 5. Reports
|
|
122
|
+
|
|
123
|
+
- Write progress to `progress-log.md` after each deliverable
|
|
124
|
+
- Include: what was built, what tests pass, what's next, any blockers
|
|
125
|
+
- Final summary when all deliverables done or stopped
|
|
126
|
+
|
|
127
|
+
## 6. Stop Conditions
|
|
128
|
+
|
|
129
|
+
- All deliverables checked and all tests passing -> DONE
|
|
130
|
+
- 3 consecutive failed attempts on same issue -> STOP, write blocker report
|
|
131
|
+
- Scope creep detected (new requirements discovered) -> STOP, report what's new
|
|
132
|
+
- All tests passing but deliverables remain -> continue to next deliverable
|
|
133
|
+
|
|
134
|
+
## 7. Version Bump
|
|
135
|
+
|
|
136
|
+
- [ ] Bump version to 0.2.0 in pyproject.toml
|
|
137
|
+
- [ ] Update CHANGELOG or add one if missing
|
|
@@ -5,12 +5,14 @@ from coderace.adapters.base import BaseAdapter
|
|
|
5
5
|
from coderace.adapters.claude import ClaudeAdapter
|
|
6
6
|
from coderace.adapters.codex import CodexAdapter
|
|
7
7
|
from coderace.adapters.gemini import GeminiAdapter
|
|
8
|
+
from coderace.adapters.opencode import OpenCodeAdapter
|
|
8
9
|
|
|
9
10
|
ADAPTERS: dict[str, type[BaseAdapter]] = {
|
|
10
11
|
"claude": ClaudeAdapter,
|
|
11
12
|
"codex": CodexAdapter,
|
|
12
13
|
"aider": AiderAdapter,
|
|
13
14
|
"gemini": GeminiAdapter,
|
|
15
|
+
"opencode": OpenCodeAdapter,
|
|
14
16
|
}
|
|
15
17
|
|
|
16
18
|
__all__ = [
|
|
@@ -20,4 +22,5 @@ __all__ = [
|
|
|
20
22
|
"CodexAdapter",
|
|
21
23
|
"AiderAdapter",
|
|
22
24
|
"GeminiAdapter",
|
|
25
|
+
"OpenCodeAdapter",
|
|
23
26
|
]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""OpenCode adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from coderace.adapters.base import BaseAdapter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OpenCodeAdapter(BaseAdapter):
    """Adapter that drives the OpenCode CLI, a terminal-first AI coding agent.

    The adapter is registered under the agent name ``opencode``.
    """

    name = "opencode"

    def build_command(self, task_description: str) -> list[str]:
        """Return the argument vector for one OpenCode invocation.

        The task description is passed as a single positional argument
        after the ``run`` subcommand: ``opencode run <task_description>``.

        Args:
            task_description: The prompt text handed to OpenCode verbatim.

        Returns:
            A three-element list: the executable name, the ``run``
            subcommand, and the task description.
        """
        argv = ["opencode", "run"]
        argv.append(task_description)
        return argv
|