autoevolve 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autoevolve-1.0.0/.gitignore +38 -0
- autoevolve-1.0.0/PKG-INFO +90 -0
- autoevolve-1.0.0/README.md +68 -0
- autoevolve-1.0.0/auto_evolve.py +966 -0
- autoevolve-1.0.0/pyproject.toml +35 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
*.whl
|
|
10
|
+
|
|
11
|
+
# Environment variables
|
|
12
|
+
.env
|
|
13
|
+
|
|
14
|
+
# Virtual environments
|
|
15
|
+
.venv/
|
|
16
|
+
venv/
|
|
17
|
+
env/
|
|
18
|
+
|
|
19
|
+
# IDE
|
|
20
|
+
.idea/
|
|
21
|
+
.vscode/
|
|
22
|
+
*.swp
|
|
23
|
+
*.swo
|
|
24
|
+
*~
|
|
25
|
+
.DS_Store
|
|
26
|
+
|
|
27
|
+
# Testing
|
|
28
|
+
.pytest_cache/
|
|
29
|
+
.coverage
|
|
30
|
+
htmlcov/
|
|
31
|
+
.mypy_cache/
|
|
32
|
+
|
|
33
|
+
# Claude Code working memory
|
|
34
|
+
.memory/
|
|
35
|
+
|
|
36
|
+
# Distribution
|
|
37
|
+
*.tar.gz
|
|
38
|
+
.docs/
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: autoevolve
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Multi-agent research competition orchestrator for autoresearch
|
|
5
|
+
Project-URL: Homepage, https://github.com/dean0x/autolab
|
|
6
|
+
Project-URL: Repository, https://github.com/dean0x/autolab
|
|
7
|
+
Project-URL: Issues, https://github.com/dean0x/autolab/issues
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: autoresearch,gpt,karpathy,multi-agent,pretraining
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# autoevolve
|
|
24
|
+
|
|
25
|
+
Multi-agent research competition orchestrator for [autoresearch](https://github.com/karpathy/autoresearch). Run parallel AI agents with different strategies and cross-pollinate winning ideas.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install autoevolve
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Initialize a 3-agent competition
|
|
37
|
+
autoevolve init --agents 3 --tag mar15
|
|
38
|
+
|
|
39
|
+
# Check who's winning
|
|
40
|
+
autoevolve status
|
|
41
|
+
autoevolve leaderboard --detailed
|
|
42
|
+
|
|
43
|
+
# Spread winning ideas to all agents
|
|
44
|
+
autoevolve pollinate
|
|
45
|
+
|
|
46
|
+
# Export results
|
|
47
|
+
autoevolve export --format json -o evolve-results.json
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## How It Works
|
|
51
|
+
|
|
52
|
+
1. **init** creates one git branch per agent, each with a different research strategy
|
|
53
|
+
2. Each agent works independently on its branch using autojudge + autosteer
|
|
54
|
+
3. **leaderboard** ranks agents by best val_bpb with keep rate tracking
|
|
55
|
+
4. **pollinate** writes the leader's best experiments to `evolve-hints.md` — readable from any branch
|
|
56
|
+
5. Agents incorporate hints and continue competing
|
|
57
|
+
|
|
58
|
+
## Built-in Strategies
|
|
59
|
+
|
|
60
|
+
| Strategy | Approach |
|
|
61
|
+
|----------|----------|
|
|
62
|
+
| Architecture First | Explore model structure before tuning |
|
|
63
|
+
| Hyperparams First | Sweep learning rates and schedules first |
|
|
64
|
+
| Optimizer First | Tune Muon/Adam parameters first |
|
|
65
|
+
| Regularization First | Explore weight decay, dropout, z-loss |
|
|
66
|
+
| Efficiency First | Maximize compute efficiency to run more experiments |
|
|
67
|
+
| Radical | Bold, unconventional changes |
|
|
68
|
+
|
|
69
|
+
Strategies are assigned round-robin. With 3 agents, you get 3 different strategies competing.
|
|
70
|
+
|
|
71
|
+
## Commands
|
|
72
|
+
|
|
73
|
+
| Command | Description |
|
|
74
|
+
|---------|-------------|
|
|
75
|
+
| `autoevolve init --agents N --tag TAG` | Create N agent branches |
|
|
76
|
+
| `autoevolve status` | Quick overview with current leader |
|
|
77
|
+
| `autoevolve leaderboard` | Ranked table with keep rates |
|
|
78
|
+
| `autoevolve leaderboard --detailed` | Full trajectories + strategy effectiveness |
|
|
79
|
+
| `autoevolve pollinate` | Cross-pollinate winning ideas |
|
|
80
|
+
| `autoevolve export --format json\|tsv` | Export results for analysis |
|
|
81
|
+
|
|
82
|
+
## Requirements
|
|
83
|
+
|
|
84
|
+
- Python >= 3.10
|
|
85
|
+
- A git repository with autoresearch set up
|
|
86
|
+
- Multiple compute environments (one per agent)
|
|
87
|
+
|
|
88
|
+
## License
|
|
89
|
+
|
|
90
|
+
MIT
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# autoevolve
|
|
2
|
+
|
|
3
|
+
Multi-agent research competition orchestrator for [autoresearch](https://github.com/karpathy/autoresearch). Run parallel AI agents with different strategies and cross-pollinate winning ideas.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install autoevolve
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Initialize a 3-agent competition
|
|
15
|
+
autoevolve init --agents 3 --tag mar15
|
|
16
|
+
|
|
17
|
+
# Check who's winning
|
|
18
|
+
autoevolve status
|
|
19
|
+
autoevolve leaderboard --detailed
|
|
20
|
+
|
|
21
|
+
# Spread winning ideas to all agents
|
|
22
|
+
autoevolve pollinate
|
|
23
|
+
|
|
24
|
+
# Export results
|
|
25
|
+
autoevolve export --format json -o evolve-results.json
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## How It Works
|
|
29
|
+
|
|
30
|
+
1. **init** creates one git branch per agent, each with a different research strategy
|
|
31
|
+
2. Each agent works independently on its branch using autojudge + autosteer
|
|
32
|
+
3. **leaderboard** ranks agents by best val_bpb with keep rate tracking
|
|
33
|
+
4. **pollinate** writes the leader's best experiments to `evolve-hints.md` — readable from any branch
|
|
34
|
+
5. Agents incorporate hints and continue competing
|
|
35
|
+
|
|
36
|
+
## Built-in Strategies
|
|
37
|
+
|
|
38
|
+
| Strategy | Approach |
|
|
39
|
+
|----------|----------|
|
|
40
|
+
| Architecture First | Explore model structure before tuning |
|
|
41
|
+
| Hyperparams First | Sweep learning rates and schedules first |
|
|
42
|
+
| Optimizer First | Tune Muon/Adam parameters first |
|
|
43
|
+
| Regularization First | Explore weight decay, dropout, z-loss |
|
|
44
|
+
| Efficiency First | Maximize compute efficiency to run more experiments |
|
|
45
|
+
| Radical | Bold, unconventional changes |
|
|
46
|
+
|
|
47
|
+
Strategies are assigned round-robin. With 3 agents, you get 3 different strategies competing.
|
|
48
|
+
|
|
49
|
+
## Commands
|
|
50
|
+
|
|
51
|
+
| Command | Description |
|
|
52
|
+
|---------|-------------|
|
|
53
|
+
| `autoevolve init --agents N --tag TAG` | Create N agent branches |
|
|
54
|
+
| `autoevolve status` | Quick overview with current leader |
|
|
55
|
+
| `autoevolve leaderboard` | Ranked table with keep rates |
|
|
56
|
+
| `autoevolve leaderboard --detailed` | Full trajectories + strategy effectiveness |
|
|
57
|
+
| `autoevolve pollinate` | Cross-pollinate winning ideas |
|
|
58
|
+
| `autoevolve export --format json\|tsv` | Export results for analysis |
|
|
59
|
+
|
|
60
|
+
## Requirements
|
|
61
|
+
|
|
62
|
+
- Python >= 3.10
|
|
63
|
+
- A git repository with autoresearch set up
|
|
64
|
+
- Multiple compute environments (one per agent)
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
MIT
|
|
@@ -0,0 +1,966 @@
|
|
|
1
|
+
"""
|
|
2
|
+
auto-evolve: Multi-agent research competition orchestrator for autoresearch.
|
|
3
|
+
|
|
4
|
+
Manages multiple competing autoresearch agents on separate git branches,
|
|
5
|
+
with leaderboard tracking and cross-pollination of winning ideas.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import subprocess
|
|
13
|
+
import sys
|
|
14
|
+
import tempfile
|
|
15
|
+
from dataclasses import dataclass, field, asdict
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Generic, Optional, TypeVar, Union
|
|
19
|
+
|
|
20
|
+
import click
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Output infrastructure
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class OutputConfig:
    """Immutable output preferences shared by the CLI commands."""

    color: bool  # when False, styled() hands text back unchanged
    quiet: bool  # commands read this to decide how much to print

    def styled(self, text: str, **kwargs) -> str:
        """Return ``text`` run through ``click.style`` when color is enabled.

        ``kwargs`` are forwarded to ``click.style`` untouched. With color
        disabled they are ignored and ``text`` is returned as-is.
        """
        if not self.color:
            return text
        return click.style(text, **kwargs)
|
|
33
|
+
|
|
34
|
+
# Status symbols
|
|
35
|
+
SYM_KEEP = "\N{HEAVY CHECK MARK}"  # ✔
SYM_FAIL = "\N{HEAVY BALLOT X}"  # ✘
SYM_CRASH = "\N{SKULL AND CROSSBONES}"  # ☠
SYM_WARN = "\N{WARNING SIGN}"  # ⚠
SYM_ARROW = "\N{RIGHTWARDS ARROW}"  # →
SYM_STAR = "\N{BLACK STAR}"  # ★
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Result type — all fallible operations return Result instead of raising
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
# Type parameter for the payload carried by a successful result.
T = TypeVar("T")


@dataclass(frozen=True)
class Ok(Generic[T]):
    """Successful outcome of a fallible operation, wrapping ``value``."""

    value: T

    @property
    def ok(self) -> bool:
        """Always True, so callers can branch on ``result.ok``."""
        return True


@dataclass(frozen=True)
class Err:
    """Failed outcome of a fallible operation, carrying a message."""

    error: str

    @property
    def ok(self) -> bool:
        """Always False, so callers can branch on ``result.ok``."""
        return False


# A fallible operation yields either Ok[T] or Err.
Result = Union[Ok[T], Err]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# Domain types
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
# Built-in research strategies. Each entry is a dict with "key", "label" and
# "guidance"; the records are listed as (key, label, guidance) tuples and
# expanded into dicts below.
STRATEGIES: list[dict[str, str]] = [
    {"key": key, "label": label, "guidance": guidance}
    for key, label, guidance in (
        (
            "architecture-first",
            "Architecture First",
            "Start by exploring model architecture (depth, width, attention patterns, "
            "MLP ratio). Once you find a good architecture, fine-tune hyperparams.",
        ),
        (
            "hyperparams-first",
            "Hyperparams First",
            "Start by sweeping hyperparameters (learning rates, batch size, "
            "warmup/cooldown). Find optimal training dynamics before changing architecture.",
        ),
        (
            "optimizer-first",
            "Optimizer First",
            "Start by tuning the optimizer (Muon momentum, ns_steps, AdamW betas, "
            "weight decay schedule). A well-tuned optimizer can unlock gains.",
        ),
        (
            "regularization-first",
            "Regularization First",
            "Start by exploring regularization (weight decay, dropout, z-loss, softcap "
            "values). Prevent overfitting before scaling up.",
        ),
        (
            "efficiency-first",
            "Efficiency First",
            "Start by maximizing compute efficiency (larger batch size, better memory "
            "usage, faster iteration). More experiments = more chances.",
        ),
        (
            "radical",
            "Radical",
            "Try bold, unconventional changes. Large architecture modifications, novel "
            "activation functions, unusual training schedules. Go big or go home.",
        ),
    )
]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(frozen=True)
class Experiment:
    """One data row of an agent's results.tsv, in column order."""

    commit: str  # first TSV column
    val_bpb: float  # second column, parsed as float
    memory_gb: float  # third column, parsed as float
    status: str  # fourth column, e.g. "keep"
    description: str  # remaining column(s) of the row
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass
class AgentConfig:
    """Settings of one competing agent, as stored in evolve.json."""

    id: int  # agent number
    branch: str  # git branch the agent works on
    strategy: str  # "key" of an entry in STRATEGIES
    status: str = "pending"  # value given to freshly created agents
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass
class EvolveConfig:
    """Top-level evolve state that is saved to and loaded from evolve.json."""

    tag: str  # evolve tag, e.g. "mar15"
    base_branch: str  # branch the agent branches were forked from
    base_commit: str  # short SHA of base_branch recorded at init time
    created_at: str  # creation timestamp, kept as a string
    agents: list[AgentConfig] = field(default_factory=list)  # fresh list per instance
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass(frozen=True)
class AgentStatus:
    """Snapshot of an agent's progress, computed from its results.tsv."""

    agent: AgentConfig  # the agent this snapshot describes
    experiments: list[Experiment]  # every parsed row, in file order
    best_val_bpb: Optional[float]  # val_bpb of best_experiment, or None
    best_experiment: Optional[Experiment]  # None when there is no usable result
    keep_count: int  # number of rows whose status is "keep"
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
# Git helpers — thin wrappers around subprocess
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
def _run_git(*args: str, check: bool = True, timeout: int = 30) -> Result[str]:
    """Run ``git <args>`` and return its stripped stdout wrapped in a Result.

    Args:
        *args: Arguments placed after ``git`` on the command line.
        check: When True, a non-zero exit status becomes an ``Err``. When
            False, the exit status is not inspected and stdout is returned
            in an ``Ok`` even if git reported a failure.
        timeout: Seconds to wait before giving up on the command.

    Returns:
        ``Ok`` with stdout (whitespace-stripped), or ``Err`` describing a
        timeout, a non-zero exit (only when ``check`` is True), or a failure
        to launch the process at all.
    """
    cmd = ["git", *args]
    shown = f"git {' '.join(args)}"
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=check,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return Err(f"{shown} timed out after {timeout}s")
    except subprocess.CalledProcessError as exc:
        stderr = exc.stderr.strip() if exc.stderr else str(exc)
        return Err(f"{shown} failed: {stderr}")
    except OSError as exc:
        # Raised when the process cannot be started, e.g. FileNotFoundError
        # if no git executable is on PATH. Report it as an Err so callers
        # relying on the Result contract never see an exception.
        return Err(f"{shown} could not be started: {exc}")
    return Ok(proc.stdout.strip())
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _git_branch_exists(branch: str) -> bool:
    """Return True when ``git rev-parse --verify`` can resolve ``branch``.

    Args:
        branch: Branch name (or other revision) to look up.

    Returns:
        True if git exits successfully for the name, False otherwise.
    """
    # The exit status must be checked here. With check=False, _run_git
    # returns Ok for any exit code, which made every name look like an
    # existing branch.
    return _run_git("rev-parse", "--verify", branch).ok
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _git_current_branch() -> Result[str]:
    """Ask git for the abbreviated ref name that HEAD points at."""
    head_name = _run_git("rev-parse", "--abbrev-ref", "HEAD")
    return head_name
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _git_head_sha() -> Result[str]:
    """Ask git for the abbreviated commit hash of HEAD."""
    short_sha = _run_git("rev-parse", "--short", "HEAD")
    return short_sha
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _git_show_file(branch: str, path: str) -> Result[str]:
    """Fetch the contents of ``path`` as stored on ``branch`` via ``git show``.

    The branch is not checked out. The call uses ``check=False``, so git's
    exit status is not turned into an ``Err``; callers should inspect the
    returned text instead.
    """
    object_spec = f"{branch}:{path}"
    return _run_git("show", object_spec, check=False)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _git_working_tree_clean() -> bool:
    """Return True only when ``git status --porcelain`` succeeds with no output.

    Returns:
        True if the command ran successfully and listed no changes, False
        if it listed changes or could not be run.
    """
    # The exit status is checked on purpose: with check=False a failing
    # `git status` produces empty stdout and would be mistaken for a clean
    # working tree.
    result = _run_git("status", "--porcelain")
    if not result.ok:
        return False
    return result.value.strip() == ""
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _git_diff_commits(commit_a: str, commit_b: str) -> Result[str]:
    """Run ``git diff commit_a commit_b`` and return its output."""
    endpoints = (commit_a, commit_b)
    return _run_git("diff", *endpoints)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _git_log_oneline(branch: str, base_commit: str, max_count: int = 50) -> Result[str]:
    """List commits in the range ``base_commit..branch``, one line each.

    At most ``max_count`` commits are requested. The call uses
    ``check=False``, so git's exit status is not turned into an ``Err``.
    """
    limit_flag = f"--max-count={max_count}"
    revision_range = f"{base_commit}..{branch}"
    return _run_git("log", "--oneline", limit_flag, revision_range, check=False)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
# Evolve config persistence
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
|
|
234
|
+
# File name of the persisted evolve state.
EVOLVE_CONFIG_FILE = "evolve.json"


def _evolve_config_path() -> Path:
    """Locate evolve.json at the top level of the current git repository.

    When the repository root cannot be determined, a path relative to the
    current directory is returned instead.
    """
    toplevel = _run_git("rev-parse", "--show-toplevel")
    if toplevel.ok:
        return Path(toplevel.value) / EVOLVE_CONFIG_FILE
    return Path(EVOLVE_CONFIG_FILE)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _load_evolve_config() -> Result[EvolveConfig]:
    """Load evolve.json from the repo root.

    Returns:
        ``Ok(EvolveConfig)`` when the file exists and has the expected
        shape; ``Err`` when it is missing, unreadable, not valid JSON, or
        lacks required keys. Each agent's ``status`` falls back to
        ``"pending"`` when absent.
    """
    path = _evolve_config_path()
    if not path.exists():
        return Err(
            f"No evolve config found at {path}. "
            "Run 'autoevolve init' first."
        )
    try:
        raw = json.loads(path.read_text())
        agents = [
            AgentConfig(
                id=a["id"],
                branch=a["branch"],
                strategy=a["strategy"],
                status=a.get("status", "pending"),
            )
            for a in raw.get("agents", [])
        ]
        return Ok(EvolveConfig(
            tag=raw["tag"],
            base_branch=raw["base_branch"],
            base_commit=raw["base_commit"],
            created_at=raw["created_at"],
            agents=agents,
        ))
    except OSError as exc:
        # The file can vanish or be unreadable between exists() and
        # read_text(); report that as an Err instead of raising.
        return Err(f"Failed to read evolve.json: {exc}")
    except (json.JSONDecodeError, KeyError, TypeError, ValueError, AttributeError) as exc:
        # Covers malformed JSON as well as JSON of the wrong shape
        # (missing keys, non-dict top level, non-dict agent entries).
        return Err(f"Corrupt evolve.json: {exc}")
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _save_evolve_config(config: EvolveConfig) -> Result[None]:
    """Write ``config`` to evolve.json by way of a temporary file.

    The JSON is first written to a ``.tmp`` file created next to the
    target, which then replaces evolve.json. The temporary file is removed
    if anything goes wrong. Returns ``Ok(None)`` on success and ``Err`` when
    an ``OSError`` occurs.
    """
    target = _evolve_config_path()
    payload = {
        "tag": config.tag,
        "base_branch": config.base_branch,
        "base_commit": config.base_commit,
        "created_at": config.created_at,
        "agents": [asdict(agent) for agent in config.agents],
    }
    try:
        serialized = json.dumps(payload, indent=2) + "\n"
        handle, scratch_name = tempfile.mkstemp(dir=target.parent, suffix=".tmp")
        scratch = Path(scratch_name)
        try:
            with os.fdopen(handle, "w") as stream:
                stream.write(serialized)
            scratch.replace(target)
        except BaseException:
            # Never leave the scratch file behind, whatever interrupted us.
            scratch.unlink(missing_ok=True)
            raise
    except OSError as exc:
        return Err(f"Failed to write evolve.json: {exc}")
    return Ok(None)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# ---------------------------------------------------------------------------
|
|
301
|
+
# Results.tsv parsing
|
|
302
|
+
# ---------------------------------------------------------------------------
|
|
303
|
+
|
|
304
|
+
def _parse_results_tsv(raw: str) -> list[Experiment]:
|
|
305
|
+
"""Parse a results.tsv string into a list of Experiment records.
|
|
306
|
+
|
|
307
|
+
Expected header: commit\tval_bpb\tmemory_gb\tstatus\tdescription
|
|
308
|
+
"""
|
|
309
|
+
lines = raw.strip().splitlines()
|
|
310
|
+
experiments: list[Experiment] = []
|
|
311
|
+
|
|
312
|
+
for idx, line in enumerate(lines):
|
|
313
|
+
# Skip empty lines, and skip the first line if it looks like a header
|
|
314
|
+
if not line.strip():
|
|
315
|
+
continue
|
|
316
|
+
if idx == 0 and line.strip().startswith("commit"):
|
|
317
|
+
continue
|
|
318
|
+
parts = line.split("\t")
|
|
319
|
+
if len(parts) < 5:
|
|
320
|
+
continue
|
|
321
|
+
try:
|
|
322
|
+
experiments.append(Experiment(
|
|
323
|
+
commit=parts[0].strip(),
|
|
324
|
+
val_bpb=float(parts[1].strip()),
|
|
325
|
+
memory_gb=float(parts[2].strip()),
|
|
326
|
+
status=parts[3].strip(),
|
|
327
|
+
description="\t".join(parts[4:]).strip(),
|
|
328
|
+
))
|
|
329
|
+
except (ValueError, IndexError):
|
|
330
|
+
# Skip malformed rows
|
|
331
|
+
continue
|
|
332
|
+
|
|
333
|
+
return experiments
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _read_results_for_agent(agent: AgentConfig) -> str:
    """Return the text of the agent's results.tsv, or "" when unavailable.

    The copy stored on the agent's branch is preferred. If that yields
    nothing and the agent's branch is the one currently checked out, the
    results.tsv in the repository root is read from disk instead.
    """
    committed = _git_show_file(agent.branch, "results.tsv")
    if committed.ok and committed.value.strip():
        return committed.value

    # The on-disk file only belongs to this agent if its branch is checked out.
    head = _git_current_branch()
    if not head.ok or head.value != agent.branch:
        return ""

    toplevel = _run_git("rev-parse", "--show-toplevel")
    if not toplevel.ok:
        return ""

    on_disk = Path(toplevel.value) / "results.tsv"
    if not on_disk.exists():
        return ""
    return on_disk.read_text()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _get_agent_status(agent: AgentConfig) -> AgentStatus:
    """Build an AgentStatus for ``agent`` from its results.tsv.

    The best experiment is the "keep" row with the lowest positive val_bpb.
    ``keep_count`` counts every "keep" row, including those whose val_bpb is
    not positive. With no results text, the status is empty.
    """
    raw = _read_results_for_agent(agent)
    experiments = _parse_results_tsv(raw) if raw.strip() else []

    kept = [exp for exp in experiments if exp.status == "keep"]
    scored = [exp for exp in kept if exp.val_bpb > 0]
    winner = min(scored, key=lambda exp: exp.val_bpb) if scored else None

    return AgentStatus(
        agent=agent,
        experiments=experiments,
        best_val_bpb=None if winner is None else winner.val_bpb,
        best_experiment=winner,
        keep_count=len(kept),
    )
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _compute_improvements(experiments: list[Experiment]) -> list[tuple[Experiment, float]]:
|
|
382
|
+
"""Find impactful keep experiments by comparing each to the previous best.
|
|
383
|
+
|
|
384
|
+
Each keep is compared against the best val_bpb among all preceding keeps,
|
|
385
|
+
avoiding comparisons to crashed experiments (val_bpb=0.0) or discards.
|
|
386
|
+
"""
|
|
387
|
+
improvements: list[tuple[Experiment, float]] = []
|
|
388
|
+
prev_best: Optional[float] = None
|
|
389
|
+
for exp in experiments:
|
|
390
|
+
if exp.status != "keep" or exp.val_bpb <= 0:
|
|
391
|
+
continue
|
|
392
|
+
if prev_best is not None:
|
|
393
|
+
delta = prev_best - exp.val_bpb
|
|
394
|
+
if delta > 0:
|
|
395
|
+
improvements.append((exp, delta))
|
|
396
|
+
if prev_best is None or exp.val_bpb < prev_best:
|
|
397
|
+
prev_best = exp.val_bpb
|
|
398
|
+
return improvements
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# ---------------------------------------------------------------------------
|
|
402
|
+
# Program.md generation
|
|
403
|
+
# ---------------------------------------------------------------------------
|
|
404
|
+
|
|
405
|
+
def _generate_program_md(strategy: dict[str, str], agent_id: int, tag: str) -> str:
|
|
406
|
+
"""Generate a program.md variant for an agent with a specific research strategy."""
|
|
407
|
+
return f"""\
|
|
408
|
+
# Autoresearch Program — Evolve {tag}, Agent {agent_id}
|
|
409
|
+
|
|
410
|
+
## Strategy: {strategy['label']}
|
|
411
|
+
|
|
412
|
+
{strategy['guidance']}
|
|
413
|
+
|
|
414
|
+
## Rules
|
|
415
|
+
|
|
416
|
+
1. Modify `train.py` and commit your changes with a clear description.
|
|
417
|
+
2. Run `uv run train.py > run.log 2>&1` — training runs for exactly 5 minutes.
|
|
418
|
+
3. Read results: `grep "^val_bpb:\\|^peak_vram_mb:" run.log`
|
|
419
|
+
4. Record results in `results.tsv` (tab-separated):
|
|
420
|
+
- commit (short hash), val_bpb, memory_gb (peak_vram_mb / 1024), status, description
|
|
421
|
+
5. **Commit results.tsv** after each experiment so evolve tracking works:
|
|
422
|
+
`git add results.tsv && git commit -m "update results"`
|
|
423
|
+
6. If val_bpb improved (lower), set status to `keep` and advance the branch.
|
|
424
|
+
7. If val_bpb is equal or worse, set status to `discard` and `git reset --hard HEAD~1`
|
|
425
|
+
to revert the train.py changes (but keep results.tsv updated).
|
|
426
|
+
8. Repeat indefinitely. Each experiment should build on previous successes.
|
|
427
|
+
|
|
428
|
+
## Hints
|
|
429
|
+
|
|
430
|
+
If an `evolve-hints.md` file exists in the repo root, it contains insights from the
|
|
431
|
+
leading agent in the evolve. Consider incorporating their successful ideas.
|
|
432
|
+
|
|
433
|
+
## Goal
|
|
434
|
+
|
|
435
|
+
Minimize `val_bpb` within the 5-minute time budget per experiment. Lower is better.
|
|
436
|
+
"""
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
# ---------------------------------------------------------------------------
|
|
440
|
+
# CLI commands
|
|
441
|
+
# ---------------------------------------------------------------------------
|
|
442
|
+
|
|
443
|
+
@click.group(epilog="Exit codes: 0 = success, 1 = error")
@click.version_option(version="1.0.0", prog_name="autoevolve")
@click.option("--no-color", is_flag=True, default=False, help="Disable colored output")
@click.option("--quiet", "-q", is_flag=True, default=False, help="Minimal output")
@click.pass_context
def cli(ctx: click.Context, no_color: bool, quiet: bool) -> None:
    """Multi-agent research competition orchestrator for autoresearch."""
    # Subcommands read the shared output settings from ctx.obj["cfg"].
    ctx.ensure_object(dict)
    # Color is used only when not disabled by flag and stdout is a terminal.
    color_enabled = False if no_color else sys.stdout.isatty()
    ctx.obj["cfg"] = OutputConfig(color=color_enabled, quiet=quiet)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
@cli.command()
@click.option("--agents", "-n", type=int, required=True, help="Number of competing agents")
@click.option("--base-branch", "-b", type=str, default="main", help="Branch to fork from")
@click.option("--tag", "-t", type=str, required=True, help="Evolve tag (e.g. mar15)")
@click.pass_context
def init(ctx: click.Context, agents: int, base_branch: str, tag: str) -> None:
    """Initialize a new evolve with N competing agent branches.

    For each agent a branch ``evolve/<tag>-agent-<i>`` is created from the
    base branch, a strategy-specific program.md and a results.tsv header are
    written to the repo root and committed, and the agent is recorded. The
    resulting state is saved to evolve.json and a summary is printed.

    Exits with status 1 on any validation or setup error. Branches created
    before a failure are deleted again, and the originally checked-out
    branch is always restored.
    """
    cfg = ctx.obj["cfg"]
    if agents < 1:
        click.echo("Error: --agents must be at least 1.", err=True)
        sys.exit(1)

    # Verify we are inside a git repo
    repo_root = _run_git("rev-parse", "--show-toplevel")
    if not repo_root.ok:
        click.echo("Error: not inside a git repository.", err=True)
        sys.exit(1)

    # Ensure no uncommitted changes before checking out branches
    if not _git_working_tree_clean():
        click.echo("Error: working tree has uncommitted changes. Commit or stash first.", err=True)
        sys.exit(1)

    # Verify base branch exists
    if not _git_branch_exists(base_branch):
        click.echo(f"Error: base branch '{base_branch}' does not exist.", err=True)
        sys.exit(1)

    # Check for existing evolve config
    config_path = _evolve_config_path()
    if config_path.exists():
        click.echo(
            f"Error: evolve.json already exists at {config_path}. "
            "Remove it first or use a different repo.",
            err=True,
        )
        sys.exit(1)

    # Get base commit
    base_sha = _run_git("rev-parse", "--short", base_branch)
    if not base_sha.ok:
        click.echo(f"Error: could not resolve base branch: {base_sha.error}", err=True)
        sys.exit(1)

    # Remember current branch to return to it
    current_branch = _git_current_branch()
    if not current_branch.ok:
        click.echo(f"Error: {current_branch.error}", err=True)
        sys.exit(1)

    # Both files live in the repo root; the paths are the same for every agent.
    program_path = Path(repo_root.value) / "program.md"
    results_path = Path(repo_root.value) / "results.tsv"

    agent_configs: list[AgentConfig] = []
    created_branches: list[str] = []

    try:
        for i in range(1, agents + 1):
            # Strategies are handed out round-robin.
            strategy = STRATEGIES[(i - 1) % len(STRATEGIES)]
            branch_name = f"evolve/{tag}-agent-{i}"

            # Check if branch already exists
            if _git_branch_exists(branch_name):
                click.echo(f"Error: branch '{branch_name}' already exists.", err=True)
                sys.exit(1)

            # Create branch from base
            result = _run_git("checkout", "-b", branch_name, base_branch)
            if not result.ok:
                click.echo(f"Error creating branch {branch_name}: {result.error}", err=True)
                sys.exit(1)
            created_branches.append(branch_name)

            try:
                # Write program.md
                program_path.write_text(_generate_program_md(strategy, i, tag))
                # Create an initial empty results.tsv with header
                if not results_path.exists():
                    results_path.write_text("commit\tval_bpb\tmemory_gb\tstatus\tdescription\n")
            except OSError as exc:
                # Route write failures through SystemExit so the cleanup
                # below removes the branches created so far.
                click.echo(f"Error writing agent files on {branch_name}: {exc}", err=True)
                sys.exit(1)

            # Commit program.md and results.tsv to the branch. Absolute paths
            # are used so staging also works when the command is run from a
            # subdirectory of the repository.
            add_result = _run_git("add", str(program_path), str(results_path))
            if not add_result.ok:
                click.echo(f"Warning: staging files on {branch_name}: {add_result.error}", err=True)
            commit_result = _run_git(
                "commit", "-m",
                f"evolve({tag}): initialize agent {i} with {strategy['key']} strategy",
            )
            if not commit_result.ok:
                click.echo(f"Warning: commit on {branch_name}: {commit_result.error}", err=True)

            agent_configs.append(AgentConfig(
                id=i,
                branch=branch_name,
                strategy=strategy["key"],
                status="pending",
            ))
    except SystemExit:
        # Clean up created branches on failure. The original branch must be
        # checked out first: git refuses to delete the current branch.
        _run_git("checkout", current_branch.value, check=False)
        for branch in created_branches:
            _run_git("branch", "-D", branch, check=False)
        raise
    finally:
        # Always return to original branch
        _run_git("checkout", current_branch.value, check=False)

    # Save evolve config (not committed)
    evolve = EvolveConfig(
        tag=tag,
        base_branch=base_branch,
        base_commit=base_sha.value,
        created_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
        agents=agent_configs,
    )
    save_result = _save_evolve_config(evolve)
    if not save_result.ok:
        click.echo(f"Error saving config: {save_result.error}", err=True)
        sys.exit(1)

    # Print summary
    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} initialized ==")
    click.echo(f"Evolve: {tag} | Agents: {agents} | Base: {base_branch} ({base_sha.value})")
    click.echo()
    for ac in agent_configs:
        strategy_info = next(s for s in STRATEGIES if s["key"] == ac.strategy)
        click.echo(f" Agent {ac.id}: {ac.branch} ({strategy_info['label']})")
    click.echo()
    click.echo("To start each agent, check out its branch and run your autoresearch agent:")
    click.echo()
    for ac in agent_configs:
        click.echo(f" git checkout {ac.branch}")
        click.echo(" # Start your AI agent here (e.g., claude, codex, gemini)")
        click.echo()
    click.echo("Monitor progress with: autoevolve status")
    click.echo("Cross-pollinate ideas with: autoevolve pollinate")
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
@cli.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show current evolve status and quick leaderboard."""
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    cfg = ctx.obj["cfg"]

    loaded = _load_evolve_config()
    if not loaded.ok:
        click.echo(f"Error: {loaded.error}", err=True)
        sys.exit(1)
    config = loaded.value

    statuses = [_get_agent_status(agent) for agent in config.agents]

    # The leader is the agent with the lowest best val_bpb among the agents
    # that have a best val_bpb at all.
    scored = [entry for entry in statuses if entry.best_val_bpb is not None]
    leader = None
    if scored:
        leader = min(scored, key=lambda entry: entry.best_val_bpb)

    # Quiet mode: a single summary line, then stop.
    if cfg.quiet:
        if not (leader and leader.best_experiment):
            click.echo("No results yet.")
            return
        experiment_total = sum(len(entry.experiments) for entry in statuses)
        click.echo(
            f"Leader: Agent {leader.agent.id}, "
            f"best: {leader.best_val_bpb:.6f} "
            f"({experiment_total} experiments)"
        )
        return

    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} status ==")
    click.echo(
        f"Evolve: {config.tag} | Agents: {len(config.agents)} | "
        f"Started: {config.created_at}"
    )
    click.echo()

    for entry in statuses:
        # Fall back to the raw strategy key when it is not listed in STRATEGIES.
        strategy_info = next(
            (st for st in STRATEGIES if st["key"] == entry.agent.strategy),
            {"label": entry.agent.strategy},
        )

        if entry.best_val_bpb is None:
            bpb_text = "N/A"
        else:
            bpb_text = f"{entry.best_val_bpb:.6f}"

        leader_tag = ""
        if leader and entry is leader:
            leader_tag = f" {cfg.styled(SYM_STAR + ' LEADER', fg='yellow', bold=True)}"

        click.echo(
            f"Agent {entry.agent.id} ({strategy_info['label']}): "
            f"{len(entry.experiments)} experiments, "
            f"best val_bpb: {bpb_text}, "
            f"{entry.keep_count} keeps{leader_tag}"
        )

    if leader and leader.best_experiment:
        top_experiment = leader.best_experiment
        click.echo()
        click.echo(
            f"Overall best: Agent {leader.agent.id} at {top_experiment.val_bpb:.6f} "
            f'("{top_experiment.description}")'
        )

    if not scored:
        click.echo("\nNo experiment results found yet. Agents may not have started.")

    click.echo()
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
@cli.command()
@click.option("--detailed", is_flag=True, help="Show detailed per-agent trajectory")
@click.pass_context
def leaderboard(ctx: click.Context, detailed: bool) -> None:
    """Show detailed leaderboard comparison across all agents."""
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    cfg = ctx.obj["cfg"]

    loaded = _load_evolve_config()
    if not loaded.ok:
        click.echo(f"Error: {loaded.error}", err=True)
        sys.exit(1)
    config = loaded.value

    statuses = [_get_agent_status(agent) for agent in config.agents]

    def strategy_label(entry):
        # Human-readable label for the agent's strategy; falls back to the
        # raw strategy key when it is not listed in STRATEGIES.
        for st in STRATEGIES:
            if st["key"] == entry.agent.strategy:
                return st["label"]
        return entry.agent.strategy

    def rank_key(entry):
        # Agents without a best val_bpb sort after every agent that has one.
        if entry.best_val_bpb is None:
            return float("inf")
        return entry.best_val_bpb

    ranked = sorted(statuses, key=rank_key)

    def show_trajectories() -> None:
        # Per-agent experiment history with a running "best so far" column.
        click.echo()
        click.echo("=" * 77)
        click.echo("DETAILED TRAJECTORIES")
        click.echo("=" * 77)

        for entry in ranked:
            click.echo(
                f"\n--- Agent {entry.agent.id}: {strategy_label(entry)} "
                f"({entry.agent.branch}) ---"
            )

            if not entry.experiments:
                click.echo(" No experiments yet.")
                continue

            traj_header = f" {'#':<4}{'Commit':<10}{'val_bpb':<12}{'Status':<8}{'Best So Far':<14}{'Description'}"
            click.echo(cfg.styled(traj_header, dim=True))
            click.echo(f" {'-' * 72}")

            best_so_far = float("inf")
            for number, exp in enumerate(entry.experiments, 1):
                improved = exp.val_bpb < best_so_far
                if improved:
                    best_so_far = exp.val_bpb
                # Rows that set a new running best get the keep symbol.
                tick = f" {cfg.styled(SYM_KEEP, fg='green')}" if improved else ""
                click.echo(
                    f" {number:<4}{exp.commit:<10}{exp.val_bpb:<12.6f}"
                    f"{exp.status:<8}{best_so_far:<14.6f}{exp.description}{tick}"
                )

    def show_effectiveness() -> None:
        # Average and best improvement per agent, from _compute_improvements.
        click.echo()
        click.echo("=" * 77)
        click.echo("STRATEGY EFFECTIVENESS")
        click.echo("=" * 77)
        click.echo()

        for entry in ranked:
            computed = _compute_improvements(entry.experiments)
            if not computed:
                continue

            gains = [delta for _, delta in computed]
            mean_gain = sum(gains) / len(gains)
            top_gain = max(gains)
            click.echo(
                f" {strategy_label(entry)}: "
                f"avg improvement per keep: {mean_gain:.6f}, "
                f"best single improvement: {top_gain:.6f}"
            )

    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} leaderboard ==")
    click.echo(
        f"Evolve: {config.tag} | Agents: {len(config.agents)} | "
        f"Base: {config.base_branch} ({config.base_commit})"
    )
    click.echo()

    header = f"{'Rank':<6}{'Agent':<10}{'Strategy':<25}{'Best BPB':<12}{'Exps':<8}{'Keeps':<8}{'Keep %':<8}"
    click.echo(cfg.styled(header, dim=True))
    click.echo("-" * 77)

    for position, entry in enumerate(ranked, 1):
        experiment_count = len(entry.experiments)

        if entry.best_val_bpb is None:
            bpb_text = "N/A"
        else:
            bpb_text = f"{entry.best_val_bpb:.6f}"

        if experiment_count > 0:
            keep_ratio = f"{(entry.keep_count / experiment_count * 100):.0f}%"
        else:
            keep_ratio = "N/A"

        click.echo(
            f"{position:<6}{entry.agent.id:<10}{strategy_label(entry):<25}"
            f"{bpb_text:<12}{experiment_count:<8}{entry.keep_count:<8}{keep_ratio:<8}"
        )

    # NOTE(review): both extra sections are gated on --detailed here. Confirm
    # that the strategy-effectiveness section is not meant to print
    # unconditionally.
    if detailed:
        show_trajectories()
        show_effectiveness()

    click.echo()
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _find_impactful_experiments(status: AgentStatus) -> list[tuple[Experiment, float]]:
    """Return the agent's (experiment, delta) pairs ordered by delta, largest first.

    The pairs come from ``_compute_improvements(status.experiments)``; this
    function only orders them in place and hands the same list back.
    """
    ranked_pairs = _compute_improvements(status.experiments)

    def by_delta(pair):
        # Second element of each pair is the improvement delta.
        return pair[1]

    ranked_pairs.sort(key=by_delta, reverse=True)
    return ranked_pairs
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def _build_hints_content(
    config: EvolveConfig,
    leader: AgentStatus,
    impactful: list[tuple[Experiment, float]],
) -> str:
    """Build the evolve-hints.md content from leader data.

    Args:
        config: Evolve configuration; only ``config.tag`` is used (title).
        leader: Status of the leading agent; its id, strategy and
            ``best_val_bpb`` are rendered in the "Leading Agent" section.
        impactful: ``(experiment, improvement)`` pairs. Only the first five
            entries, in the order given, are rendered.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    # Fall back to the raw strategy key when it is not listed in STRATEGIES.
    strategy_label = next(
        (st["label"] for st in STRATEGIES if st["key"] == leader.agent.strategy),
        leader.agent.strategy,
    )
    generated_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
    # Callers pass a leader that has results, but stay printable if not.
    if leader.best_val_bpb is not None:
        best_text = f"{leader.best_val_bpb:.6f}"
    else:
        best_text = "N/A"

    lines: list[str] = [
        f"# Hints from Evolve {config.tag}",
        "",
        # The console script is named `autoevolve` (no hyphen).
        f"Generated by `autoevolve pollinate` at {generated_at}",
        "",
        "## Leading Agent",
        "",
        f"Agent {leader.agent.id} ({strategy_label}) is currently leading "
        f"with best val_bpb: {best_text}",
        "",
        "## Most Impactful Experiments",
        "",
    ]

    # Include up to top 5 most impactful experiments
    top = impactful[:5]
    if not top:
        lines.append("No impactful improvements detected yet.")
        lines.append("")

    for rank, (exp, delta) in enumerate(top, 1):
        lines.extend(
            [
                f"### {rank}. {exp.description}",
                "",
                f"- **Commit**: {exp.commit}",
                f"- **val_bpb**: {exp.val_bpb:.6f}",
                f"- **Improvement**: {delta:.6f}",
                f"- **Memory**: {exp.memory_gb:.1f} GB",
                "",
            ]
        )

        # Diff of train.py between the commit and its first parent. The
        # code section is simply omitted when the diff fails or is empty.
        # NOTE(review): presumably check=False makes _run_git return a
        # failed result instead of exiting -- confirm against _run_git.
        diff_result = _run_git(
            "diff", f"{exp.commit}~1", exp.commit,
            "--", "train.py",
            check=False,
        )
        if diff_result.ok and diff_result.value.strip():
            lines.extend(
                [
                    "<details><summary>Code changes</summary>",
                    "",
                    "```diff",
                    diff_result.value,
                    "```",
                    "",
                    "</details>",
                    "",
                ]
            )

    lines.extend(
        [
            "## Suggestion",
            "",
            "Consider incorporating the above successful changes into your experiments. "
            "These modifications produced measurable improvements in val_bpb.",
            "",
        ]
    )

    return "\n".join(lines)
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
@cli.command()
@click.pass_context
def pollinate(ctx: click.Context) -> None:
    """Cross-pollinate: share winning ideas from the best agent with all others."""
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    config_result = _load_evolve_config()
    if not config_result.ok:
        click.echo(f"Error: {config_result.error}", err=True)
        sys.exit(1)

    config = config_result.value
    statuses = [_get_agent_status(agent) for agent in config.agents]

    # Find the leader: lowest best val_bpb among agents that have results.
    agents_with_results = [s for s in statuses if s.best_val_bpb is not None]
    if not agents_with_results:
        click.echo("No experiment results found yet. Nothing to pollinate.", err=True)
        sys.exit(1)

    leader = min(agents_with_results, key=lambda s: s.best_val_bpb)

    # Find the leader's most impactful "keep" experiments
    impactful = _find_impactful_experiments(leader)

    # Build hints content
    hints_content = _build_hints_content(config, leader, impactful)

    # Write to repo root as an untracked file (no checkout needed!)
    repo_root = _run_git("rev-parse", "--show-toplevel")
    if not repo_root.ok:
        click.echo(f"Error: {repo_root.error}", err=True)
        sys.exit(1)

    hints_path = Path(repo_root.value) / "evolve-hints.md"
    try:
        # Explicit UTF-8: the hints embed git diffs and free-form experiment
        # descriptions, which may contain characters the locale's default
        # encoding cannot represent.
        hints_path.write_text(hints_content, encoding="utf-8")
    except OSError as exc:
        # Report write failures like every other error in this command
        # instead of surfacing a traceback.
        click.echo(f"Error: could not write {hints_path}: {exc}", err=True)
        sys.exit(1)

    click.echo(f"Hints from Agent {leader.agent.id} written to {hints_path}")
    click.echo("All agents can read this file regardless of their branch.")
    click.echo()
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
@cli.command()
@click.option(
    "--format", "fmt",
    type=click.Choice(["json", "tsv"]),
    default="json",
    help="Export format",
)
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file path")
@click.pass_context
def export(ctx: click.Context, fmt: str, output: Optional[str]) -> None:
    """Export all agent results to a single file for external analysis."""
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    config_result = _load_evolve_config()
    if not config_result.ok:
        click.echo(f"Error: {config_result.error}", err=True)
        sys.exit(1)

    config = config_result.value
    statuses = [_get_agent_status(agent) for agent in config.agents]

    if fmt == "json":
        data = {
            "evolve": config.tag,
            "base_branch": config.base_branch,
            "base_commit": config.base_commit,
            "created_at": config.created_at,
            "exported_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
            # One entry per agent, each with its full experiment list.
            "agents": [
                {
                    "id": s.agent.id,
                    "branch": s.agent.branch,
                    "strategy": s.agent.strategy,
                    "best_val_bpb": s.best_val_bpb,
                    "keep_count": s.keep_count,
                    "total_experiments": len(s.experiments),
                    "experiments": [
                        {
                            "commit": e.commit,
                            "val_bpb": e.val_bpb,
                            "memory_gb": e.memory_gb,
                            "status": e.status,
                            "description": e.description,
                        }
                        for e in s.experiments
                    ],
                }
                for s in statuses
            ],
        }
        content = json.dumps(data, indent=2) + "\n"

    elif fmt == "tsv":
        # One row per experiment, flattened across all agents.
        header = "agent_id\tagent_strategy\tcommit\tval_bpb\tmemory_gb\tstatus\tdescription\n"
        rows: list[str] = [header]
        rows.extend(
            f"{s.agent.id}\t{s.agent.strategy}\t{e.commit}\t"
            f"{e.val_bpb}\t{e.memory_gb}\t{e.status}\t{e.description}\n"
            for s in statuses
            for e in s.experiments
        )
        content = "".join(rows)
    else:
        # Defensive only: click.Choice already restricts fmt to json/tsv.
        click.echo(f"Error: unsupported format '{fmt}'.", err=True)
        sys.exit(1)

    if output:
        try:
            # Explicit UTF-8 so descriptions with non-ASCII characters (TSV)
            # are written the same way regardless of the platform locale.
            Path(output).write_text(content, encoding="utf-8")
        except OSError as exc:
            # Report write failures like every other error in this command
            # instead of surfacing a traceback.
            click.echo(f"Error: could not write {output}: {exc}", err=True)
            sys.exit(1)
        click.echo(f"Exported {fmt.upper()} to {output}")
    else:
        click.echo(content, nl=False)
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
# ---------------------------------------------------------------------------
|
|
958
|
+
# Entry point
|
|
959
|
+
# ---------------------------------------------------------------------------
|
|
960
|
+
|
|
961
|
+
def main() -> None:
    """Entry point: invoke the top-level ``cli`` command object."""
    cli()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "autoevolve"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Multi-agent research competition orchestrator for autoresearch"
|
|
5
|
+
requires-python = ">=3.10"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
keywords = ["autoresearch", "karpathy", "gpt", "pretraining", "multi-agent"]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 4 - Beta",
|
|
11
|
+
"Environment :: Console",
|
|
12
|
+
"Intended Audience :: Science/Research",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.10",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
dependencies = ["click>=8.0"]
|
|
21
|
+
|
|
22
|
+
[project.scripts]
|
|
23
|
+
autoevolve = "auto_evolve:main"
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/dean0x/autolab"
|
|
27
|
+
Repository = "https://github.com/dean0x/autolab"
|
|
28
|
+
Issues = "https://github.com/dean0x/autolab/issues"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["hatchling"]
|
|
32
|
+
build-backend = "hatchling.build"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["auto_evolve.py"]
|