autoevolve 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ *.whl
10
+
11
+ # Environment variables
12
+ .env
13
+
14
+ # Virtual environments
15
+ .venv/
16
+ venv/
17
+ env/
18
+
19
+ # IDE
20
+ .idea/
21
+ .vscode/
22
+ *.swp
23
+ *.swo
24
+ *~
25
+ .DS_Store
26
+
27
+ # Testing
28
+ .pytest_cache/
29
+ .coverage
30
+ htmlcov/
31
+ .mypy_cache/
32
+
33
+ # Claude Code working memory
34
+ .memory/
35
+
36
+ # Distribution
37
+ *.tar.gz
38
+ .docs/
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: autoevolve
3
+ Version: 1.0.0
4
+ Summary: Multi-agent research competition orchestrator for autoresearch
5
+ Project-URL: Homepage, https://github.com/dean0x/autolab
6
+ Project-URL: Repository, https://github.com/dean0x/autolab
7
+ Project-URL: Issues, https://github.com/dean0x/autolab/issues
8
+ License-Expression: MIT
9
+ Keywords: autoresearch,gpt,karpathy,multi-agent,pretraining
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: click>=8.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ # autoevolve
24
+
25
+ Multi-agent research competition orchestrator for [autoresearch](https://github.com/karpathy/autoresearch). Run parallel AI agents with different strategies and cross-pollinate winning ideas.
26
+
27
+ ## Install
28
+
29
+ ```bash
30
+ pip install autoevolve
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ```bash
36
+ # Initialize a 3-agent competition
37
+ autoevolve init --agents 3 --tag mar15
38
+
39
+ # Check who's winning
40
+ autoevolve status
41
+ autoevolve leaderboard --detailed
42
+
43
+ # Spread winning ideas to all agents
44
+ autoevolve pollinate
45
+
46
+ # Export results
47
+ autoevolve export --format json -o evolve-results.json
48
+ ```
49
+
50
+ ## How It Works
51
+
52
+ 1. **init** creates one git branch per agent, each with a different research strategy
53
+ 2. Each agent works independently on its branch using autojudge + autosteer
54
+ 3. **leaderboard** ranks agents by best val_bpb with keep rate tracking
55
+ 4. **pollinate** writes the leader's best experiments to `evolve-hints.md` — readable from any branch
56
+ 5. Agents incorporate hints and continue competing
57
+
58
+ ## Built-in Strategies
59
+
60
+ | Strategy | Approach |
61
+ |----------|----------|
62
+ | Architecture First | Explore model structure before tuning |
63
+ | Hyperparams First | Sweep learning rates and schedules first |
64
+ | Optimizer First | Tune Muon/Adam parameters first |
65
+ | Regularization First | Explore weight decay, dropout, z-loss |
66
+ | Efficiency First | Maximize compute efficiency to run more experiments |
67
+ | Radical | Bold, unconventional changes |
68
+
69
+ Strategies are assigned round-robin. With 3 agents, you get 3 different strategies competing.
70
+
71
+ ## Commands
72
+
73
+ | Command | Description |
74
+ |---------|-------------|
75
+ | `autoevolve init --agents N --tag TAG` | Create N agent branches |
76
+ | `autoevolve status` | Quick overview with current leader |
77
+ | `autoevolve leaderboard` | Ranked table with keep rates |
78
+ | `autoevolve leaderboard --detailed` | Full trajectories + strategy effectiveness |
79
+ | `autoevolve pollinate` | Cross-pollinate winning ideas |
80
+ | `autoevolve export --format json\|tsv` | Export results for analysis |
81
+
82
+ ## Requirements
83
+
84
+ - Python >= 3.10
85
+ - A git repository with autoresearch set up
86
+ - Multiple compute environments (one per agent)
87
+
88
+ ## License
89
+
90
+ MIT
@@ -0,0 +1,68 @@
1
+ # autoevolve
2
+
3
+ Multi-agent research competition orchestrator for [autoresearch](https://github.com/karpathy/autoresearch). Run parallel AI agents with different strategies and cross-pollinate winning ideas.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install autoevolve
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ # Initialize a 3-agent competition
15
+ autoevolve init --agents 3 --tag mar15
16
+
17
+ # Check who's winning
18
+ autoevolve status
19
+ autoevolve leaderboard --detailed
20
+
21
+ # Spread winning ideas to all agents
22
+ autoevolve pollinate
23
+
24
+ # Export results
25
+ autoevolve export --format json -o evolve-results.json
26
+ ```
27
+
28
+ ## How It Works
29
+
30
+ 1. **init** creates one git branch per agent, each with a different research strategy
31
+ 2. Each agent works independently on its branch using autojudge + autosteer
32
+ 3. **leaderboard** ranks agents by best val_bpb with keep rate tracking
33
+ 4. **pollinate** writes the leader's best experiments to `evolve-hints.md` — readable from any branch
34
+ 5. Agents incorporate hints and continue competing
35
+
36
+ ## Built-in Strategies
37
+
38
+ | Strategy | Approach |
39
+ |----------|----------|
40
+ | Architecture First | Explore model structure before tuning |
41
+ | Hyperparams First | Sweep learning rates and schedules first |
42
+ | Optimizer First | Tune Muon/Adam parameters first |
43
+ | Regularization First | Explore weight decay, dropout, z-loss |
44
+ | Efficiency First | Maximize compute efficiency to run more experiments |
45
+ | Radical | Bold, unconventional changes |
46
+
47
+ Strategies are assigned round-robin. With 3 agents, you get 3 different strategies competing.
48
+
49
+ ## Commands
50
+
51
+ | Command | Description |
52
+ |---------|-------------|
53
+ | `autoevolve init --agents N --tag TAG` | Create N agent branches |
54
+ | `autoevolve status` | Quick overview with current leader |
55
+ | `autoevolve leaderboard` | Ranked table with keep rates |
56
+ | `autoevolve leaderboard --detailed` | Full trajectories + strategy effectiveness |
57
+ | `autoevolve pollinate` | Cross-pollinate winning ideas |
58
+ | `autoevolve export --format json\|tsv` | Export results for analysis |
59
+
60
+ ## Requirements
61
+
62
+ - Python >= 3.10
63
+ - A git repository with autoresearch set up
64
+ - Multiple compute environments (one per agent)
65
+
66
+ ## License
67
+
68
+ MIT
@@ -0,0 +1,966 @@
1
+ """
2
+ auto-evolve: Multi-agent research competition orchestrator for autoresearch.
3
+
4
+ Manages multiple competing autoresearch agents on separate git branches,
5
+ with leaderboard tracking and cross-pollination of winning ideas.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ import subprocess
13
+ import sys
14
+ import tempfile
15
+ from dataclasses import dataclass, field, asdict
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+ from typing import Generic, Optional, TypeVar, Union
19
+
20
+ import click
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Output infrastructure
25
+ # ---------------------------------------------------------------------------
26
+
27
@dataclass(frozen=True)
class OutputConfig:
    """Immutable output preferences handed to every CLI command."""
    color: bool  # when False, styled() returns text untouched
    quiet: bool  # request for minimal output

    def styled(self, text: str, **kwargs) -> str:
        """Return ``text`` passed through ``click.style`` only if color is enabled."""
        if not self.color:
            return text
        return click.style(text, **kwargs)
33
+
34
# Status symbols, spelled with named Unicode escapes so the glyph is self-describing
SYM_KEEP = "\N{HEAVY CHECK MARK}"  # U+2714
SYM_FAIL = "\N{HEAVY BALLOT X}"  # U+2718
SYM_CRASH = "\N{SKULL AND CROSSBONES}"  # U+2620
SYM_WARN = "\N{WARNING SIGN}"  # U+26A0
SYM_ARROW = "\N{RIGHTWARDS ARROW}"  # U+2192
SYM_STAR = "\N{BLACK STAR}"  # U+2605
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Result type — all fallible operations return Result instead of raising
45
+ # ---------------------------------------------------------------------------
46
+
47
T = TypeVar("T")


@dataclass(frozen=True)
class Err:
    """Failure branch of ``Result``: wraps a human-readable error message."""
    error: str

    @property
    def ok(self) -> bool:
        """Always ``False`` so callers can branch on ``result.ok``."""
        return False


@dataclass(frozen=True)
class Ok(Generic[T]):
    """Success branch of ``Result``: wraps the produced value."""
    value: T

    @property
    def ok(self) -> bool:
        """Always ``True`` so callers can branch on ``result.ok``."""
        return True


# A fallible operation returns one of the two wrappers instead of raising.
Result = Union[Ok[T], Err]
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Domain types
73
+ # ---------------------------------------------------------------------------
74
+
75
# Built-in research strategies. Each entry is a dict with the keys
# "key" (stable identifier), "label" (display name) and "guidance"
# (text embedded into the agent's program.md).
STRATEGIES: list[dict[str, str]] = [
    {"key": key, "label": label, "guidance": guidance}
    for key, label, guidance in (
        (
            "architecture-first",
            "Architecture First",
            "Start by exploring model architecture (depth, width, attention patterns, "
            "MLP ratio). Once you find a good architecture, fine-tune hyperparams.",
        ),
        (
            "hyperparams-first",
            "Hyperparams First",
            "Start by sweeping hyperparameters (learning rates, batch size, "
            "warmup/cooldown). Find optimal training dynamics before changing architecture.",
        ),
        (
            "optimizer-first",
            "Optimizer First",
            "Start by tuning the optimizer (Muon momentum, ns_steps, AdamW betas, "
            "weight decay schedule). A well-tuned optimizer can unlock gains.",
        ),
        (
            "regularization-first",
            "Regularization First",
            "Start by exploring regularization (weight decay, dropout, z-loss, softcap "
            "values). Prevent overfitting before scaling up.",
        ),
        (
            "efficiency-first",
            "Efficiency First",
            "Start by maximizing compute efficiency (larger batch size, better memory "
            "usage, faster iteration). More experiments = more chances.",
        ),
        (
            "radical",
            "Radical",
            "Try bold, unconventional changes. Large architecture modifications, novel "
            "activation functions, unusual training schedules. Go big or go home.",
        ),
    )
]
125
+
126
+
127
@dataclass(frozen=True)
class Experiment:
    """A single row from results.tsv.

    Fields mirror the tab-separated columns in header order:
    commit, val_bpb, memory_gb, status, description.
    """
    commit: str  # first column, stored as text
    val_bpb: float  # metric used for ranking; smaller values rank higher
    memory_gb: float  # third column, parsed as float
    status: str  # free text from the file; "keep" is the value the ranking code looks for
    description: str  # remaining column(s), re-joined with tabs
135
+
136
+
137
@dataclass
class AgentConfig:
    """Configuration for a single agent in the evolve."""
    id: int  # agent number
    branch: str  # git branch holding this agent's work
    strategy: str  # matches the "key" of an entry in STRATEGIES
    status: str = "pending"  # default used when the field is absent
144
+
145
+
146
@dataclass
class EvolveConfig:
    """Root evolve state persisted to evolve.json."""
    tag: str  # user-supplied evolve tag
    base_branch: str  # branch the agent branches were forked from
    base_commit: str  # commit id of base_branch recorded at init time
    created_at: str  # creation timestamp, stored as a string
    # One entry per agent; default_factory gives each instance its own list.
    agents: list[AgentConfig] = field(default_factory=list)
154
+
155
+
156
@dataclass(frozen=True)
class AgentStatus:
    """Runtime status of an agent derived from its results.tsv."""
    agent: AgentConfig  # the agent this status describes
    experiments: list[Experiment]  # every parsed row, in file order
    best_val_bpb: Optional[float]  # None when there is no qualifying experiment
    best_experiment: Optional[Experiment]  # the row that produced best_val_bpb, if any
    keep_count: int  # number of rows whose status is "keep"
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Git helpers — thin wrappers around subprocess
168
+ # ---------------------------------------------------------------------------
169
+
170
def _run_git(*args: str, check: bool = True, timeout: int = 30) -> Result[str]:
    """Run a git command and return stdout on success, or an Err on failure.

    Args:
        *args: Arguments placed after ``git`` on the command line.
        check: Forwarded to ``subprocess.run``. When True a non-zero exit
            becomes an ``Err``; when False the exit status is not inspected
            and stdout is returned as ``Ok`` regardless.
        timeout: Seconds to wait before giving up.

    Returns:
        ``Ok`` with stripped stdout, or ``Err`` with a description of the
        timeout, the failing exit, or the failure to launch git at all.
    """
    cmd = ["git", *args]
    label = f"git {' '.join(args)}"
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=check,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return Err(f"{label} timed out after {timeout}s")
    except subprocess.CalledProcessError as exc:
        stderr = exc.stderr.strip() if exc.stderr else str(exc)
        return Err(f"{label} failed: {stderr}")
    except OSError as exc:
        # git missing from PATH or not executable: keep the "never raises"
        # contract of Result-returning helpers instead of leaking a traceback.
        return Err(f"{label} could not be started: {exc}")
    return Ok(proc.stdout.strip())
187
+
188
+
189
def _git_branch_exists(branch: str) -> bool:
    """Check whether ``branch`` resolves to an existing ref.

    Uses ``git rev-parse --verify``; the name is passed as given, so anything
    rev-parse can resolve counts as existing.

    The call must run with ``check=True`` (the ``_run_git`` default): with
    ``check=False`` a non-zero exit is never turned into an ``Err``, which
    would make this function report True for every name.
    """
    result = _run_git("rev-parse", "--verify", "--quiet", branch)
    return result.ok
193
+
194
+
195
def _git_current_branch() -> Result[str]:
    """Return the current branch name.

    Runs ``git rev-parse --abbrev-ref HEAD`` and passes the Result through
    unchanged (Err if git fails).
    """
    return _run_git("rev-parse", "--abbrev-ref", "HEAD")
198
+
199
+
200
def _git_head_sha() -> Result[str]:
    """Return the short SHA of HEAD.

    Runs ``git rev-parse --short HEAD`` and passes the Result through
    unchanged (Err if git fails).
    """
    return _run_git("rev-parse", "--short", "HEAD")
203
+
204
+
205
def _git_show_file(branch: str, path: str) -> Result[str]:
    """Read a file from a given branch without checking it out.

    Runs ``git show <branch>:<path>``. Because ``check=False`` is passed, a
    non-zero git exit is not converted to Err; the caller receives Ok with
    whatever stdout git produced, so it should also test for empty content.
    """
    return _run_git("show", f"{branch}:{path}", check=False)
208
+
209
+
210
def _git_working_tree_clean() -> bool:
    """Check that the working tree has no uncommitted changes.

    Returns True only when ``git status --porcelain`` succeeds and prints
    nothing. The command runs with ``check=True`` so that a failing
    ``git status`` is reported as "not clean" rather than being mistaken for
    an empty (clean) listing.
    """
    result = _run_git("status", "--porcelain")
    return result.ok and result.value.strip() == ""
214
+
215
+
216
def _git_diff_commits(commit_a: str, commit_b: str) -> Result[str]:
    """Get the diff between two commits.

    Runs ``git diff <commit_a> <commit_b>``; a failing git exit yields Err.
    """
    return _run_git("diff", commit_a, commit_b)
219
+
220
+
221
def _git_log_oneline(branch: str, base_commit: str, max_count: int = 50) -> Result[str]:
    """Get one-line log of commits on branch since base_commit.

    Runs ``git log --oneline --max-count=<max_count> <base_commit>..<branch>``.
    ``check=False`` means a non-zero git exit is not converted to Err; the
    caller gets Ok with whatever stdout was produced.
    """
    return _run_git(
        "log", "--oneline", f"--max-count={max_count}",
        f"{base_commit}..{branch}",
        check=False,
    )
228
+
229
+
230
+ # ---------------------------------------------------------------------------
231
+ # Evolve config persistence
232
+ # ---------------------------------------------------------------------------
233
+
234
EVOLVE_CONFIG_FILE = "evolve.json"


def _evolve_config_path() -> Path:
    """Locate evolve.json: inside the repo root if git reports one, else relative to CWD."""
    toplevel = _run_git("rev-parse", "--show-toplevel")
    if toplevel.ok:
        return Path(toplevel.value) / EVOLVE_CONFIG_FILE
    # Not in a repository (or git unavailable): fall back to a bare relative path.
    return Path(EVOLVE_CONFIG_FILE)
243
+
244
+
245
def _load_evolve_config() -> Result[EvolveConfig]:
    """Load evolve.json from the repo root.

    Returns:
        ``Ok(EvolveConfig)`` on success. ``Err`` when the file is missing,
        cannot be read, is not valid JSON, or lacks a required key. Agents
        without a "status" entry default to "pending".
    """
    path = _evolve_config_path()
    if not path.exists():
        return Err(
            f"No evolve config found at {path}. "
            "Run 'autoevolve init' first."
        )
    try:
        raw = json.loads(path.read_text())
        agents = [
            AgentConfig(
                id=a["id"],
                branch=a["branch"],
                strategy=a["strategy"],
                status=a.get("status", "pending"),
            )
            for a in raw.get("agents", [])
        ]
        return Ok(EvolveConfig(
            tag=raw["tag"],
            base_branch=raw["base_branch"],
            base_commit=raw["base_commit"],
            created_at=raw["created_at"],
            agents=agents,
        ))
    except OSError as exc:
        # Permission problems, the file vanishing after exists(), etc. must
        # surface as Err like every other failure of this function.
        return Err(f"Failed to read evolve.json: {exc}")
    except (json.JSONDecodeError, KeyError, TypeError, ValueError, AttributeError) as exc:
        return Err(f"Corrupt evolve.json: {exc}")
273
+
274
+
275
def _save_evolve_config(config: EvolveConfig) -> Result[None]:
    """Write evolve.json via a temp file in the same directory, then rename over the target.

    The file is only written to disk; nothing here stages or commits it.
    Returns ``Ok(None)`` on success or ``Err`` describing the OS-level failure.
    """
    target = _evolve_config_path()
    payload = {
        "tag": config.tag,
        "base_branch": config.base_branch,
        "base_commit": config.base_commit,
        "created_at": config.created_at,
        "agents": list(map(asdict, config.agents)),
    }
    try:
        serialized = json.dumps(payload, indent=2) + "\n"
        handle_fd, staging_name = tempfile.mkstemp(dir=target.parent, suffix=".tmp")
        try:
            with os.fdopen(handle_fd, "w") as handle:
                handle.write(serialized)
            Path(staging_name).replace(target)
        except BaseException:
            # Do not leave the temp file behind, whatever interrupted the write.
            Path(staging_name).unlink(missing_ok=True)
            raise
    except OSError as exc:
        return Err(f"Failed to write evolve.json: {exc}")
    return Ok(None)
298
+
299
+
300
+ # ---------------------------------------------------------------------------
301
+ # Results.tsv parsing
302
+ # ---------------------------------------------------------------------------
303
+
304
def _parse_results_tsv(raw: str) -> list[Experiment]:
    """Turn the text of a results.tsv file into Experiment records.

    Expected header: commit\tval_bpb\tmemory_gb\tstatus\tdescription

    Blank lines, a leading header line, rows with fewer than five
    tab-separated columns, and rows whose numeric columns do not parse are
    all skipped silently. Extra columns are folded back into the description.
    """
    records: list[Experiment] = []

    for position, text in enumerate(raw.strip().splitlines()):
        trimmed = text.strip()
        if not trimmed:
            continue
        # Only the very first line may be a header.
        if position == 0 and trimmed.startswith("commit"):
            continue

        columns = text.split("\t")
        if len(columns) < 5:
            continue
        commit, bpb, memory, state, *rest = columns

        try:
            record = Experiment(
                commit=commit.strip(),
                val_bpb=float(bpb.strip()),
                memory_gb=float(memory.strip()),
                status=state.strip(),
                description="\t".join(rest).strip(),
            )
        except (ValueError, IndexError):
            # Malformed row: ignore it and keep going.
            continue
        records.append(record)

    return records
334
+
335
+
336
def _read_results_for_agent(agent: AgentConfig) -> str:
    """Return the text of an agent's results.tsv, or "" when none can be found.

    The committed copy on the agent's branch wins when it is non-empty. The
    working-tree copy is consulted only if that branch is the one currently
    checked out.
    """
    committed = _git_show_file(agent.branch, "results.tsv")
    if committed.ok and committed.value.strip():
        return committed.value

    # Working-tree fallback: only meaningful for the checked-out branch.
    head = _git_current_branch()
    if not (head.ok and head.value == agent.branch):
        return ""

    toplevel = _run_git("rev-parse", "--show-toplevel")
    if not toplevel.ok:
        return ""

    on_disk = Path(toplevel.value) / "results.tsv"
    return on_disk.read_text() if on_disk.exists() else ""
353
+
354
+
355
def _get_agent_status(agent: AgentConfig) -> AgentStatus:
    """Summarise an agent from its results.tsv.

    ``keep_count`` counts every row with status "keep"; the best experiment
    is the lowest-val_bpb row among keeps whose val_bpb is positive.
    """
    raw = _read_results_for_agent(agent)
    parsed = _parse_results_tsv(raw) if raw.strip() else []

    kept = [row for row in parsed if row.status == "keep"]
    scored = [row for row in kept if row.val_bpb > 0]

    top: Optional[Experiment] = None
    if scored:
        top = min(scored, key=lambda row: row.val_bpb)

    return AgentStatus(
        agent=agent,
        experiments=parsed,
        best_val_bpb=None if top is None else top.val_bpb,
        best_experiment=top,
        keep_count=len(kept),
    )
379
+
380
+
381
+ def _compute_improvements(experiments: list[Experiment]) -> list[tuple[Experiment, float]]:
382
+ """Find impactful keep experiments by comparing each to the previous best.
383
+
384
+ Each keep is compared against the best val_bpb among all preceding keeps,
385
+ avoiding comparisons to crashed experiments (val_bpb=0.0) or discards.
386
+ """
387
+ improvements: list[tuple[Experiment, float]] = []
388
+ prev_best: Optional[float] = None
389
+ for exp in experiments:
390
+ if exp.status != "keep" or exp.val_bpb <= 0:
391
+ continue
392
+ if prev_best is not None:
393
+ delta = prev_best - exp.val_bpb
394
+ if delta > 0:
395
+ improvements.append((exp, delta))
396
+ if prev_best is None or exp.val_bpb < prev_best:
397
+ prev_best = exp.val_bpb
398
+ return improvements
399
+
400
+
401
+ # ---------------------------------------------------------------------------
402
+ # Program.md generation
403
+ # ---------------------------------------------------------------------------
404
+
405
def _generate_program_md(strategy: dict[str, str], agent_id: int, tag: str) -> str:
    """Generate a program.md variant for an agent with a specific research strategy.

    Args:
        strategy: Mapping that must provide "label" and "guidance".
        agent_id: Number shown in the document title.
        tag: Evolve tag shown in the document title.

    Returns:
        The full markdown text; nothing is written to disk here.
    """
    # The opening backslash suppresses the newline after the triple quote so
    # the document starts directly with the title line.
    return f"""\
# Autoresearch Program — Evolve {tag}, Agent {agent_id}

## Strategy: {strategy['label']}

{strategy['guidance']}

## Rules

1. Modify `train.py` and commit your changes with a clear description.
2. Run `uv run train.py > run.log 2>&1` — training runs for exactly 5 minutes.
3. Read results: `grep "^val_bpb:\\|^peak_vram_mb:" run.log`
4. Record results in `results.tsv` (tab-separated):
- commit (short hash), val_bpb, memory_gb (peak_vram_mb / 1024), status, description
5. **Commit results.tsv** after each experiment so evolve tracking works:
`git add results.tsv && git commit -m "update results"`
6. If val_bpb improved (lower), set status to `keep` and advance the branch.
7. If val_bpb is equal or worse, set status to `discard` and `git reset --hard HEAD~1`
to revert the train.py changes (but keep results.tsv updated).
8. Repeat indefinitely. Each experiment should build on previous successes.

## Hints

If an `evolve-hints.md` file exists in the repo root, it contains insights from the
leading agent in the evolve. Consider incorporating their successful ideas.

## Goal

Minimize `val_bpb` within the 5-minute time budget per experiment. Lower is better.
"""
437
+
438
+
439
+ # ---------------------------------------------------------------------------
440
+ # CLI commands
441
+ # ---------------------------------------------------------------------------
442
+
443
@click.group(epilog="Exit codes: 0 = success, 1 = error")
@click.version_option(version="1.0.0", prog_name="autoevolve")
@click.option("--no-color", is_flag=True, default=False, help="Disable colored output")
@click.option("--quiet", "-q", is_flag=True, default=False, help="Minimal output")
@click.pass_context
def cli(ctx: click.Context, no_color: bool, quiet: bool) -> None:
    """Multi-agent research competition orchestrator for autoresearch."""
    # Color needs both: the user did not opt out, and stdout is a terminal.
    use_color = (not no_color) and sys.stdout.isatty()
    settings = OutputConfig(color=use_color, quiet=quiet)

    # Sub-commands read the shared settings from ctx.obj["cfg"].
    ctx.ensure_object(dict)
    ctx.obj["cfg"] = settings
455
+
456
+
457
@cli.command()
@click.option("--agents", "-n", type=int, required=True, help="Number of competing agents")
@click.option("--base-branch", "-b", type=str, default="main", help="Branch to fork from")
@click.option("--tag", "-t", type=str, required=True, help="Evolve tag (e.g. mar15)")
@click.pass_context
def init(ctx: click.Context, agents: int, base_branch: str, tag: str) -> None:
    """Initialize a new evolve with N competing agent branches."""
    # Flow: validate preconditions -> create one branch per agent, each with
    # its own program.md and a results.tsv header -> return to the starting
    # branch -> persist evolve.json -> print next steps.
    # Any failure exits with status 1; branches created so far are deleted.
    cfg = ctx.obj["cfg"]
    if agents < 1:
        click.echo("Error: --agents must be at least 1.", err=True)
        sys.exit(1)

    # Verify we are inside a git repo
    repo_root = _run_git("rev-parse", "--show-toplevel")
    if not repo_root.ok:
        click.echo("Error: not inside a git repository.", err=True)
        sys.exit(1)

    # Ensure no uncommitted changes before checking out branches
    if not _git_working_tree_clean():
        click.echo("Error: working tree has uncommitted changes. Commit or stash first.", err=True)
        sys.exit(1)

    # Verify base branch exists
    if not _git_branch_exists(base_branch):
        click.echo(f"Error: base branch '{base_branch}' does not exist.", err=True)
        sys.exit(1)

    # Check for existing evolve config
    config_path = _evolve_config_path()
    if config_path.exists():
        click.echo(
            f"Error: evolve.json already exists at {config_path}. "
            "Remove it first or use a different repo.",
            err=True,
        )
        sys.exit(1)

    # Get base commit
    base_sha = _run_git("rev-parse", "--short", base_branch)
    if not base_sha.ok:
        click.echo(f"Error: could not resolve base branch: {base_sha.error}", err=True)
        sys.exit(1)

    # Remember current branch to return to it
    current_branch = _git_current_branch()
    if not current_branch.ok:
        click.echo(f"Error: {current_branch.error}", err=True)
        sys.exit(1)

    agent_configs: list[AgentConfig] = []
    created_branches: list[str] = []

    try:
        for i in range(1, agents + 1):
            # Round-robin over the built-in strategies.
            strategy = STRATEGIES[(i - 1) % len(STRATEGIES)]
            branch_name = f"evolve/{tag}-agent-{i}"

            # Check if branch already exists
            if _git_branch_exists(branch_name):
                click.echo(f"Error: branch '{branch_name}' already exists.", err=True)
                sys.exit(1)

            # Create branch from base
            result = _run_git("checkout", "-b", branch_name, base_branch)
            if not result.ok:
                click.echo(f"Error creating branch {branch_name}: {result.error}", err=True)
                sys.exit(1)
            created_branches.append(branch_name)

            # Write program.md
            program_content = _generate_program_md(strategy, i, tag)
            program_path = Path(repo_root.value) / "program.md"
            program_path.write_text(program_content)

            # Create an initial empty results.tsv with header
            results_path = Path(repo_root.value) / "results.tsv"
            if not results_path.exists():
                results_path.write_text("commit\tval_bpb\tmemory_gb\tstatus\tdescription\n")

            # Commit program.md and results.tsv to the branch. Staging
            # problems are reported instead of being dropped silently; the
            # commit is still attempted and keeps its best-effort warning.
            add_result = _run_git("add", "program.md", "results.tsv")
            if not add_result.ok:
                click.echo(f"Warning: staging files on {branch_name}: {add_result.error}", err=True)
            commit_result = _run_git(
                "commit", "-m",
                f"evolve({tag}): initialize agent {i} with {strategy['key']} strategy",
            )
            if not commit_result.ok:
                click.echo(f"Warning: commit on {branch_name}: {commit_result.error}", err=True)

            agent_configs.append(AgentConfig(
                id=i,
                branch=branch_name,
                strategy=strategy["key"],
                status="pending",
            ))
    except BaseException:
        # Clean up created branches on ANY failure (sys.exit, an OSError while
        # writing files, Ctrl-C, ...). Leftover branches would otherwise make
        # the next `init` with the same tag fail on "already exists".
        _run_git("checkout", current_branch.value, check=False)
        for branch in created_branches:
            _run_git("branch", "-D", branch, check=False)
        raise
    finally:
        # Always return to original branch
        _run_git("checkout", current_branch.value, check=False)

    # Save evolve config (not committed)
    evolve = EvolveConfig(
        tag=tag,
        base_branch=base_branch,
        base_commit=base_sha.value,
        created_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
        agents=agent_configs,
    )
    save_result = _save_evolve_config(evolve)
    if not save_result.ok:
        click.echo(f"Error saving config: {save_result.error}", err=True)
        sys.exit(1)

    # Print summary
    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} initialized ==")
    click.echo(f"Evolve: {tag} | Agents: {agents} | Base: {base_branch} ({base_sha.value})")
    click.echo()
    for ac in agent_configs:
        strategy_info = next(s for s in STRATEGIES if s["key"] == ac.strategy)
        click.echo(f" Agent {ac.id}: {ac.branch} ({strategy_info['label']})")
    click.echo()
    click.echo("To start each agent, check out its branch and run your autoresearch agent:")
    click.echo()
    for ac in agent_configs:
        click.echo(f" git checkout {ac.branch}")
        click.echo(" # Start your AI agent here (e.g., claude, codex, gemini)")
        click.echo()
    click.echo("Monitor progress with: autoevolve status")
    click.echo("Cross-pollinate ideas with: autoevolve pollinate")
591
+
592
+
593
@cli.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show current evolve status and quick leaderboard."""
    cfg = ctx.obj["cfg"]

    loaded = _load_evolve_config()
    if not loaded.ok:
        click.echo(f"Error: {loaded.error}", err=True)
        sys.exit(1)
    config = loaded.value

    def label_for(key: str) -> str:
        # Display name of a strategy; unknown keys are shown verbatim.
        return next((st["label"] for st in STRATEGIES if st["key"] == key), key)

    statuses = [_get_agent_status(agent) for agent in config.agents]

    # The leader is the agent with the lowest best val_bpb, if anyone has one.
    agents_with_results = [entry for entry in statuses if entry.best_val_bpb is not None]
    leader = None
    if agents_with_results:
        leader = min(agents_with_results, key=lambda entry: entry.best_val_bpb)

    # Quiet mode: a single line and nothing else.
    if cfg.quiet:
        if leader and leader.best_experiment:
            total_exps = sum(len(entry.experiments) for entry in statuses)
            click.echo(
                f"Leader: Agent {leader.agent.id}, "
                f"best: {leader.best_val_bpb:.6f} "
                f"({total_exps} experiments)"
            )
        else:
            click.echo("No results yet.")
        return

    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} status ==")
    click.echo(
        f"Evolve: {config.tag} | Agents: {len(config.agents)} | "
        f"Started: {config.created_at}"
    )
    click.echo()

    for entry in statuses:
        if entry.best_val_bpb is None:
            bpb_str = "N/A"
        else:
            bpb_str = f"{entry.best_val_bpb:.6f}"

        marker = ""
        if leader and entry is leader:
            marker = f" {cfg.styled(SYM_STAR + ' LEADER', fg='yellow', bold=True)}"

        click.echo(
            f"Agent {entry.agent.id} ({label_for(entry.agent.strategy)}): "
            f"{len(entry.experiments)} experiments, "
            f"best val_bpb: {bpb_str}, "
            f"{entry.keep_count} keeps{marker}"
        )

    if leader and leader.best_experiment:
        click.echo()
        winning = leader.best_experiment
        click.echo(
            f"Overall best: Agent {leader.agent.id} at {winning.val_bpb:.6f} "
            f'("{winning.description}")'
        )

    if not agents_with_results:
        click.echo("\nNo experiment results found yet. Agents may not have started.")

    click.echo()
661
+
662
+
663
@cli.command()
@click.option("--detailed", is_flag=True, help="Show detailed per-agent trajectory")
@click.pass_context
def leaderboard(ctx: click.Context, detailed: bool) -> None:
    """Show detailed leaderboard comparison across all agents."""
    # Output: a ranked summary table (lowest best val_bpb first; agents with
    # no result sort last). With --detailed, also a per-agent trajectory and
    # a per-strategy improvement summary.
    cfg = ctx.obj["cfg"]

    config_result = _load_evolve_config()
    if not config_result.ok:
        click.echo(f"Error: {config_result.error}", err=True)
        sys.exit(1)

    config = config_result.value
    statuses = [_get_agent_status(agent) for agent in config.agents]

    ranked = sorted(
        statuses,
        key=lambda s: s.best_val_bpb if s.best_val_bpb is not None else float("inf"),
    )

    click.echo(f"\n== {cfg.styled('autoevolve', fg='cyan', bold=True)} leaderboard ==")
    click.echo(
        f"Evolve: {config.tag} | Agents: {len(config.agents)} | "
        f"Base: {config.base_branch} ({config.base_commit})"
    )
    click.echo()

    header = f"{'Rank':<6}{'Agent':<10}{'Strategy':<25}{'Best BPB':<12}{'Exps':<8}{'Keeps':<8}{'Keep %':<8}"
    click.echo(cfg.styled(header, dim=True))
    click.echo("-" * 77)

    for rank, s in enumerate(ranked, 1):
        strategy_info = next(
            (st for st in STRATEGIES if st["key"] == s.agent.strategy),
            {"label": s.agent.strategy},
        )
        bpb_str = f"{s.best_val_bpb:.6f}" if s.best_val_bpb is not None else "N/A"
        total = len(s.experiments)
        keep_pct = f"{(s.keep_count / total * 100):.0f}%" if total > 0 else "N/A"
        click.echo(
            f"{rank:<6}{s.agent.id:<10}{strategy_info['label']:<25}"
            f"{bpb_str:<12}{total:<8}{s.keep_count:<8}{keep_pct:<8}"
        )

    if detailed:
        click.echo()
        click.echo("=" * 77)
        click.echo("DETAILED TRAJECTORIES")
        click.echo("=" * 77)

        for s in ranked:
            strategy_info = next(
                (st for st in STRATEGIES if st["key"] == s.agent.strategy),
                {"label": s.agent.strategy},
            )
            click.echo(
                f"\n--- Agent {s.agent.id}: {strategy_info['label']} "
                f"({s.agent.branch}) ---"
            )

            if not s.experiments:
                click.echo(" No experiments yet.")
                continue

            traj_header = f" {'#':<4}{'Commit':<10}{'val_bpb':<12}{'Status':<8}{'Best So Far':<14}{'Description'}"
            click.echo(cfg.styled(traj_header, dim=True))
            click.echo(f" {'-' * 72}")

            # Only a "keep" with a positive val_bpb can set a new best, the
            # same rule used for the summary table. Otherwise one crashed run
            # (val_bpb=0.0) or a discarded run would pin "Best So Far" to a
            # value the agent never actually kept.
            running_best: Optional[float] = None
            for idx, exp in enumerate(s.experiments, 1):
                qualifies = exp.status == "keep" and exp.val_bpb > 0
                is_new_best = qualifies and (
                    running_best is None or exp.val_bpb < running_best
                )
                if is_new_best:
                    running_best = exp.val_bpb
                # No qualifying run yet: show N/A rather than a sentinel number.
                best_str = f"{running_best:.6f}" if running_best is not None else "N/A"
                marker = f" {cfg.styled(SYM_KEEP, fg='green')}" if is_new_best else ""
                click.echo(
                    f" {idx:<4}{exp.commit:<10}{exp.val_bpb:<12.6f}"
                    f"{exp.status:<8}{best_str:<14}{exp.description}{marker}"
                )

        click.echo()
        click.echo("=" * 77)
        click.echo("STRATEGY EFFECTIVENESS")
        click.echo("=" * 77)
        click.echo()

        for s in ranked:
            strategy_info = next(
                (st for st in STRATEGIES if st["key"] == s.agent.strategy),
                {"label": s.agent.strategy},
            )
            computed = _compute_improvements(s.experiments)
            if not computed:
                # No record-setting keeps: nothing to average for this agent.
                continue

            deltas = [delta for _, delta in computed]
            avg_improvement = sum(deltas) / len(deltas)
            best_improvement = max(deltas)
            click.echo(
                f" {strategy_info['label']}: "
                f"avg improvement per keep: {avg_improvement:.6f}, "
                f"best single improvement: {best_improvement:.6f}"
            )

    click.echo()
767
+
768
+
769
def _find_impactful_experiments(status: AgentStatus) -> list[tuple[Experiment, float]]:
    """Return the agent's (experiment, delta) pairs ordered by delta, largest first.

    The pairs themselves are produced by ``_compute_improvements`` from
    ``status.experiments``; this function only orders that list (in place)
    by the delta component and hands it back.
    """
    ranked_pairs = _compute_improvements(status.experiments)
    # Descending by the second tuple element (the delta).
    ranked_pairs.sort(reverse=True, key=lambda item: item[1])
    return ranked_pairs
774
+
775
+
776
def _build_hints_content(
    config: EvolveConfig,
    leader: AgentStatus,
    impactful: list[tuple[Experiment, float]],
) -> str:
    """Build the evolve-hints.md content from leader data.

    Args:
        config: Evolve configuration; only ``config.tag`` is read here.
        leader: Status of the leading agent. ``leader.agent.id``,
            ``leader.agent.strategy`` and ``leader.best_val_bpb`` are read.
        impactful: ``(experiment, delta)`` pairs in the order they should be
            presented; at most the first five are rendered.

    Returns:
        The Markdown document as a single newline-joined string.

    Side effects:
        For every rendered experiment, runs
        ``git diff <commit>~1 <commit> -- train.py`` through ``_run_git`` and
        embeds the output when the call succeeds and is non-empty.
    """
    # Fall back to the raw strategy key when it is not listed in STRATEGIES.
    leader_label = next(
        (st["label"] for st in STRATEGIES if st["key"] == leader.agent.strategy),
        leader.agent.strategy,
    )

    # best_val_bpb is treated as optional elsewhere in this file; render "N/A"
    # instead of raising on the float format spec when it is missing.
    if leader.best_val_bpb is not None:
        best_bpb = f"{leader.best_val_bpb:.6f}"
    else:
        best_bpb = "N/A"

    generated_at = datetime.now(timezone.utc).isoformat(timespec="seconds")

    lines: list[str] = [
        f"# Hints from Evolve {config.tag}",
        "",
        # The installed console script is `autoevolve` (see [project.scripts]).
        f"Generated by `autoevolve pollinate` at {generated_at}",
        "",
        "## Leading Agent",
        "",
        f"Agent {leader.agent.id} ({leader_label}) is currently leading "
        f"with best val_bpb: {best_bpb}",
        "",
        "## Most Impactful Experiments",
        "",
    ]

    # Include up to top 5 most impactful experiments
    top_experiments = impactful[:5]
    if not top_experiments:
        lines.append("No impactful improvements detected yet.")
        lines.append("")

    for rank, (exp, delta) in enumerate(top_experiments, 1):
        lines.extend(
            [
                f"### {rank}. {exp.description}",
                "",
                f"- **Commit**: {exp.commit}",
                f"- **val_bpb**: {exp.val_bpb:.6f}",
                f"- **Improvement**: {delta:.6f}",
                f"- **Memory**: {exp.memory_gb:.1f} GB",
                "",
            ]
        )

        # Try to get the diff for this commit; a failed or empty diff is
        # simply left out of the document.
        diff_result = _run_git(
            "diff", f"{exp.commit}~1", exp.commit,
            "--", "train.py",
            check=False,
        )
        if diff_result.ok and diff_result.value.strip():
            lines.extend(
                [
                    "<details><summary>Code changes</summary>",
                    "",
                    "```diff",
                    diff_result.value,
                    "```",
                    "",
                    "</details>",
                    "",
                ]
            )

    lines.append("## Suggestion")
    lines.append("")
    lines.append(
        "Consider incorporating the above successful changes into your experiments. "
        "These modifications produced measurable improvements in val_bpb."
    )
    lines.append("")

    return "\n".join(lines)
841
+
842
+
843
@cli.command()
@click.pass_context
def pollinate(ctx: click.Context) -> None:
    """Cross-pollinate: share winning ideas from the best agent with all others."""
    # NOTE: the docstring above is also the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Flow: load the evolve config, pick the agent with the lowest
    # best_val_bpb, render its most impactful experiments as Markdown and
    # write that to <repo root>/evolve-hints.md. Exits with status 1 when the
    # config cannot be loaded, no agent has results, or the repo root cannot
    # be resolved.
    config_result = _load_evolve_config()
    if not config_result.ok:
        click.echo(f"Error: {config_result.error}", err=True)
        sys.exit(1)

    config = config_result.value
    statuses = [_get_agent_status(agent) for agent in config.agents]

    # Find the leader (lower val_bpb is better); agents without results are ignored.
    agents_with_results = [s for s in statuses if s.best_val_bpb is not None]
    if not agents_with_results:
        click.echo("No experiment results found yet. Nothing to pollinate.", err=True)
        sys.exit(1)

    leader = min(agents_with_results, key=lambda s: s.best_val_bpb)

    # Find the leader's most impactful "keep" experiments
    impactful = _find_impactful_experiments(leader)

    # Build hints content
    hints_content = _build_hints_content(config, leader, impactful)

    # Write to repo root as an untracked file (no checkout needed!)
    repo_root = _run_git("rev-parse", "--show-toplevel")
    if not repo_root.ok:
        click.echo(f"Error: {repo_root.error}", err=True)
        sys.exit(1)

    hints_path = Path(repo_root.value) / "evolve-hints.md"
    # Explicit UTF-8: descriptions and embedded diffs may contain non-ASCII
    # text, and the locale default encoding is not UTF-8 on every platform.
    hints_path.write_text(hints_content, encoding="utf-8")

    click.echo(f"Hints from Agent {leader.agent.id} written to {hints_path}")
    click.echo("All agents can read this file regardless of their branch.")
    click.echo()
882
+
883
+
884
@cli.command()
@click.option(
    "--format", "fmt",
    type=click.Choice(["json", "tsv"]),
    default="json",
    help="Export format",
)
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file path")
@click.pass_context
def export(ctx: click.Context, fmt: str, output: Optional[str]) -> None:
    """Export all agent results to a single file for external analysis."""
    # NOTE: the docstring above is also the command's --help text, so
    # implementation notes are kept in comments.
    #
    # fmt:    "json" (one document with per-agent experiment lists) or
    #         "tsv" (one row per experiment, with a header row).
    # output: destination path; when omitted the content goes to stdout.
    # Exits with status 1 when the evolve config cannot be loaded.
    config_result = _load_evolve_config()
    if not config_result.ok:
        click.echo(f"Error: {config_result.error}", err=True)
        sys.exit(1)

    config = config_result.value
    statuses = [_get_agent_status(agent) for agent in config.agents]

    if fmt == "json":
        agents_payload = [
            {
                "id": s.agent.id,
                "branch": s.agent.branch,
                "strategy": s.agent.strategy,
                "best_val_bpb": s.best_val_bpb,
                "keep_count": s.keep_count,
                "total_experiments": len(s.experiments),
                "experiments": [
                    {
                        "commit": e.commit,
                        "val_bpb": e.val_bpb,
                        "memory_gb": e.memory_gb,
                        "status": e.status,
                        "description": e.description,
                    }
                    for e in s.experiments
                ],
            }
            for s in statuses
        ]
        data = {
            "evolve": config.tag,
            "base_branch": config.base_branch,
            "base_commit": config.base_commit,
            "created_at": config.created_at,
            "exported_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
            "agents": agents_payload,
        }
        content = json.dumps(data, indent=2) + "\n"

    elif fmt == "tsv":
        header = "agent_id\tagent_strategy\tcommit\tval_bpb\tmemory_gb\tstatus\tdescription\n"
        rows: list[str] = [header]
        # NOTE(review): fields are written verbatim; a description containing a
        # tab or newline would break the row layout — confirm upstream parsing
        # rules that out.
        for s in statuses:
            for e in s.experiments:
                rows.append(
                    f"{s.agent.id}\t{s.agent.strategy}\t{e.commit}\t"
                    f"{e.val_bpb}\t{e.memory_gb}\t{e.status}\t{e.description}\n"
                )
        content = "".join(rows)
    else:
        # Defensive: click.Choice above already restricts fmt to json/tsv.
        click.echo(f"Error: unsupported format '{fmt}'.", err=True)
        sys.exit(1)

    if output:
        # Explicit UTF-8 so the export does not depend on the locale's default
        # encoding (TSV rows can carry non-ASCII descriptions).
        Path(output).write_text(content, encoding="utf-8")
        click.echo(f"Exported {fmt.upper()} to {output}")
    else:
        # content already ends with a newline; do not add another.
        click.echo(content, nl=False)
955
+
956
+
957
+ # ---------------------------------------------------------------------------
958
+ # Entry point
959
+ # ---------------------------------------------------------------------------
960
+
961
def main() -> None:
    """Entry point used by the console script: run the click command group."""
    cli()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,35 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "autoevolve"
version = "1.0.0"
description = "Multi-agent research competition orchestrator for autoresearch"
readme = "README.md"
requires-python = ">=3.10"
license = "MIT"
keywords = [
    "autoresearch",
    "karpathy",
    "gpt",
    "pretraining",
    "multi-agent",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "click>=8.0",
]

[project.urls]
Homepage = "https://github.com/dean0x/autolab"
Repository = "https://github.com/dean0x/autolab"
Issues = "https://github.com/dean0x/autolab/issues"

[project.scripts]
# Console command `autoevolve` -> main() in the auto_evolve module.
autoevolve = "auto_evolve:main"

[tool.hatch.build.targets.wheel]
# The distribution ships a single top-level module file, not a package directory.
packages = ["auto_evolve.py"]