autoevolve-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autoevolve_cli-0.1.0/.github/workflows/ci.yml +36 -0
- autoevolve_cli-0.1.0/.github/workflows/publish.yml +36 -0
- autoevolve_cli-0.1.0/.gitignore +10 -0
- autoevolve_cli-0.1.0/.python-version +1 -0
- autoevolve_cli-0.1.0/Makefile +31 -0
- autoevolve_cli-0.1.0/PKG-INFO +105 -0
- autoevolve_cli-0.1.0/README.md +94 -0
- autoevolve_cli-0.1.0/assets/screenshot.png +0 -0
- autoevolve_cli-0.1.0/pyproject.toml +57 -0
- autoevolve_cli-0.1.0/src/autoevolve/__init__.py +0 -0
- autoevolve_cli-0.1.0/src/autoevolve/app.py +100 -0
- autoevolve_cli-0.1.0/src/autoevolve/commands/__init__.py +0 -0
- autoevolve_cli-0.1.0/src/autoevolve/commands/analytics.py +163 -0
- autoevolve_cli-0.1.0/src/autoevolve/commands/human.py +166 -0
- autoevolve_cli-0.1.0/src/autoevolve/commands/inspect.py +512 -0
- autoevolve_cli-0.1.0/src/autoevolve/commands/lifecycle.py +79 -0
- autoevolve_cli-0.1.0/src/autoevolve/dashboard.py +2127 -0
- autoevolve_cli-0.1.0/src/autoevolve/git.py +175 -0
- autoevolve_cli-0.1.0/src/autoevolve/harnesses.py +153 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/__init__.py +0 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/experiment.py +63 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/git.py +32 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/lineage.py +18 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/types.py +22 -0
- autoevolve_cli-0.1.0/src/autoevolve/models/worktree.py +24 -0
- autoevolve_cli-0.1.0/src/autoevolve/problem.py +44 -0
- autoevolve_cli-0.1.0/src/autoevolve/prompt.py +157 -0
- autoevolve_cli-0.1.0/src/autoevolve/repository.py +459 -0
- autoevolve_cli-0.1.0/src/autoevolve/scaffold.py +88 -0
- autoevolve_cli-0.1.0/src/autoevolve/worktree.py +186 -0
- autoevolve_cli-0.1.0/tests/__init__.py +1 -0
- autoevolve_cli-0.1.0/tests/e2e/__init__.py +0 -0
- autoevolve_cli-0.1.0/tests/e2e/conftest.py +228 -0
- autoevolve_cli-0.1.0/tests/e2e/experiments.py +325 -0
- autoevolve_cli-0.1.0/tests/e2e/test_analytics.py +126 -0
- autoevolve_cli-0.1.0/tests/e2e/test_dashboard.py +445 -0
- autoevolve_cli-0.1.0/tests/e2e/test_human.py +229 -0
- autoevolve_cli-0.1.0/tests/e2e/test_inspect.py +237 -0
- autoevolve_cli-0.1.0/tests/e2e/test_lifecycle.py +143 -0
- autoevolve_cli-0.1.0/tests/fixtures/playground/README.md +13 -0
- autoevolve_cli-0.1.0/tests/fixtures/playground/data/cases.json +38 -0
- autoevolve_cli-0.1.0/tests/fixtures/playground/pyproject.toml +5 -0
- autoevolve_cli-0.1.0/tests/fixtures/playground/scripts/validate.py +29 -0
- autoevolve_cli-0.1.0/tests/fixtures/playground/src/ranker.py +5 -0
- autoevolve_cli-0.1.0/tests/unit/autoevolve/test_problem.py +25 -0
- autoevolve_cli-0.1.0/tests/unit/autoevolve/test_repository.py +165 -0
- autoevolve_cli-0.1.0/uv.lock +560 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
UV_FROZEN: "1"
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
check:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
19
|
+
|
|
20
|
+
steps:
|
|
21
|
+
- name: Check out repository
|
|
22
|
+
uses: actions/checkout@v4
|
|
23
|
+
with:
|
|
24
|
+
fetch-depth: 0
|
|
25
|
+
|
|
26
|
+
- name: Set up uv
|
|
27
|
+
uses: astral-sh/setup-uv@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
enable-cache: true
|
|
31
|
+
|
|
32
|
+
- name: Sync dependencies
|
|
33
|
+
run: make sync
|
|
34
|
+
|
|
35
|
+
- name: Run checks
|
|
36
|
+
run: make check
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
environment:
|
|
12
|
+
name: pypi
|
|
13
|
+
url: https://pypi.org/p/autoevolve-cli
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
id-token: write
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- name: Check out repository
|
|
20
|
+
uses: actions/checkout@v4
|
|
21
|
+
with:
|
|
22
|
+
fetch-depth: 0
|
|
23
|
+
|
|
24
|
+
- name: Set up uv
|
|
25
|
+
uses: astral-sh/setup-uv@v5
|
|
26
|
+
with:
|
|
27
|
+
enable-cache: true
|
|
28
|
+
|
|
29
|
+
- name: Install dependencies
|
|
30
|
+
run: make sync
|
|
31
|
+
|
|
32
|
+
- name: Build distributions
|
|
33
|
+
run: uv build
|
|
34
|
+
|
|
35
|
+
- name: Publish to PyPI
|
|
36
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.10
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
.PHONY: sync
|
|
2
|
+
sync:
|
|
3
|
+
uv sync
|
|
4
|
+
|
|
5
|
+
.PHONY: format
|
|
6
|
+
format:
|
|
7
|
+
uv run ruff format .
|
|
8
|
+
uv run ruff check --fix .
|
|
9
|
+
|
|
10
|
+
.PHONY: format-check
|
|
11
|
+
format-check:
|
|
12
|
+
uv run ruff format --check .
|
|
13
|
+
|
|
14
|
+
.PHONY: lint
|
|
15
|
+
lint:
|
|
16
|
+
uv run ruff check .
|
|
17
|
+
|
|
18
|
+
.PHONY: typecheck
|
|
19
|
+
typecheck:
|
|
20
|
+
uv run mypy src
|
|
21
|
+
|
|
22
|
+
.PHONY: test
|
|
23
|
+
test:
|
|
24
|
+
uv run pytest -q
|
|
25
|
+
|
|
26
|
+
.PHONY: build
|
|
27
|
+
build:
|
|
28
|
+
uv build
|
|
29
|
+
|
|
30
|
+
.PHONY: check
|
|
31
|
+
check: format-check lint typecheck test
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: autoevolve-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Git-backed experiment loops for coding agents
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: gitpython>=3.1.46
|
|
7
|
+
Requires-Dist: rich>=14
|
|
8
|
+
Requires-Dist: textual>=1
|
|
9
|
+
Requires-Dist: typer>=0.24.1
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# autoevolve
|
|
13
|
+
|
|
14
|
+

|
|
15
|
+
|
|
16
|
+
`autoevolve` lets coding agents run git-backed experiment loops autonomously. It gives agents a lightweight workflow for branching, recording results, and comparing experiments without needing heavy external dependencies, databases, or services.
|
|
17
|
+
|
|
18
|
+
Run it inside an existing project, let it set up the files your coding agent needs, and then let the agent iterate through experiments, branch into new research directions, and explore ideas on its own.
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install autoevolve-cli
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quickstart
|
|
27
|
+
|
|
28
|
+
**1. Initialize `autoevolve` in an existing git repo**:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
autoevolve init
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
`autoevolve init` walks you through the setup for your coding harness and problem:
|
|
35
|
+
|
|
36
|
+
- `SKILL.md` or `PROGRAM.md`: the instructions your coding agent reads to use `autoevolve`
|
|
37
|
+
- `PROBLEM.md`: the goal, metric, constraints, and validation setup for your problem
|
|
38
|
+
|
|
39
|
+
For an example problem to try, see the [circle packing example repo](https://github.com/wiskojo/autoevolve-circle-packing-example).
|
|
40
|
+
|
|
41
|
+
**2. Tell your agent to read `PROGRAM.md` or activate the skill depending on your setup**:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
Read PROGRAM.md, then start working.
|
|
45
|
+
|
|
46
|
+
# If using skills
|
|
47
|
+
$autoevolve # Codex
|
|
48
|
+
/autoevolve # Claude Code
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
From there, your agent should start working in the repo as usual. Experiment commits will include:
|
|
52
|
+
|
|
53
|
+
- `EXPERIMENT.json`: the structured record of the experiment, including summary, metrics, and any references to other experiments
|
|
54
|
+
- `JOURNAL.md`: the narrative record of the experiment, which could include the hypothesis, changes made, outcomes, reflections, etc.
|
|
55
|
+
|
|
56
|
+
**3. Start the TUI to monitor your agent's progress**:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
autoevolve dashboard
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## CLI
|
|
63
|
+
|
|
64
|
+
Here’s the CLI surface: `Human` commands handle setup and monitoring, `Lifecycle` manages experiments, and `Inspect` and `Analytics` help your agents review the experiment state.
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
Usage: autoevolve [OPTIONS] COMMAND [ARGS]...
|
|
68
|
+
|
|
69
|
+
Git-backed experiment loops for coding agents.
|
|
70
|
+
|
|
71
|
+
Options:
|
|
72
|
+
--help Show this message and exit.
|
|
73
|
+
|
|
74
|
+
Human:
|
|
75
|
+
init Set up PROBLEM.md and agent instructions.
|
|
76
|
+
validate Check that the repo is ready for autoevolve.
|
|
77
|
+
update Update detected prompt files to the latest version.
|
|
78
|
+
dashboard Open the experiment dashboard.
|
|
79
|
+
|
|
80
|
+
Lifecycle:
|
|
81
|
+
start Create a managed experiment branch and worktree.
|
|
82
|
+
record Validate, commit, and remove the current managed worktree.
|
|
83
|
+
clean Remove stale managed worktrees for this repository.
|
|
84
|
+
|
|
85
|
+
Inspect:
|
|
86
|
+
status Show the current experiment status.
|
|
87
|
+
log Show experiment logs.
|
|
88
|
+
show Show experiment details.
|
|
89
|
+
compare Compare two experiments.
|
|
90
|
+
lineage Show experiment lineage around one ref.
|
|
91
|
+
|
|
92
|
+
Analytics:
|
|
93
|
+
recent List the most recent recorded experiments.
|
|
94
|
+
best List the top experiments for one metric.
|
|
95
|
+
pareto List the Pareto frontier for selected metrics.
|
|
96
|
+
|
|
97
|
+
Examples:
|
|
98
|
+
autoevolve start tune-thresholds "Try a tighter threshold sweep" --from 07f1844
|
|
99
|
+
autoevolve record
|
|
100
|
+
autoevolve log
|
|
101
|
+
autoevolve recent --limit 5
|
|
102
|
+
autoevolve best --max benchmark_score --limit 5
|
|
103
|
+
|
|
104
|
+
Run "autoevolve <command> --help" for command-specific details.
|
|
105
|
+
```
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# autoevolve
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
`autoevolve` lets coding agents run git-backed experiment loops autonomously. It gives agents a lightweight workflow for branching, recording results, and comparing experiments without needing heavy external dependencies, databases, or services.
|
|
6
|
+
|
|
7
|
+
Run it inside an existing project, let it set up the files your coding agent needs, and then let the agent iterate through experiments, branch into new research directions, and explore ideas on its own.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install autoevolve-cli
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quickstart
|
|
16
|
+
|
|
17
|
+
**1. Initialize `autoevolve` in an existing git repo**:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
autoevolve init
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
`autoevolve init` walks you through the setup for your coding harness and problem:
|
|
24
|
+
|
|
25
|
+
- `SKILL.md` or `PROGRAM.md`: the instructions your coding agent reads to use `autoevolve`
|
|
26
|
+
- `PROBLEM.md`: the goal, metric, constraints, and validation setup for your problem
|
|
27
|
+
|
|
28
|
+
For an example problem to try, see the [circle packing example repo](https://github.com/wiskojo/autoevolve-circle-packing-example).
|
|
29
|
+
|
|
30
|
+
**2. Tell your agent to read `PROGRAM.md` or activate the skill depending on your setup**:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
Read PROGRAM.md, then start working.
|
|
34
|
+
|
|
35
|
+
# If using skills
|
|
36
|
+
$autoevolve # Codex
|
|
37
|
+
/autoevolve # Claude Code
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
From there, your agent should start working in the repo as usual. Experiment commits will include:
|
|
41
|
+
|
|
42
|
+
- `EXPERIMENT.json`: the structured record of the experiment, including summary, metrics, and any references to other experiments
|
|
43
|
+
- `JOURNAL.md`: the narrative record of the experiment, which could include the hypothesis, changes made, outcomes, reflections, etc.
|
|
44
|
+
|
|
45
|
+
**3. Start the TUI to monitor your agent's progress**:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
autoevolve dashboard
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## CLI
|
|
52
|
+
|
|
53
|
+
Here’s the CLI surface: `Human` commands handle setup and monitoring, `Lifecycle` manages experiments, and `Inspect` and `Analytics` help your agents review the experiment state.
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
Usage: autoevolve [OPTIONS] COMMAND [ARGS]...
|
|
57
|
+
|
|
58
|
+
Git-backed experiment loops for coding agents.
|
|
59
|
+
|
|
60
|
+
Options:
|
|
61
|
+
--help Show this message and exit.
|
|
62
|
+
|
|
63
|
+
Human:
|
|
64
|
+
init Set up PROBLEM.md and agent instructions.
|
|
65
|
+
validate Check that the repo is ready for autoevolve.
|
|
66
|
+
update Update detected prompt files to the latest version.
|
|
67
|
+
dashboard Open the experiment dashboard.
|
|
68
|
+
|
|
69
|
+
Lifecycle:
|
|
70
|
+
start Create a managed experiment branch and worktree.
|
|
71
|
+
record Validate, commit, and remove the current managed worktree.
|
|
72
|
+
clean Remove stale managed worktrees for this repository.
|
|
73
|
+
|
|
74
|
+
Inspect:
|
|
75
|
+
status Show the current experiment status.
|
|
76
|
+
log Show experiment logs.
|
|
77
|
+
show Show experiment details.
|
|
78
|
+
compare Compare two experiments.
|
|
79
|
+
lineage Show experiment lineage around one ref.
|
|
80
|
+
|
|
81
|
+
Analytics:
|
|
82
|
+
recent List the most recent recorded experiments.
|
|
83
|
+
best List the top experiments for one metric.
|
|
84
|
+
pareto List the Pareto frontier for selected metrics.
|
|
85
|
+
|
|
86
|
+
Examples:
|
|
87
|
+
autoevolve start tune-thresholds "Try a tighter threshold sweep" --from 07f1844
|
|
88
|
+
autoevolve record
|
|
89
|
+
autoevolve log
|
|
90
|
+
autoevolve recent --limit 5
|
|
91
|
+
autoevolve best --max benchmark_score --limit 5
|
|
92
|
+
|
|
93
|
+
Run "autoevolve <command> --help" for command-specific details.
|
|
94
|
+
```
|
|
Binary file
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "autoevolve-cli"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "Git-backed experiment loops for coding agents"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"gitpython>=3.1.46",
|
|
9
|
+
"rich>=14",
|
|
10
|
+
"textual>=1",
|
|
11
|
+
"typer>=0.24.1",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[dependency-groups]
|
|
15
|
+
dev = [
|
|
16
|
+
"inline-snapshot>=0.32.5",
|
|
17
|
+
"mypy>=1.19.1",
|
|
18
|
+
"pytest>=9.0.2",
|
|
19
|
+
"ruff>=0.15.7",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.scripts]
|
|
23
|
+
autoevolve = "autoevolve.app:main"
|
|
24
|
+
|
|
25
|
+
[build-system]
|
|
26
|
+
requires = ["hatchling>=1.27", "hatch-vcs>=0.4"]
|
|
27
|
+
build-backend = "hatchling.build"
|
|
28
|
+
|
|
29
|
+
[tool.hatch.version]
|
|
30
|
+
source = "vcs"
|
|
31
|
+
tag-pattern = "^v(?P<version>.*)$"
|
|
32
|
+
fallback-version = "0.1.0"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/autoevolve"]
|
|
36
|
+
|
|
37
|
+
[tool.ruff]
|
|
38
|
+
line-length = 100
|
|
39
|
+
target-version = "py310"
|
|
40
|
+
extend-exclude = ["tests/fixtures"]
|
|
41
|
+
|
|
42
|
+
[tool.ruff.lint]
|
|
43
|
+
select = ["E", "W", "F", "I", "B", "C4", "UP"]
|
|
44
|
+
ignore = ["E501"]
|
|
45
|
+
|
|
46
|
+
[tool.mypy]
|
|
47
|
+
strict = true
|
|
48
|
+
warn_return_any = true
|
|
49
|
+
no_implicit_optional = true
|
|
50
|
+
strict_equality = true
|
|
51
|
+
warn_unreachable = true
|
|
52
|
+
|
|
53
|
+
[tool.pytest.ini_options]
|
|
54
|
+
testpaths = ["tests"]
|
|
55
|
+
|
|
56
|
+
[tool.inline-snapshot]
|
|
57
|
+
format-command = "ruff format --stdin-filename {filename} -"
|
|
File without changes
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
import typer
|
|
5
|
+
from typer.core import TyperGroup
|
|
6
|
+
from typer.main import get_command
|
|
7
|
+
|
|
8
|
+
from autoevolve.commands.analytics import app as analytics_app
|
|
9
|
+
from autoevolve.commands.human import app as human_app
|
|
10
|
+
from autoevolve.commands.inspect import app as inspect_app
|
|
11
|
+
from autoevolve.commands.lifecycle import app as lifecycle_app
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AutoevolveGroup(TyperGroup):
    """Typer command group whose help output lists commands in named sections."""

    def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
        """Write the command listing, grouped by each command's ``rich_help_panel``.

        The four known panels are emitted first, in a fixed order. A command without
        a panel is filed under "Other", and any other panel name gets its own section
        after the known ones. Sections that end up with no commands are omitted.

        Args:
            ctx: Click context used to enumerate and resolve the commands.
            formatter: Help formatter that receives the sections.
        """
        # Resolve once and keep only the commands that will actually be listed.
        visible: list[tuple[str, click.Command]] = []
        for name in self.list_commands(ctx):
            command = self.get_command(ctx, name)
            if command is not None and not command.hidden:
                visible.append((name, command))

        # Every section is written by its own write_dl() call, so names are padded to
        # one shared width. Only listed commands count: a hidden command with a long
        # name must not widen the column.
        name_width = max((len(name) for name, _ in visible), default=0)

        sections: dict[str, list[tuple[str, str]]] = {
            title: [] for title in ("Human", "Lifecycle", "Inspect", "Analytics")
        }
        for name, command in visible:
            panel = getattr(command, "rich_help_panel", None) or "Other"
            sections.setdefault(panel, []).append(
                (name.ljust(name_width), command.get_short_help_str(formatter.width))
            )

        for title, rows in sections.items():
            if rows:
                with formatter.section(title):
                    formatter.write_dl(rows)

    def format_epilog(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
        """Write the epilog after a blank paragraph, passing its text to the formatter as-is.

        Nothing is written when the group has no epilog.
        """
        if self.epilog is None:
            return
        formatter.write_paragraph()
        formatter.write(f"{self.epilog}\n")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
app = typer.Typer(
|
|
48
|
+
cls=AutoevolveGroup,
|
|
49
|
+
help="Git-backed experiment loops for coding agents.",
|
|
50
|
+
epilog="""Examples:
|
|
51
|
+
autoevolve start tune-thresholds "Try a tighter threshold sweep" --from 07f1844
|
|
52
|
+
autoevolve record
|
|
53
|
+
autoevolve log
|
|
54
|
+
autoevolve recent --limit 5
|
|
55
|
+
autoevolve best --max benchmark_score --limit 5
|
|
56
|
+
|
|
57
|
+
Run "autoevolve <command> --help" for command-specific details.""",
|
|
58
|
+
invoke_without_command=True,
|
|
59
|
+
add_completion=False,
|
|
60
|
+
rich_markup_mode=None,
|
|
61
|
+
pretty_exceptions_enable=False,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
app.add_typer(human_app)
|
|
65
|
+
app.add_typer(lifecycle_app)
|
|
66
|
+
app.add_typer(inspect_app)
|
|
67
|
+
app.add_typer(analytics_app)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@app.callback()
def main_callback(ctx: typer.Context) -> None:
    # Root callback. When a subcommand was invoked, or Click is in resilient-parsing
    # mode, there is nothing to do here.
    if ctx.invoked_subcommand is not None or ctx.resilient_parsing:
        return

    # Bare `autoevolve`: print the top-level help and stop.
    typer.echo(ctx.get_help())
    raise typer.Exit()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the autoevolve CLI and return a process exit code.

    Args:
        argv: Arguments to parse. When given, they are copied into a list and passed
            to the command as ``args``; ``None`` is forwarded unchanged.

    Returns:
        The exit code carried by a Click/Typer ``Exit`` or ``ClickException``,
        ``1`` for an abort or any other exception, and ``0`` otherwise.
    """
    command = get_command(app)
    try:
        # standalone_mode=False stops Click from calling sys.exit(); errors are
        # re-raised to us and the outcome comes back as a return value.
        result = command.main(
            args=list(argv) if argv is not None else None,
            prog_name="autoevolve",
            standalone_mode=False,
        )
    except click.ClickException as error:
        error.show()
        return error.exit_code
    except typer.Abort:
        typer.echo("Aborted!", err=True)
        return 1
    except typer.Exit as error:
        return error.exit_code
    except Exception as error:
        # Top-level boundary: report the message instead of a traceback.
        typer.echo(str(error), err=True)
        return 1

    # In non-standalone mode Click catches Exit itself and returns its exit code, so
    # a command's `raise typer.Exit(code=N)` arrives here as the int N. Propagate it
    # rather than reporting success. bool is excluded because it subclasses int.
    if isinstance(result, int) and not isinstance(result, bool):
        return result
    return 0
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
if __name__ == "__main__":
|
|
100
|
+
raise SystemExit(main())
|
|
File without changes
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from autoevolve.models.experiment import ExperimentIndexEntry, Objective
|
|
7
|
+
from autoevolve.models.types import SetOutputFormat
|
|
8
|
+
from autoevolve.repository import ExperimentRepository
|
|
9
|
+
|
|
10
|
+
app = typer.Typer()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.command(
    "recent",
    rich_help_panel="Analytics",
    short_help="List the most recent recorded experiments.",
    help=(
        "List the most recent recorded experiments.\n\n"
        "recent emits recent experiments in TSV or JSONL format for scripting "
        "and analysis."
    ),
)
def recent(
    limit: Annotated[int, typer.Option(min=1, help="Number of experiments to show.")] = 10,
    output_format: Annotated[
        SetOutputFormat,
        typer.Option("--format", help="Output format."),
    ] = SetOutputFormat.TSV,
) -> None:
    # Ask the repository's recent index for `limit` entries, then print them in the
    # requested output format.
    entries = ExperimentRepository().recent_index(limit)
    _print_records(entries, output_format)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@app.command(
    "best",
    rich_help_panel="Analytics",
    short_help="List the top experiments for one metric.",
    help=(
        "List the top experiments for one metric.\n\n"
        "best ranks recorded experiments by one metric. If no metric is "
        "provided, it defaults to the primary metric from PROBLEM.md."
    ),
)
def best(
    max_metric: Annotated[str | None, typer.Option("--max", help="Metric to maximize.")] = None,
    min_metric: Annotated[str | None, typer.Option("--min", help="Metric to minimize.")] = None,
    limit: Annotated[int, typer.Option(min=1, help="Number of experiments to show.")] = 5,
    output_format: Annotated[
        SetOutputFormat,
        typer.Option("--format", help="Output format."),
    ] = SetOutputFormat.TSV,
) -> None:
    # Rank recorded experiments by a single objective and print the top `limit`.
    #
    # Raises typer.BadParameter when both --max and --min are given, and RuntimeError
    # when neither is given and PROBLEM.md cannot supply a primary metric.

    # Test "was the option supplied" with `is not None`, matching the selection
    # below. A truthiness test would let an empty-string metric bypass this guard
    # and one of the two options would then be silently ignored.
    if max_metric is not None and min_metric is not None:
        raise typer.BadParameter("Use either --max <metric> or --min <metric>, not both.")

    objective: Objective | None = None
    if max_metric is not None:
        objective = Objective(direction="max", metric=max_metric)
    elif min_metric is not None:
        objective = Objective(direction="min", metric=min_metric)

    repository = ExperimentRepository()
    if objective is None:
        # No explicit objective: fall back to the direction and metric from the
        # repository's problem definition.
        try:
            problem = repository.problem()
        except (FileNotFoundError, ValueError) as error:
            raise RuntimeError(
                "best requires an explicit objective, or a valid PROBLEM.md primary metric."
            ) from error
        objective = Objective(direction=problem.direction, metric=problem.metric)

    records = repository.best_records(objective, limit)
    if not records:
        typer.echo(f'No experiments found with a numeric "{objective.metric}" metric.')
        return
    _print_records(records, output_format)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command(
    "pareto",
    rich_help_panel="Analytics",
    short_help="List the Pareto frontier for selected metrics.",
    help=(
        "List the Pareto frontier for selected metrics.\n\n"
        "pareto returns the non-dominated recorded experiments for the selected "
        "metrics in TSV or JSONL format."
    ),
)
def pareto(
    max_metrics: Annotated[
        list[str] | None,
        typer.Option("--max", help="Metric to maximize. Repeat as needed."),
    ] = None,
    min_metrics: Annotated[
        list[str] | None,
        typer.Option("--min", help="Metric to minimize. Repeat as needed."),
    ] = None,
    limit: Annotated[int | None, typer.Option(min=1, help="Number of experiments to show.")] = None,
    output_format: Annotated[
        SetOutputFormat,
        typer.Option("--format", help="Output format."),
    ] = SetOutputFormat.TSV,
) -> None:
    # Collect one objective per --max / --min option, maximized metrics first.
    objectives = [
        *(Objective(direction="max", metric=name) for name in max_metrics or ()),
        *(Objective(direction="min", metric=name) for name in min_metrics or ()),
    ]
    if len(objectives) == 0:
        raise typer.BadParameter(
            "pareto requires at least one metric, for example: --max primary_metric --min runtime_sec"
        )

    frontier = ExperimentRepository().pareto_records(objectives, limit)
    if frontier:
        _print_records(frontier, output_format)
    else:
        typer.echo("No experiments found with numeric metrics for the requested Pareto objectives.")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _print_records(records: list[ExperimentIndexEntry], output_format: SetOutputFormat) -> None:
    """Echo *records* one per line, as TSV (with a header row) or as JSON lines.

    An empty list prints a "No experiments found." notice instead.
    """
    if not records:
        typer.echo("No experiments found.")
        return

    as_tsv = output_format is SetOutputFormat.TSV
    if as_tsv:
        # Only the TSV format has a header row.
        typer.echo("sha\tdate\tmetrics\tsummary")

    for entry in records:
        line = _tsv_row(entry) if as_tsv else json.dumps(_json_record(entry))
        typer.echo(line)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _tsv_row(record: ExperimentIndexEntry) -> str:
    """Build one tab-separated row: 7-character sha, date, metric pairs, summary."""
    short_sha = record.sha[:7]
    date = record.date
    # The free-text cells are passed through _clean before joining.
    metrics = _clean(_metric_pairs(record))
    summary = _clean(record.document.summary)
    return "\t".join((short_sha, date, metrics, summary))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _json_record(record: ExperimentIndexEntry) -> dict[str, object]:
    """Build the mapping that is serialized as one JSON line for *record*.

    Key order is sha, short_sha, date, summary, metrics, references.
    """
    document = record.document
    payload: dict[str, object] = {
        "sha": record.sha,
        "short_sha": record.sha[:7],
        "date": record.date,
        "summary": document.summary,
        "metrics": document.metrics,
    }
    # Each reference is reduced to its commit and the reason it was cited.
    payload["references"] = [
        {"commit": ref.commit, "why": ref.why} for ref in document.references
    ]
    return payload
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _metric_pairs(record: ExperimentIndexEntry) -> str:
    """Render the record's metrics as ``name=value`` pairs joined by ", ".

    Each value is encoded with ``json.dumps``.
    """
    pairs = [
        f"{metric}={json.dumps(reading)}"
        for metric, reading in record.document.metrics.items()
    ]
    return ", ".join(pairs)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _clean(value: str) -> str:
    """Return *value* flattened for use as a single TSV cell.

    Every tab, carriage return, and newline becomes one space, then leading and
    trailing whitespace is stripped.
    """
    flattened = value.translate(str.maketrans("\t\r\n", "   "))
    return flattened.strip()
|