gradex 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gradex-0.1.0/.evo/state.db +0 -0
- gradex-0.1.0/.evo/state.db-shm +0 -0
- gradex-0.1.0/.evo/state.db-wal +0 -0
- gradex-0.1.0/.evo/traces/12366e76-4c4f-4acf-8b3f-191008979da5.jsonl +1 -0
- gradex-0.1.0/.evo/traces/21a29126-82a1-4d73-afbc-f3e3a09acd7b.jsonl +2 -0
- gradex-0.1.0/.evo/traces/2bb4e240-8808-467f-a71a-58df88e0e64d.jsonl +2 -0
- gradex-0.1.0/.evo/traces/694b7d35-4b1e-4af2-8ba3-13ca57229cf2.jsonl +2 -0
- gradex-0.1.0/.evo/traces/b378a2bf-f760-484b-ba45-73fa497ae6bf.jsonl +1 -0
- gradex-0.1.0/.evo/traces/c84c3496-5c03-4837-a676-eabe5702edb0.jsonl +1 -0
- gradex-0.1.0/.evo/traces/d6856171-f713-4424-af6b-7eaf749aee83.jsonl +2 -0
- gradex-0.1.0/.github/workflows/ci.yml +57 -0
- gradex-0.1.0/.github/workflows/publish.yml +39 -0
- gradex-0.1.0/.gradex/state.db +0 -0
- gradex-0.1.0/PKG-INFO +92 -0
- gradex-0.1.0/README.md +66 -0
- gradex-0.1.0/_seed_demo/state.db +0 -0
- gradex-0.1.0/_seed_demo/traces/16f332f9-6cb5-4988-8b4e-5561f0d0f405.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/249fe8c6-39d3-41a5-b97b-a2e4ca1f5aed.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/2a5b0073-0036-4ca8-80e3-2433dc72b5c5.jsonl +3 -0
- gradex-0.1.0/_seed_demo/traces/309ffc37-e9dc-40b3-876c-d508f1190e16.jsonl +3 -0
- gradex-0.1.0/_seed_demo/traces/3931b258-6e5f-4e9a-af0e-fd64e0787475.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/69c4f0c0-572b-451e-a6c6-bba70263eefa.jsonl +3 -0
- gradex-0.1.0/_seed_demo/traces/77ba77e8-10c5-4128-a239-9123408b3131.jsonl +3 -0
- gradex-0.1.0/_seed_demo/traces/9aac1f80-efa1-42a8-bad7-f15be2455638.jsonl +3 -0
- gradex-0.1.0/_seed_demo/traces/a3bb20bb-e5ae-4fcf-8846-50e3e6166515.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/add76fe1-814d-4bbb-b5a9-ac07d5076c8e.jsonl +4 -0
- gradex-0.1.0/_seed_demo/traces/d7ddb230-5e36-42b0-b261-f6dc102aa8db.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/e4dcd895-dfba-4c1a-8b4d-74e7a95ea49e.jsonl +2 -0
- gradex-0.1.0/_seed_demo/traces/e620d384-aef9-4312-b5cb-4dda6a985076.jsonl +4 -0
- gradex-0.1.0/_seed_demo/traces/e751a1ad-2eae-40d8-b922-d2378551891f.jsonl +3 -0
- gradex-0.1.0/pyproject.toml +115 -0
- gradex-0.1.0/seed_dashboard.py +52 -0
- gradex-0.1.0/skills/claude-code/discover/SKILL.md +55 -0
- gradex-0.1.0/skills/claude-code/optimize/SKILL.md +51 -0
- gradex-0.1.0/skills/cursor/discover.mdc +53 -0
- gradex-0.1.0/skills/cursor/optimize.mdc +56 -0
- gradex-0.1.0/src/gradex/__init__.py +3 -0
- gradex-0.1.0/src/gradex/ai/__init__.py +1 -0
- gradex-0.1.0/src/gradex/ai/brief.py +94 -0
- gradex-0.1.0/src/gradex/ai/client.py +232 -0
- gradex-0.1.0/src/gradex/ai/discover.py +280 -0
- gradex-0.1.0/src/gradex/ai/prompts/benchmark_design.md +21 -0
- gradex-0.1.0/src/gradex/ai/prompts/gate_design.md +16 -0
- gradex-0.1.0/src/gradex/ai/prompts/optimize_brief.md +33 -0
- gradex-0.1.0/src/gradex/ai/prompts/repo_analysis.md +21 -0
- gradex-0.1.0/src/gradex/analytics.py +200 -0
- gradex-0.1.0/src/gradex/backends/__init__.py +1 -0
- gradex-0.1.0/src/gradex/backends/base.py +75 -0
- gradex-0.1.0/src/gradex/backends/pool.py +96 -0
- gradex-0.1.0/src/gradex/backends/worktree.py +187 -0
- gradex-0.1.0/src/gradex/cli.py +588 -0
- gradex-0.1.0/src/gradex/config.py +70 -0
- gradex-0.1.0/src/gradex/dashboard/__init__.py +1 -0
- gradex-0.1.0/src/gradex/dashboard/broadcaster.py +75 -0
- gradex-0.1.0/src/gradex/dashboard/server.py +164 -0
- gradex-0.1.0/src/gradex/dashboard/templates/index.html +449 -0
- gradex-0.1.0/src/gradex/doctor.py +138 -0
- gradex-0.1.0/src/gradex/export.py +125 -0
- gradex-0.1.0/src/gradex/hosts/__init__.py +40 -0
- gradex-0.1.0/src/gradex/hosts/base.py +67 -0
- gradex-0.1.0/src/gradex/hosts/claude_code.py +191 -0
- gradex-0.1.0/src/gradex/hosts/cursor.py +142 -0
- gradex-0.1.0/src/gradex/orchestrator.py +246 -0
- gradex-0.1.0/src/gradex/repository.py +201 -0
- gradex-0.1.0/src/gradex/runner/__init__.py +1 -0
- gradex-0.1.0/src/gradex/runner/benchmark.py +127 -0
- gradex-0.1.0/src/gradex/runner/cache.py +106 -0
- gradex-0.1.0/src/gradex/runner/gate.py +71 -0
- gradex-0.1.0/src/gradex/security/__init__.py +1 -0
- gradex-0.1.0/src/gradex/security/scrubber.py +47 -0
- gradex-0.1.0/src/gradex/state.py +87 -0
- gradex-0.1.0/src/gradex/subagent.py +258 -0
- gradex-0.1.0/src/gradex/traces.py +109 -0
- gradex-0.1.0/tests/conftest.py +41 -0
- gradex-0.1.0/tests/test_ai_client.py +164 -0
- gradex-0.1.0/tests/test_analytics.py +226 -0
- gradex-0.1.0/tests/test_benchmark.py +149 -0
- gradex-0.1.0/tests/test_brief.py +113 -0
- gradex-0.1.0/tests/test_cache.py +81 -0
- gradex-0.1.0/tests/test_cli.py +64 -0
- gradex-0.1.0/tests/test_concurrent.py +78 -0
- gradex-0.1.0/tests/test_dashboard.py +166 -0
- gradex-0.1.0/tests/test_discover.py +214 -0
- gradex-0.1.0/tests/test_doctor.py +187 -0
- gradex-0.1.0/tests/test_export.py +137 -0
- gradex-0.1.0/tests/test_gate.py +113 -0
- gradex-0.1.0/tests/test_groq_provider.py +145 -0
- gradex-0.1.0/tests/test_hosts.py +383 -0
- gradex-0.1.0/tests/test_orchestrator.py +285 -0
- gradex-0.1.0/tests/test_pool.py +87 -0
- gradex-0.1.0/tests/test_runner_integration.py +76 -0
- gradex-0.1.0/tests/test_scrubber.py +81 -0
- gradex-0.1.0/tests/test_shutdown.py +76 -0
- gradex-0.1.0/tests/test_state.py +165 -0
- gradex-0.1.0/tests/test_subagent.py +281 -0
- gradex-0.1.0/tests/test_traces.py +62 -0
- gradex-0.1.0/tests/test_worktree.py +107 -0
- gradex-0.1.0/uv.lock +1636 -0
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"ts": 1781552055.2846615, "level": "info", "msg": "Experiment 4 started", "data": {}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"ts": 1781552055.305381, "level": "info", "msg": "Experiment 7 started", "data": {}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"ts": 1781552055.2998743, "level": "info", "msg": "Experiment 6 started", "data": {}}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main", "master"]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: ["main", "master"]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
# ── Lint & type-check ────────────────────────────────────────────────────
|
|
11
|
+
lint:
|
|
12
|
+
name: Lint & type-check
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v5
|
|
19
|
+
|
|
20
|
+
- name: Set up Python 3.11
|
|
21
|
+
run: uv python install 3.11
|
|
22
|
+
|
|
23
|
+
- name: Install project + dev dependencies
|
|
24
|
+
run: uv sync --all-extras
|
|
25
|
+
|
|
26
|
+
- name: ruff — lint
|
|
27
|
+
run: uv run ruff check src/ tests/
|
|
28
|
+
|
|
29
|
+
- name: ruff — format check
|
|
30
|
+
run: uv run ruff format --check src/ tests/
|
|
31
|
+
|
|
32
|
+
- name: mypy — type-check
|
|
33
|
+
run: uv run mypy src/
|
|
34
|
+
|
|
35
|
+
# ── Tests (matrix) ───────────────────────────────────────────────────────
|
|
36
|
+
test:
|
|
37
|
+
name: pytest / Python ${{ matrix.python-version }}
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
strategy:
|
|
40
|
+
fail-fast: false
|
|
41
|
+
matrix:
|
|
42
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
43
|
+
|
|
44
|
+
steps:
|
|
45
|
+
- uses: actions/checkout@v4
|
|
46
|
+
|
|
47
|
+
- name: Install uv
|
|
48
|
+
uses: astral-sh/setup-uv@v5
|
|
49
|
+
|
|
50
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
51
|
+
run: uv python install ${{ matrix.python-version }}
|
|
52
|
+
|
|
53
|
+
- name: Install project + dev dependencies
|
|
54
|
+
run: uv sync --all-extras
|
|
55
|
+
|
|
56
|
+
- name: Run tests
|
|
57
|
+
run: uv run pytest tests/ --tb=short -v
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build-and-publish:
|
|
10
|
+
name: Build & publish
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
|
|
13
|
+
environment:
|
|
14
|
+
name: pypi
|
|
15
|
+
url: https://pypi.org/p/gradex
|
|
16
|
+
|
|
17
|
+
permissions:
|
|
18
|
+
id-token: write # required for trusted publishing (OIDC)
|
|
19
|
+
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v5
|
|
25
|
+
|
|
26
|
+
- name: Set up Python 3.11
|
|
27
|
+
run: uv python install 3.11
|
|
28
|
+
|
|
29
|
+
- name: Install project dependencies
|
|
30
|
+
run: uv sync
|
|
31
|
+
|
|
32
|
+
- name: Build sdist + wheel
|
|
33
|
+
run: uv build
|
|
34
|
+
|
|
35
|
+
- name: Publish to PyPI
|
|
36
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
37
|
+
# Uses OIDC trusted publishing — no API token needed.
|
|
38
|
+
# Configure the "pypi" environment in GitHub repo settings
|
|
39
|
+
# and add this repo as a trusted publisher on PyPI.
|
|
Binary file
|
gradex-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gradex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Measurable code improvement — autonomous optimization loop
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: ai,cli,coding,plugin
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: aiosqlite>=0.20.0
|
|
9
|
+
Requires-Dist: anthropic>=0.25.0
|
|
10
|
+
Requires-Dist: anyio[trio]>=4.0.0
|
|
11
|
+
Requires-Dist: fastapi>=0.111.0
|
|
12
|
+
Requires-Dist: httpx>=0.27.0
|
|
13
|
+
Requires-Dist: jinja2>=3.1.0
|
|
14
|
+
Requires-Dist: openai>=1.30.0
|
|
15
|
+
Requires-Dist: rich>=13.0.0
|
|
16
|
+
Requires-Dist: sqlmodel>=0.0.18
|
|
17
|
+
Requires-Dist: typer>=0.12.0
|
|
18
|
+
Requires-Dist: uvicorn[standard]>=0.29.0
|
|
19
|
+
Requires-Dist: websockets>=12.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# Gradex — autonomous code optimization. Measurable improvement, every run.
|
|
28
|
+
|
|
29
|
+
[PyPI](https://pypi.org/project/gradex/)
|
|
30
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
|
|
33
|
+
Gradex discovers performance targets, captures a baseline, runs autonomous optimization experiments, and ships the best verified patch.
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install gradex
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 60-Second Quickstart
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install gradex
|
|
45
|
+
gradex install cursor
|
|
46
|
+
gradex discover "make this repo faster"
|
|
47
|
+
gradex optimize
|
|
48
|
+
gradex dashboard
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Providers
|
|
52
|
+
|
|
53
|
+
| Provider | Best for | Notes |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| Groq | Free cloud runs | Free tier available, fast iteration |
|
|
56
|
+
| Anthropic | High quality reasoning | Strong patch planning and code edits |
|
|
57
|
+
| OpenAI | General purpose | Broad model options |
|
|
58
|
+
| Ollama | Local/private | Runs fully on your machine |
|
|
59
|
+
|
|
60
|
+
## CLI Reference
|
|
61
|
+
|
|
62
|
+
| Command | Description |
|
|
63
|
+
|---|---|
|
|
64
|
+
| `gradex install <host>` | Installs Gradex integration for a coding host |
|
|
65
|
+
| `gradex doctor <host>` | Checks host environment and setup health |
|
|
66
|
+
| `gradex dashboard` | Starts live optimization dashboard |
|
|
67
|
+
| `gradex upgrade` | Checks PyPI for newer Gradex versions |
|
|
68
|
+
| `gradex discover [hint]` | Discovers benchmark target and baseline |
|
|
69
|
+
| `gradex optimize` | Runs autonomous optimization loop |
|
|
70
|
+
| `gradex stats` | Shows run analytics and optional exports |
|
|
71
|
+
| `gradex history` | Lists recent optimization runs |
|
|
72
|
+
|
|
73
|
+
## How It Works
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
+-----------+ +-----------+ +----------------------+ +----------------+
|
|
77
|
+
| discover | --> | baseline | --> | optimize loop (N) | --> | best patch out |
|
|
78
|
+
+-----------+ +-----------+ +----------------------+ +----------------+
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
1. `discover` analyzes your repo and creates a measurable benchmark target.
|
|
82
|
+
2. Gradex records a baseline score.
|
|
83
|
+
3. `optimize` runs parallel experiment rounds (patch -> benchmark -> gate checks).
|
|
84
|
+
4. Best validated patch and run analytics are surfaced in dashboard and CLI.
|
|
85
|
+
|
|
86
|
+
## Dashboard
|
|
87
|
+
|
|
88
|
+
Dashboard screenshot: [placeholder](https://example.com/gradex-dashboard-screenshot)
|
|
89
|
+
|
|
90
|
+
## License
|
|
91
|
+
|
|
92
|
+
MIT
|
gradex-0.1.0/README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Gradex — autonomous code optimization. Measurable improvement, every run.
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/gradex/)
|
|
4
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
|
|
7
|
+
Gradex discovers performance targets, captures a baseline, runs autonomous optimization experiments, and ships the best verified patch.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install gradex
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## 60-Second Quickstart
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install gradex
|
|
19
|
+
gradex install cursor
|
|
20
|
+
gradex discover "make this repo faster"
|
|
21
|
+
gradex optimize
|
|
22
|
+
gradex dashboard
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Providers
|
|
26
|
+
|
|
27
|
+
| Provider | Best for | Notes |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| Groq | Free cloud runs | Free tier available, fast iteration |
|
|
30
|
+
| Anthropic | High quality reasoning | Strong patch planning and code edits |
|
|
31
|
+
| OpenAI | General purpose | Broad model options |
|
|
32
|
+
| Ollama | Local/private | Runs fully on your machine |
|
|
33
|
+
|
|
34
|
+
## CLI Reference
|
|
35
|
+
|
|
36
|
+
| Command | Description |
|
|
37
|
+
|---|---|
|
|
38
|
+
| `gradex install <host>` | Installs Gradex integration for a coding host |
|
|
39
|
+
| `gradex doctor <host>` | Checks host environment and setup health |
|
|
40
|
+
| `gradex dashboard` | Starts live optimization dashboard |
|
|
41
|
+
| `gradex upgrade` | Checks PyPI for newer Gradex versions |
|
|
42
|
+
| `gradex discover [hint]` | Discovers benchmark target and baseline |
|
|
43
|
+
| `gradex optimize` | Runs autonomous optimization loop |
|
|
44
|
+
| `gradex stats` | Shows run analytics and optional exports |
|
|
45
|
+
| `gradex history` | Lists recent optimization runs |
|
|
46
|
+
|
|
47
|
+
## How It Works
|
|
48
|
+
|
|
49
|
+
```text
|
|
50
|
+
+-----------+ +-----------+ +----------------------+ +----------------+
|
|
51
|
+
| discover | --> | baseline | --> | optimize loop (N) | --> | best patch out |
|
|
52
|
+
+-----------+ +-----------+ +----------------------+ +----------------+
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
1. `discover` analyzes your repo and creates a measurable benchmark target.
|
|
56
|
+
2. Gradex records a baseline score.
|
|
57
|
+
3. `optimize` runs parallel experiment rounds (patch -> benchmark -> gate checks).
|
|
58
|
+
4. Best validated patch and run analytics are surfaced in dashboard and CLI.
|
|
59
|
+
|
|
60
|
+
## Dashboard
|
|
61
|
+
|
|
62
|
+
Dashboard screenshot: [placeholder](https://example.com/gradex-dashboard-screenshot)
|
|
63
|
+
|
|
64
|
+
## License
|
|
65
|
+
|
|
66
|
+
MIT
|
|
Binary file
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
{"ts": 1781551559.470149, "level": "info", "msg": "Experiment 2 started", "data": {}}
|
|
2
|
+
{"ts": 1781551559.4713607, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
|
|
3
|
+
{"ts": 1781551559.472388, "level": "info", "msg": "Score: 29.0", "data": {}}
|
|
4
|
+
{"ts": 1781551559.4734287, "level": "error", "msg": "Gate failed: 2 tests failed", "data": {}}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
{"ts": 1781551901.0414634, "level": "info", "msg": "Experiment 2 started", "data": {}}
|
|
2
|
+
{"ts": 1781551901.0427585, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
|
|
3
|
+
{"ts": 1781551901.0437982, "level": "info", "msg": "Score: 29.0", "data": {}}
|
|
4
|
+
{"ts": 1781551901.0447173, "level": "error", "msg": "Gate failed: 2 tests failed", "data": {}}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "gradex"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Measurable code improvement — autonomous optimization loop"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["cli", "coding", "plugin", "ai"]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"typer>=0.12.0",
|
|
15
|
+
"rich>=13.0.0",
|
|
16
|
+
"sqlmodel>=0.0.18",
|
|
17
|
+
"aiosqlite>=0.20.0",
|
|
18
|
+
"fastapi>=0.111.0",
|
|
19
|
+
"uvicorn[standard]>=0.29.0",
|
|
20
|
+
"websockets>=12.0",
|
|
21
|
+
"jinja2>=3.1.0",
|
|
22
|
+
"httpx>=0.27.0",
|
|
23
|
+
"anyio[trio]>=4.0.0",
|
|
24
|
+
"anthropic>=0.25.0",
|
|
25
|
+
"openai>=1.30.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.scripts]
|
|
29
|
+
gradex = "gradex.cli:app"
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
dev = [
|
|
33
|
+
"pytest>=8.0.0",
|
|
34
|
+
"pytest-cov>=5.0.0",
|
|
35
|
+
"ruff>=0.4.0",
|
|
36
|
+
"mypy>=1.10.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/gradex"]
|
|
41
|
+
|
|
42
|
+
[tool.ruff]
|
|
43
|
+
target-version = "py311"
|
|
44
|
+
line-length = 88
|
|
45
|
+
src = ["src"]
|
|
46
|
+
|
|
47
|
+
[tool.ruff.lint]
|
|
48
|
+
select = ["E", "F", "I", "UP", "B", "C4"]
|
|
49
|
+
ignore = ["E501"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint.isort]
|
|
52
|
+
known-first-party = ["gradex"]
|
|
53
|
+
|
|
54
|
+
[tool.mypy]
|
|
55
|
+
python_version = "3.11"
|
|
56
|
+
strict = true
|
|
57
|
+
mypy_path = "src"
|
|
58
|
+
files = ["src/"]
|
|
59
|
+
|
|
60
|
+
[[tool.mypy.overrides]]
|
|
61
|
+
# SQLModel + SQLAlchemy type stubs have known gaps that produce false positives
|
|
62
|
+
# under strict mode (generic column expressions, dynamic ORM attribute types, etc.).
|
|
63
|
+
module = ["gradex.state", "gradex.repository"]
|
|
64
|
+
warn_return_any = false
|
|
65
|
+
disallow_any_generics = false
|
|
66
|
+
|
|
67
|
+
[[tool.mypy.overrides]]
|
|
68
|
+
# FastAPI/Starlette route return types and Jinja2 templating are dynamically typed.
|
|
69
|
+
module = ["gradex.dashboard.server", "gradex.dashboard.broadcaster"]
|
|
70
|
+
warn_return_any = false
|
|
71
|
+
disallow_any_generics = false
|
|
72
|
+
|
|
73
|
+
[[tool.mypy.overrides]]
|
|
74
|
+
module = ["sqlmodel.*", "sqlalchemy.*", "aiosqlite.*", "uvicorn.*", "fastapi.*"]
|
|
75
|
+
ignore_missing_imports = true
|
|
76
|
+
|
|
77
|
+
[[tool.mypy.overrides]]
|
|
78
|
+
# tomllib dynamic dict and dataclasses.fields(...) produce Any under strict.
|
|
79
|
+
module = ["gradex.config"]
|
|
80
|
+
warn_return_any = false
|
|
81
|
+
disallow_any_generics = false
|
|
82
|
+
|
|
83
|
+
[[tool.mypy.overrides]]
|
|
84
|
+
# anthropic/openai SDK attribute chains + httpx resp.json() produce Any.
|
|
85
|
+
module = ["gradex.ai.client", "gradex.ai.discover", "gradex.ai.brief"]
|
|
86
|
+
warn_return_any = false
|
|
87
|
+
disallow_any_generics = false
|
|
88
|
+
|
|
89
|
+
[[tool.mypy.overrides]]
|
|
90
|
+
module = ["anthropic.*", "openai.*"]
|
|
91
|
+
ignore_missing_imports = true
|
|
92
|
+
|
|
93
|
+
[[tool.mypy.overrides]]
|
|
94
|
+
# asyncio.gather return type with return_exceptions=True and SQLModel session
|
|
95
|
+
# usage in subagent/orchestrator produce Any under strict.
|
|
96
|
+
module = ["gradex.subagent", "gradex.orchestrator"]
|
|
97
|
+
warn_return_any = false
|
|
98
|
+
disallow_any_generics = false
|
|
99
|
+
|
|
100
|
+
[[tool.mypy.overrides]]
|
|
101
|
+
# Host installers use shutil.which (returns str | None), Path.stat(),
|
|
102
|
+
# and json.loads (returns Any) — all produce Any under strict.
|
|
103
|
+
module = ["gradex.hosts.*", "gradex.hosts"]
|
|
104
|
+
warn_return_any = false
|
|
105
|
+
disallow_any_generics = false
|
|
106
|
+
|
|
107
|
+
[tool.pytest.ini_options]
|
|
108
|
+
testpaths = ["tests"]
|
|
109
|
+
addopts = "-v --tb=short"
|
|
110
|
+
|
|
111
|
+
[dependency-groups]
|
|
112
|
+
dev = [
|
|
113
|
+
"anyio>=4.13.0",
|
|
114
|
+
"pytest>=9.1.0",
|
|
115
|
+
]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
sys.path.insert(0, "src")
|
|
3
|
+
|
|
4
|
+
import gradex.state as state_mod
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
SEED_DIR = Path("_seed_demo")
|
|
8
|
+
SEED_DIR.mkdir(exist_ok=True)
|
|
9
|
+
state_mod.GRADEX_DIR = SEED_DIR
|
|
10
|
+
state_mod.DB_PATH = SEED_DIR / "state.db"
|
|
11
|
+
|
|
12
|
+
import gradex.backends.worktree as wt_mod
|
|
13
|
+
wt_mod.GRADEX_DIR = SEED_DIR
|
|
14
|
+
|
|
15
|
+
from gradex.repository import RunRepository, ExperimentRepository
|
|
16
|
+
from gradex.traces import TraceWriter
|
|
17
|
+
|
|
18
|
+
run_repo = RunRepository()
|
|
19
|
+
exp_repo = ExperimentRepository()
|
|
20
|
+
|
|
21
|
+
run = run_repo.create(
|
|
22
|
+
benchmark_cmd="python bench.py",
|
|
23
|
+
metric_direction="lower",
|
|
24
|
+
gate_cmds=["pytest tests/"],
|
|
25
|
+
baseline_score=41.2,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
statuses = [
|
|
29
|
+
("passed", 38.1, True),
|
|
30
|
+
("rejected", 29.0, False),
|
|
31
|
+
("passed", 35.6, True),
|
|
32
|
+
("failed", None, None),
|
|
33
|
+
("passed", 31.4, True),
|
|
34
|
+
("running", None, None),
|
|
35
|
+
("pending", None, None),
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
for i, (status, score, gate_passed) in enumerate(statuses):
|
|
39
|
+
exp = exp_repo.create(run.id, None, f"gradex/exp-00{i+1}")
|
|
40
|
+
if score is not None:
|
|
41
|
+
exp_repo.update_score(exp.id, score, gate_passed, status)
|
|
42
|
+
tw = TraceWriter(SEED_DIR / "traces" / f"{exp.id}.jsonl")
|
|
43
|
+
tw.write("info", f"Experiment {i+1} started")
|
|
44
|
+
tw.write("info", "Running benchmark: python bench.py")
|
|
45
|
+
if score:
|
|
46
|
+
tw.write("info", f"Score: {score}")
|
|
47
|
+
if status == "rejected":
|
|
48
|
+
tw.write("error", "Gate failed: 2 tests failed")
|
|
49
|
+
|
|
50
|
+
run_repo.update_baseline_experiment(run.id, exp_repo.list_by_run(run.id)[0].id)
|
|
51
|
+
print(f"Seeded run {run.id[:8]} with {len(statuses)} experiments")
|
|
52
|
+
print("Now run: python -m gradex dashboard")
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: evo-discover
|
|
3
|
+
version: "0.1.0"
|
|
4
|
+
description: >
|
|
5
|
+
Discover what to optimise in this repo, instrument a benchmark,
|
|
6
|
+
and record a baseline score.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# gradex: discover
|
|
10
|
+
|
|
11
|
+
You are the **gradex discover agent**. Your job is to explore the
|
|
12
|
+
repository, identify the best optimisation target, instrument a
|
|
13
|
+
benchmark, and record a baseline experiment.
|
|
14
|
+
|
|
15
|
+
## Prerequisites
|
|
16
|
+
- The `gradex` CLI must be installed (`gradex --version` should succeed).
|
|
17
|
+
- The repo must have at least one committed Python file.
|
|
18
|
+
- You must be running from the repo root.
|
|
19
|
+
|
|
20
|
+
## Free provider options
|
|
21
|
+
If the user has no API key, suggest Groq (free tier):
|
|
22
|
+
`gradex discover --provider groq --api-key <key from console.groq.com>`
|
|
23
|
+
For fully local (no account needed):
|
|
24
|
+
`gradex discover --provider ollama`
|
|
25
|
+
(requires Ollama running: `ollama serve` + `ollama pull llama3`)
|
|
26
|
+
|
|
27
|
+
## Steps
|
|
28
|
+
|
|
29
|
+
### Step 1 — Check environment
|
|
30
|
+
Run: `gradex doctor claude-code`
|
|
31
|
+
If any errors appear, report them and stop.
|
|
32
|
+
|
|
33
|
+
### Step 2 — Scan the repository
|
|
34
|
+
Use Read and Glob tools to explore the project structure.
|
|
35
|
+
Look for: hot paths, parsers, LLM prompt loops, data pipelines.
|
|
36
|
+
|
|
37
|
+
### Step 3 — Run discover
|
|
38
|
+
`gradex discover "<one sentence: what to optimise>"`
|
|
39
|
+
|
|
40
|
+
With free Groq:
|
|
41
|
+
`gradex discover "<goal>" --provider groq --api-key <key>`
|
|
42
|
+
|
|
43
|
+
This writes `.gradex/benchmark.py` and records a baseline in `.gradex/state.db`.
|
|
44
|
+
|
|
45
|
+
### Step 4 — Report to user
|
|
46
|
+
Tell the user:
|
|
47
|
+
- What will be optimised and why
|
|
48
|
+
- Baseline score + metric direction
|
|
49
|
+
- Gate command protecting correctness
|
|
50
|
+
- Run ID (first 8 chars)
|
|
51
|
+
- Next: invoke `/gradex:optimize`
|
|
52
|
+
|
|
53
|
+
## Notes
|
|
54
|
+
- Never modify source files during discover.
|
|
55
|
+
- If `.gradex/benchmark.py` already exists, report this and ask the user whether to re-run discover or proceed with the existing benchmark.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: evo-optimize
|
|
3
|
+
version: "0.1.0"
|
|
4
|
+
description: >
|
|
5
|
+
Run the gradex optimization loop — spawn parallel experiments,
|
|
6
|
+
score each patch, keep only improvements that pass gates.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# gradex: optimize
|
|
10
|
+
|
|
11
|
+
You are the **gradex optimize agent**.
|
|
12
|
+
|
|
13
|
+
## Parameters
|
|
14
|
+
| Parameter | Default | Description |
|
|
15
|
+
|-----------|---------|------------------------------------------|
|
|
16
|
+
| subagents | 3 | Parallel experiments per round |
|
|
17
|
+
| budget | 5 | Max experiments per subagent |
|
|
18
|
+
| stall | 3 | Rounds without improvement before stop |
|
|
19
|
+
| provider | groq | anthropic / openai / groq / ollama |
|
|
20
|
+
|
|
21
|
+
## Free provider options
|
|
22
|
+
Groq (recommended free):
|
|
23
|
+
`gradex optimize --provider groq --api-key <key>`
|
|
24
|
+
Ollama (local, no account):
|
|
25
|
+
`gradex optimize --provider ollama`
|
|
26
|
+
|
|
27
|
+
## Steps
|
|
28
|
+
|
|
29
|
+
### Step 1 — Confirm run exists
|
|
30
|
+
A run must exist from `gradex discover`. If not, tell the user to run discover first.
|
|
31
|
+
|
|
32
|
+
### Step 2 — Start optimization
|
|
33
|
+
Default: `gradex optimize`
|
|
34
|
+
With params: `gradex optimize --subagents 3 --budget 10 --stall 3 --provider groq --api-key <key>`
|
|
35
|
+
|
|
36
|
+
### Step 3 — Monitor
|
|
37
|
+
Tell the user to open the dashboard in a separate terminal:
|
|
38
|
+
`gradex dashboard` → http://127.0.0.1:8080
|
|
39
|
+
|
|
40
|
+
### Step 4 — Report results
|
|
41
|
+
When complete:
|
|
42
|
+
- Rounds / experiments / passed count
|
|
43
|
+
- Baseline vs best score + improvement %
|
|
44
|
+
- Stop reason (stall / budget)
|
|
45
|
+
- If improved: winning experiment ID
|
|
46
|
+
"Review changes in `.gradex/worktrees/<id>/` before merging"
|
|
47
|
+
|
|
48
|
+
## Safety
|
|
49
|
+
- Main branch is NEVER modified.
|
|
50
|
+
- Experiment promoted ONLY IF score improved AND all gates passed.
|
|
51
|
+
- Ctrl+C safely stops the loop and cleans worktrees.
|