gradex 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. gradex-0.1.0/.evo/state.db +0 -0
  2. gradex-0.1.0/.evo/state.db-shm +0 -0
  3. gradex-0.1.0/.evo/state.db-wal +0 -0
  4. gradex-0.1.0/.evo/traces/12366e76-4c4f-4acf-8b3f-191008979da5.jsonl +1 -0
  5. gradex-0.1.0/.evo/traces/21a29126-82a1-4d73-afbc-f3e3a09acd7b.jsonl +2 -0
  6. gradex-0.1.0/.evo/traces/2bb4e240-8808-467f-a71a-58df88e0e64d.jsonl +2 -0
  7. gradex-0.1.0/.evo/traces/694b7d35-4b1e-4af2-8ba3-13ca57229cf2.jsonl +2 -0
  8. gradex-0.1.0/.evo/traces/b378a2bf-f760-484b-ba45-73fa497ae6bf.jsonl +1 -0
  9. gradex-0.1.0/.evo/traces/c84c3496-5c03-4837-a676-eabe5702edb0.jsonl +1 -0
  10. gradex-0.1.0/.evo/traces/d6856171-f713-4424-af6b-7eaf749aee83.jsonl +2 -0
  11. gradex-0.1.0/.github/workflows/ci.yml +57 -0
  12. gradex-0.1.0/.github/workflows/publish.yml +39 -0
  13. gradex-0.1.0/.gradex/state.db +0 -0
  14. gradex-0.1.0/PKG-INFO +92 -0
  15. gradex-0.1.0/README.md +66 -0
  16. gradex-0.1.0/_seed_demo/state.db +0 -0
  17. gradex-0.1.0/_seed_demo/traces/16f332f9-6cb5-4988-8b4e-5561f0d0f405.jsonl +2 -0
  18. gradex-0.1.0/_seed_demo/traces/249fe8c6-39d3-41a5-b97b-a2e4ca1f5aed.jsonl +2 -0
  19. gradex-0.1.0/_seed_demo/traces/2a5b0073-0036-4ca8-80e3-2433dc72b5c5.jsonl +3 -0
  20. gradex-0.1.0/_seed_demo/traces/309ffc37-e9dc-40b3-876c-d508f1190e16.jsonl +3 -0
  21. gradex-0.1.0/_seed_demo/traces/3931b258-6e5f-4e9a-af0e-fd64e0787475.jsonl +2 -0
  22. gradex-0.1.0/_seed_demo/traces/69c4f0c0-572b-451e-a6c6-bba70263eefa.jsonl +3 -0
  23. gradex-0.1.0/_seed_demo/traces/77ba77e8-10c5-4128-a239-9123408b3131.jsonl +3 -0
  24. gradex-0.1.0/_seed_demo/traces/9aac1f80-efa1-42a8-bad7-f15be2455638.jsonl +3 -0
  25. gradex-0.1.0/_seed_demo/traces/a3bb20bb-e5ae-4fcf-8846-50e3e6166515.jsonl +2 -0
  26. gradex-0.1.0/_seed_demo/traces/add76fe1-814d-4bbb-b5a9-ac07d5076c8e.jsonl +4 -0
  27. gradex-0.1.0/_seed_demo/traces/d7ddb230-5e36-42b0-b261-f6dc102aa8db.jsonl +2 -0
  28. gradex-0.1.0/_seed_demo/traces/e4dcd895-dfba-4c1a-8b4d-74e7a95ea49e.jsonl +2 -0
  29. gradex-0.1.0/_seed_demo/traces/e620d384-aef9-4312-b5cb-4dda6a985076.jsonl +4 -0
  30. gradex-0.1.0/_seed_demo/traces/e751a1ad-2eae-40d8-b922-d2378551891f.jsonl +3 -0
  31. gradex-0.1.0/pyproject.toml +115 -0
  32. gradex-0.1.0/seed_dashboard.py +52 -0
  33. gradex-0.1.0/skills/claude-code/discover/SKILL.md +55 -0
  34. gradex-0.1.0/skills/claude-code/optimize/SKILL.md +51 -0
  35. gradex-0.1.0/skills/cursor/discover.mdc +53 -0
  36. gradex-0.1.0/skills/cursor/optimize.mdc +56 -0
  37. gradex-0.1.0/src/gradex/__init__.py +3 -0
  38. gradex-0.1.0/src/gradex/ai/__init__.py +1 -0
  39. gradex-0.1.0/src/gradex/ai/brief.py +94 -0
  40. gradex-0.1.0/src/gradex/ai/client.py +232 -0
  41. gradex-0.1.0/src/gradex/ai/discover.py +280 -0
  42. gradex-0.1.0/src/gradex/ai/prompts/benchmark_design.md +21 -0
  43. gradex-0.1.0/src/gradex/ai/prompts/gate_design.md +16 -0
  44. gradex-0.1.0/src/gradex/ai/prompts/optimize_brief.md +33 -0
  45. gradex-0.1.0/src/gradex/ai/prompts/repo_analysis.md +21 -0
  46. gradex-0.1.0/src/gradex/analytics.py +200 -0
  47. gradex-0.1.0/src/gradex/backends/__init__.py +1 -0
  48. gradex-0.1.0/src/gradex/backends/base.py +75 -0
  49. gradex-0.1.0/src/gradex/backends/pool.py +96 -0
  50. gradex-0.1.0/src/gradex/backends/worktree.py +187 -0
  51. gradex-0.1.0/src/gradex/cli.py +588 -0
  52. gradex-0.1.0/src/gradex/config.py +70 -0
  53. gradex-0.1.0/src/gradex/dashboard/__init__.py +1 -0
  54. gradex-0.1.0/src/gradex/dashboard/broadcaster.py +75 -0
  55. gradex-0.1.0/src/gradex/dashboard/server.py +164 -0
  56. gradex-0.1.0/src/gradex/dashboard/templates/index.html +449 -0
  57. gradex-0.1.0/src/gradex/doctor.py +138 -0
  58. gradex-0.1.0/src/gradex/export.py +125 -0
  59. gradex-0.1.0/src/gradex/hosts/__init__.py +40 -0
  60. gradex-0.1.0/src/gradex/hosts/base.py +67 -0
  61. gradex-0.1.0/src/gradex/hosts/claude_code.py +191 -0
  62. gradex-0.1.0/src/gradex/hosts/cursor.py +142 -0
  63. gradex-0.1.0/src/gradex/orchestrator.py +246 -0
  64. gradex-0.1.0/src/gradex/repository.py +201 -0
  65. gradex-0.1.0/src/gradex/runner/__init__.py +1 -0
  66. gradex-0.1.0/src/gradex/runner/benchmark.py +127 -0
  67. gradex-0.1.0/src/gradex/runner/cache.py +106 -0
  68. gradex-0.1.0/src/gradex/runner/gate.py +71 -0
  69. gradex-0.1.0/src/gradex/security/__init__.py +1 -0
  70. gradex-0.1.0/src/gradex/security/scrubber.py +47 -0
  71. gradex-0.1.0/src/gradex/state.py +87 -0
  72. gradex-0.1.0/src/gradex/subagent.py +258 -0
  73. gradex-0.1.0/src/gradex/traces.py +109 -0
  74. gradex-0.1.0/tests/conftest.py +41 -0
  75. gradex-0.1.0/tests/test_ai_client.py +164 -0
  76. gradex-0.1.0/tests/test_analytics.py +226 -0
  77. gradex-0.1.0/tests/test_benchmark.py +149 -0
  78. gradex-0.1.0/tests/test_brief.py +113 -0
  79. gradex-0.1.0/tests/test_cache.py +81 -0
  80. gradex-0.1.0/tests/test_cli.py +64 -0
  81. gradex-0.1.0/tests/test_concurrent.py +78 -0
  82. gradex-0.1.0/tests/test_dashboard.py +166 -0
  83. gradex-0.1.0/tests/test_discover.py +214 -0
  84. gradex-0.1.0/tests/test_doctor.py +187 -0
  85. gradex-0.1.0/tests/test_export.py +137 -0
  86. gradex-0.1.0/tests/test_gate.py +113 -0
  87. gradex-0.1.0/tests/test_groq_provider.py +145 -0
  88. gradex-0.1.0/tests/test_hosts.py +383 -0
  89. gradex-0.1.0/tests/test_orchestrator.py +285 -0
  90. gradex-0.1.0/tests/test_pool.py +87 -0
  91. gradex-0.1.0/tests/test_runner_integration.py +76 -0
  92. gradex-0.1.0/tests/test_scrubber.py +81 -0
  93. gradex-0.1.0/tests/test_shutdown.py +76 -0
  94. gradex-0.1.0/tests/test_state.py +165 -0
  95. gradex-0.1.0/tests/test_subagent.py +281 -0
  96. gradex-0.1.0/tests/test_traces.py +62 -0
  97. gradex-0.1.0/tests/test_worktree.py +107 -0
  98. gradex-0.1.0/uv.lock +1636 -0
Binary file
Binary file
File without changes
@@ -0,0 +1 @@
1
+ {"ts": 1781552055.2846615, "level": "info", "msg": "Experiment 4 started", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781552055.2920816, "level": "info", "msg": "Experiment 5 started", "data": {}}
2
+ {"ts": 1781552055.2937756, "level": "info", "msg": "Score: 31.4", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781552055.2601042, "level": "info", "msg": "Experiment 1 started", "data": {}}
2
+ {"ts": 1781552055.2610655, "level": "info", "msg": "Score: 38.1", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781552055.279045, "level": "info", "msg": "Experiment 3 started", "data": {}}
2
+ {"ts": 1781552055.2800758, "level": "info", "msg": "Score: 35.6", "data": {}}
@@ -0,0 +1 @@
1
+ {"ts": 1781552055.305381, "level": "info", "msg": "Experiment 7 started", "data": {}}
@@ -0,0 +1 @@
1
+ {"ts": 1781552055.2998743, "level": "info", "msg": "Experiment 6 started", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781552055.268299, "level": "info", "msg": "Experiment 2 started", "data": {}}
2
+ {"ts": 1781552055.26949, "level": "info", "msg": "Score: 29.0", "data": {}}
@@ -0,0 +1,57 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main", "master"]
6
+ pull_request:
7
+ branches: ["main", "master"]
8
+
9
+ jobs:
10
+ # ── Lint & type-check ────────────────────────────────────────────────────
11
+ lint:
12
+ name: Lint & type-check
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v5
19
+
20
+ - name: Set up Python 3.11
21
+ run: uv python install 3.11
22
+
23
+ - name: Install project + dev dependencies
24
+ run: uv sync --all-extras
25
+
26
+ - name: ruff — lint
27
+ run: uv run ruff check src/ tests/
28
+
29
+ - name: ruff — format check
30
+ run: uv run ruff format --check src/ tests/
31
+
32
+ - name: mypy — type-check
33
+ run: uv run mypy src/
34
+
35
+ # ── Tests (matrix) ───────────────────────────────────────────────────────
36
+ test:
37
+ name: pytest / Python ${{ matrix.python-version }}
38
+ runs-on: ubuntu-latest
39
+ strategy:
40
+ fail-fast: false
41
+ matrix:
42
+ python-version: ["3.11", "3.12", "3.13"]
43
+
44
+ steps:
45
+ - uses: actions/checkout@v4
46
+
47
+ - name: Install uv
48
+ uses: astral-sh/setup-uv@v5
49
+
50
+ - name: Set up Python ${{ matrix.python-version }}
51
+ run: uv python install ${{ matrix.python-version }}
52
+
53
+ - name: Install project + dev dependencies
54
+ run: uv sync --all-extras
55
+
56
+ - name: Run tests
57
+ run: uv run pytest tests/ --tb=short -v
@@ -0,0 +1,39 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build-and-publish:
10
+ name: Build & publish
11
+ runs-on: ubuntu-latest
12
+
13
+ environment:
14
+ name: pypi
15
+ url: https://pypi.org/p/gradex
16
+
17
+ permissions:
18
+ id-token: write # required for trusted publishing (OIDC)
19
+
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Install uv
24
+ uses: astral-sh/setup-uv@v5
25
+
26
+ - name: Set up Python 3.11
27
+ run: uv python install 3.11
28
+
29
+ - name: Install project dependencies
30
+ run: uv sync
31
+
32
+ - name: Build sdist + wheel
33
+ run: uv build
34
+
35
+ - name: Publish to PyPI
36
+ uses: pypa/gh-action-pypi-publish@release/v1
37
+ # Uses OIDC trusted publishing — no API token needed.
38
+ # Configure the "pypi" environment in GitHub repo settings
39
+ # and add this repo as a trusted publisher on PyPI.
Binary file
gradex-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.4
2
+ Name: gradex
3
+ Version: 0.1.0
4
+ Summary: Measurable code improvement — autonomous optimization loop
5
+ License: MIT
6
+ Keywords: ai,cli,coding,plugin
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: aiosqlite>=0.20.0
9
+ Requires-Dist: anthropic>=0.25.0
10
+ Requires-Dist: anyio[trio]>=4.0.0
11
+ Requires-Dist: fastapi>=0.111.0
12
+ Requires-Dist: httpx>=0.27.0
13
+ Requires-Dist: jinja2>=3.1.0
14
+ Requires-Dist: openai>=1.30.0
15
+ Requires-Dist: rich>=13.0.0
16
+ Requires-Dist: sqlmodel>=0.0.18
17
+ Requires-Dist: typer>=0.12.0
18
+ Requires-Dist: uvicorn[standard]>=0.29.0
19
+ Requires-Dist: websockets>=12.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: mypy>=1.10.0; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
23
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
24
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # Gradex — autonomous code optimization. Measurable improvement, every run.
28
+
29
+ [![PyPI version](https://img.shields.io/pypi/v/gradex.svg)](https://pypi.org/project/gradex/)
30
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
31
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
32
+
33
+ Gradex discovers performance targets, captures a baseline, runs autonomous optimization experiments, and ships the best verified patch.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install gradex
39
+ ```
40
+
41
+ ## 60-Second Quickstart
42
+
43
+ ```bash
44
+ pip install gradex
45
+ gradex install cursor
46
+ gradex discover "make this repo faster"
47
+ gradex optimize
48
+ gradex dashboard
49
+ ```
50
+
51
+ ## Providers
52
+
53
+ | Provider | Best for | Notes |
54
+ |---|---|---|
55
+ | Groq | Free cloud runs | Free tier available, fast iteration |
56
+ | Anthropic | High quality reasoning | Strong patch planning and code edits |
57
+ | OpenAI | General purpose | Broad model options |
58
+ | Ollama | Local/private | Runs fully on your machine |
59
+
60
+ ## CLI Reference
61
+
62
+ | Command | Description |
63
+ |---|---|
64
+ | `gradex install <host>` | Installs Gradex integration for a coding host |
65
+ | `gradex doctor <host>` | Checks host environment and setup health |
66
+ | `gradex dashboard` | Starts live optimization dashboard |
67
+ | `gradex upgrade` | Checks PyPI for newer Gradex versions |
68
+ | `gradex discover [hint]` | Discovers benchmark target and baseline |
69
+ | `gradex optimize` | Runs autonomous optimization loop |
70
+ | `gradex stats` | Shows run analytics and optional exports |
71
+ | `gradex history` | Lists recent optimization runs |
72
+
73
+ ## How It Works
74
+
75
+ ```text
76
+ +-----------+ +-----------+ +----------------------+ +----------------+
77
+ | discover | --> | baseline | --> | optimize loop (N) | --> | best patch out |
78
+ +-----------+ +-----------+ +----------------------+ +----------------+
79
+ ```
80
+
81
+ 1. `discover` analyzes your repo and creates a measurable benchmark target.
82
+ 2. Gradex records a baseline score.
83
+ 3. `optimize` runs parallel experiment rounds (patch -> benchmark -> gate checks).
84
+ 4. Best validated patch and run analytics are surfaced in dashboard and CLI.
85
+
86
+ ## Dashboard
87
+
88
+ Dashboard screenshot: [placeholder](https://example.com/gradex-dashboard-screenshot)
89
+
90
+ ## License
91
+
92
+ MIT
gradex-0.1.0/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # Gradex — autonomous code optimization. Measurable improvement, every run.
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/gradex.svg)](https://pypi.org/project/gradex/)
4
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
6
+
7
+ Gradex discovers performance targets, captures a baseline, runs autonomous optimization experiments, and ships the best verified patch.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install gradex
13
+ ```
14
+
15
+ ## 60-Second Quickstart
16
+
17
+ ```bash
18
+ pip install gradex
19
+ gradex install cursor
20
+ gradex discover "make this repo faster"
21
+ gradex optimize
22
+ gradex dashboard
23
+ ```
24
+
25
+ ## Providers
26
+
27
+ | Provider | Best for | Notes |
28
+ |---|---|---|
29
+ | Groq | Free cloud runs | Free tier available, fast iteration |
30
+ | Anthropic | High quality reasoning | Strong patch planning and code edits |
31
+ | OpenAI | General purpose | Broad model options |
32
+ | Ollama | Local/private | Runs fully on your machine |
33
+
34
+ ## CLI Reference
35
+
36
+ | Command | Description |
37
+ |---|---|
38
+ | `gradex install <host>` | Installs Gradex integration for a coding host |
39
+ | `gradex doctor <host>` | Checks host environment and setup health |
40
+ | `gradex dashboard` | Starts live optimization dashboard |
41
+ | `gradex upgrade` | Checks PyPI for newer Gradex versions |
42
+ | `gradex discover [hint]` | Discovers benchmark target and baseline |
43
+ | `gradex optimize` | Runs autonomous optimization loop |
44
+ | `gradex stats` | Shows run analytics and optional exports |
45
+ | `gradex history` | Lists recent optimization runs |
46
+
47
+ ## How It Works
48
+
49
+ ```text
50
+ +-----------+ +-----------+ +----------------------+ +----------------+
51
+ | discover | --> | baseline | --> | optimize loop (N) | --> | best patch out |
52
+ +-----------+ +-----------+ +----------------------+ +----------------+
53
+ ```
54
+
55
+ 1. `discover` analyzes your repo and creates a measurable benchmark target.
56
+ 2. Gradex records a baseline score.
57
+ 3. `optimize` runs parallel experiment rounds (patch -> benchmark -> gate checks).
58
+ 4. Best validated patch and run analytics are surfaced in dashboard and CLI.
59
+
60
+ ## Dashboard
61
+
62
+ Dashboard screenshot: [placeholder](https://example.com/gradex-dashboard-screenshot)
63
+
64
+ ## License
65
+
66
+ MIT
Binary file
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551901.083844, "level": "info", "msg": "Experiment 6 started", "data": {}}
2
+ {"ts": 1781551901.0857167, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551559.5186882, "level": "info", "msg": "Experiment 7 started", "data": {}}
2
+ {"ts": 1781551559.5197375, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551901.053676, "level": "info", "msg": "Experiment 3 started", "data": {}}
2
+ {"ts": 1781551901.0548875, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551901.0559962, "level": "info", "msg": "Score: 35.6", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551559.483333, "level": "info", "msg": "Experiment 3 started", "data": {}}
2
+ {"ts": 1781551559.484786, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551559.4855475, "level": "info", "msg": "Score: 35.6", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551559.490363, "level": "info", "msg": "Experiment 4 started", "data": {}}
2
+ {"ts": 1781551559.491506, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551559.4575434, "level": "info", "msg": "Experiment 1 started", "data": {}}
2
+ {"ts": 1781551559.4589937, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551559.4599133, "level": "info", "msg": "Score: 38.1", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551901.0244586, "level": "info", "msg": "Experiment 1 started", "data": {}}
2
+ {"ts": 1781551901.0256388, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551901.0265846, "level": "info", "msg": "Score: 38.1", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551559.5014331, "level": "info", "msg": "Experiment 5 started", "data": {}}
2
+ {"ts": 1781551559.5026333, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551559.503857, "level": "info", "msg": "Score: 31.4", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551901.0931492, "level": "info", "msg": "Experiment 7 started", "data": {}}
2
+ {"ts": 1781551901.09441, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,4 @@
1
+ {"ts": 1781551559.470149, "level": "info", "msg": "Experiment 2 started", "data": {}}
2
+ {"ts": 1781551559.4713607, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551559.472388, "level": "info", "msg": "Score: 29.0", "data": {}}
4
+ {"ts": 1781551559.4734287, "level": "error", "msg": "Gate failed: 2 tests failed", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551559.5113907, "level": "info", "msg": "Experiment 6 started", "data": {}}
2
+ {"ts": 1781551559.5126586, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,2 @@
1
+ {"ts": 1781551901.061851, "level": "info", "msg": "Experiment 4 started", "data": {}}
2
+ {"ts": 1781551901.0633988, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
@@ -0,0 +1,4 @@
1
+ {"ts": 1781551901.0414634, "level": "info", "msg": "Experiment 2 started", "data": {}}
2
+ {"ts": 1781551901.0427585, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551901.0437982, "level": "info", "msg": "Score: 29.0", "data": {}}
4
+ {"ts": 1781551901.0447173, "level": "error", "msg": "Gate failed: 2 tests failed", "data": {}}
@@ -0,0 +1,3 @@
1
+ {"ts": 1781551901.0737042, "level": "info", "msg": "Experiment 5 started", "data": {}}
2
+ {"ts": 1781551901.0750825, "level": "info", "msg": "Running benchmark: python bench.py", "data": {}}
3
+ {"ts": 1781551901.076312, "level": "info", "msg": "Score: 31.4", "data": {}}
@@ -0,0 +1,115 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "gradex"
7
+ version = "0.1.0"
8
+ description = "Measurable code improvement — autonomous optimization loop"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ keywords = ["cli", "coding", "plugin", "ai"]
13
+ dependencies = [
14
+ "typer>=0.12.0",
15
+ "rich>=13.0.0",
16
+ "sqlmodel>=0.0.18",
17
+ "aiosqlite>=0.20.0",
18
+ "fastapi>=0.111.0",
19
+ "uvicorn[standard]>=0.29.0",
20
+ "websockets>=12.0",
21
+ "jinja2>=3.1.0",
22
+ "httpx>=0.27.0",
23
+ "anyio[trio]>=4.0.0",
24
+ "anthropic>=0.25.0",
25
+ "openai>=1.30.0",
26
+ ]
27
+
28
+ [project.scripts]
29
+ gradex = "gradex.cli:app"
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest>=8.0.0",
34
+ "pytest-cov>=5.0.0",
35
+ "ruff>=0.4.0",
36
+ "mypy>=1.10.0",
37
+ ]
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/gradex"]
41
+
42
+ [tool.ruff]
43
+ target-version = "py311"
44
+ line-length = 88
45
+ src = ["src"]
46
+
47
+ [tool.ruff.lint]
48
+ select = ["E", "F", "I", "UP", "B", "C4"]
49
+ ignore = ["E501"]
50
+
51
+ [tool.ruff.lint.isort]
52
+ known-first-party = ["gradex"]
53
+
54
+ [tool.mypy]
55
+ python_version = "3.11"
56
+ strict = true
57
+ mypy_path = "src"
58
+ files = ["src/"]
59
+
60
+ [[tool.mypy.overrides]]
61
+ # SQLModel + SQLAlchemy type stubs have known gaps that produce false positives
62
+ # under strict mode (generic column expressions, dynamic ORM attribute types, etc.).
63
+ module = ["gradex.state", "gradex.repository"]
64
+ warn_return_any = false
65
+ disallow_any_generics = false
66
+
67
+ [[tool.mypy.overrides]]
68
+ # FastAPI/Starlette route return types and Jinja2 templating are dynamically typed.
69
+ module = ["gradex.dashboard.server", "gradex.dashboard.broadcaster"]
70
+ warn_return_any = false
71
+ disallow_any_generics = false
72
+
73
+ [[tool.mypy.overrides]]
74
+ module = ["sqlmodel.*", "sqlalchemy.*", "aiosqlite.*", "uvicorn.*", "fastapi.*"]
75
+ ignore_missing_imports = true
76
+
77
+ [[tool.mypy.overrides]]
78
+ # tomllib dynamic dict and dataclasses.fields(...) produce Any under strict.
79
+ module = ["gradex.config"]
80
+ warn_return_any = false
81
+ disallow_any_generics = false
82
+
83
+ [[tool.mypy.overrides]]
84
+ # anthropic/openai SDK attribute chains + httpx resp.json() produce Any.
85
+ module = ["gradex.ai.client", "gradex.ai.discover", "gradex.ai.brief"]
86
+ warn_return_any = false
87
+ disallow_any_generics = false
88
+
89
+ [[tool.mypy.overrides]]
90
+ module = ["anthropic.*", "openai.*"]
91
+ ignore_missing_imports = true
92
+
93
+ [[tool.mypy.overrides]]
94
+ # asyncio.gather return type with return_exceptions=True and SQLModel session
95
+ # usage in subagent/orchestrator produce Any under strict.
96
+ module = ["gradex.subagent", "gradex.orchestrator"]
97
+ warn_return_any = false
98
+ disallow_any_generics = false
99
+
100
+ [[tool.mypy.overrides]]
101
+ # Host installers use shutil.which (returns str | None), Path.stat(),
102
+ # and json.loads (returns Any) — all produce Any under strict.
103
+ module = ["gradex.hosts.*", "gradex.hosts"]
104
+ warn_return_any = false
105
+ disallow_any_generics = false
106
+
107
+ [tool.pytest.ini_options]
108
+ testpaths = ["tests"]
109
+ addopts = "-v --tb=short"
110
+
111
+ [dependency-groups]
112
+ dev = [
113
+ "anyio>=4.13.0",
114
+ "pytest>=9.1.0",
115
+ ]
@@ -0,0 +1,52 @@
1
+ import sys
2
+ sys.path.insert(0, "src")
3
+
4
+ import gradex.state as state_mod
5
+ from pathlib import Path
6
+
7
+ SEED_DIR = Path("_seed_demo")
8
+ SEED_DIR.mkdir(exist_ok=True)
9
+ state_mod.GRADEX_DIR = SEED_DIR
10
+ state_mod.DB_PATH = SEED_DIR / "state.db"
11
+
12
+ import gradex.backends.worktree as wt_mod
13
+ wt_mod.GRADEX_DIR = SEED_DIR
14
+
15
+ from gradex.repository import RunRepository, ExperimentRepository
16
+ from gradex.traces import TraceWriter
17
+
18
+ run_repo = RunRepository()
19
+ exp_repo = ExperimentRepository()
20
+
21
+ run = run_repo.create(
22
+ benchmark_cmd="python bench.py",
23
+ metric_direction="lower",
24
+ gate_cmds=["pytest tests/"],
25
+ baseline_score=41.2,
26
+ )
27
+
28
+ statuses = [
29
+ ("passed", 38.1, True),
30
+ ("rejected", 29.0, False),
31
+ ("passed", 35.6, True),
32
+ ("failed", None, None),
33
+ ("passed", 31.4, True),
34
+ ("running", None, None),
35
+ ("pending", None, None),
36
+ ]
37
+
38
+ for i, (status, score, gate_passed) in enumerate(statuses):
39
+ exp = exp_repo.create(run.id, None, f"gradex/exp-00{i+1}")
40
+ if score is not None:
41
+ exp_repo.update_score(exp.id, score, gate_passed, status)
42
+ tw = TraceWriter(SEED_DIR / "traces" / f"{exp.id}.jsonl")
43
+ tw.write("info", f"Experiment {i+1} started")
44
+ tw.write("info", "Running benchmark: python bench.py")
45
+ if score:
46
+ tw.write("info", f"Score: {score}")
47
+ if status == "rejected":
48
+ tw.write("error", "Gate failed: 2 tests failed")
49
+
50
+ run_repo.update_baseline_experiment(run.id, exp_repo.list_by_run(run.id)[0].id)
51
+ print(f"Seeded run {run.id[:8]} with {len(statuses)} experiments")
52
+ print("Now run: python -m gradex dashboard")
@@ -0,0 +1,55 @@
1
+ ---
2
+ name: evo-discover
3
+ version: "0.1.0"
4
+ description: >
5
+ Discover what to optimise in this repo, instrument a benchmark,
6
+ and record a baseline score.
7
+ ---
8
+
9
+ # gradex: discover
10
+
11
+ You are the **gradex discover agent**. Your job is to explore the
12
+ repository, identify the best optimisation target, instrument a
13
+ benchmark, and record a baseline experiment.
14
+
15
+ ## Prerequisites
16
+ - The `gradex` CLI must be installed (`gradex --version` should succeed).
17
+ - The repo must have at least one committed Python file.
18
+ - You must be running from the repo root.
19
+
20
+ ## Free provider options
21
+ If the user has no API key, suggest Groq (free tier):
22
+ `gradex discover --provider groq --api-key <key from console.groq.com>`
23
+ For fully local (no account needed):
24
+ `gradex discover --provider ollama`
25
+ (requires Ollama running: `ollama serve` + `ollama pull llama3`)
26
+
27
+ ## Steps
28
+
29
+ ### Step 1 — Check environment
30
+ Run: `gradex doctor claude-code`
31
+ If any errors appear, report them and stop.
32
+
33
+ ### Step 2 — Scan the repository
34
+ Use Read and Glob tools to explore the project structure.
35
+ Look for: hot paths, parsers, LLM prompt loops, data pipelines.
36
+
37
+ ### Step 3 — Run discover
38
+ `gradex discover "<one sentence: what to optimise>"`
39
+
40
+ With free Groq:
41
+ `gradex discover "<goal>" --provider groq --api-key <key>`
42
+
43
+ This writes `.gradex/benchmark.py` and records a baseline in `.gradex/state.db`.
44
+
45
+ ### Step 4 — Report to user
46
+ Tell the user:
47
+ - What will be optimised and why
48
+ - Baseline score + metric direction
49
+ - Gate command protecting correctness
50
+ - Run ID (first 8 chars)
51
+ - Next: invoke `/gradex:optimize`
52
+
53
+ ## Notes
54
+ - Never modify source files during discover.
55
+ - If `.gradex/benchmark.py` already exists, report and ask to re-run or proceed.
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: evo-optimize
3
+ version: "0.1.0"
4
+ description: >
5
+ Run the gradex optimization loop — spawn parallel experiments,
6
+ score each patch, keep only improvements that pass gates.
7
+ ---
8
+
9
+ # gradex: optimize
10
+
11
+ You are the **gradex optimize agent**.
12
+
13
+ ## Parameters
14
+ | Parameter | Default | Description |
15
+ |-----------|---------|------------------------------------------|
16
+ | subagents | 3 | Parallel experiments per round |
17
+ | budget | 5 | Max experiments per subagent |
18
+ | stall | 3 | Rounds without improvement before stop |
19
+ | provider | groq | anthropic / openai / groq / ollama |
20
+
21
+ ## Free provider options
22
+ Groq (recommended free):
23
+ `gradex optimize --provider groq --api-key <key>`
24
+ Ollama (local, no account):
25
+ `gradex optimize --provider ollama`
26
+
27
+ ## Steps
28
+
29
+ ### Step 1 — Confirm run exists
30
+ A run must exist from `gradex discover`. If not, tell user to run discover first.
31
+
32
+ ### Step 2 — Start optimization
33
+ Default: `gradex optimize`
34
+ With params: `gradex optimize --subagents 3 --budget 10 --stall 3 --provider groq --api-key <key>`
35
+
36
+ ### Step 3 — Monitor
37
+ Tell user to open in separate terminal:
38
+ `gradex dashboard` → http://127.0.0.1:8080
39
+
40
+ ### Step 4 — Report results
41
+ When complete:
42
+ - Rounds / experiments / passed count
43
+ - Baseline vs best score + improvement %
44
+ - Stop reason (stall / budget)
45
+ - If improved: winning experiment ID
46
+ "Review changes in `.gradex/worktrees/<id>/` before merging"
47
+
48
+ ## Safety
49
+ - Main branch is NEVER modified.
50
+ - Experiment promoted ONLY IF score improved AND all gates passed.
51
+ - Ctrl+C safely stops the loop and cleans worktrees.