leanlab 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/RELEASING.md +19 -13
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/workflows/ci.yml +8 -8
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/workflows/publish.yml +4 -4
- {leanlab-0.2.1 → leanlab-0.2.2}/CHANGELOG.md +12 -1
- leanlab-0.2.2/PKG-INFO +110 -0
- leanlab-0.2.2/README.md +62 -0
- leanlab-0.2.1/README.md → leanlab-0.2.2/docs/OVERVIEW.md +70 -121
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/package.json +1 -1
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/cli.py +1 -1
- {leanlab-0.2.1 → leanlab-0.2.2}/pyproject.toml +1 -1
- leanlab-0.2.2/scripts/release.py +142 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/uv.lock +1 -1
- leanlab-0.2.1/PKG-INFO +0 -273
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/.version +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/ENGINEERING_LOOP.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/PRINCIPLES.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/SUPERPOWERS.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/actors.archik.actors.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/build-task.archik.seq.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/init-lab.archik.seq.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/main.archik.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/run-experiments-fix.archik.seq.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/run-experiments-happy.archik.seq.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/spec-task.archik.seq.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/build-task.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/diagnose-lab.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/init-lab.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/run-experiments.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/spec-task.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/watch-progress.archik.uc.yaml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.github/dependabot.yml +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.gitignore +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/Critic_Feedback.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/Director_Notes.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/evaluation.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/catboost_oof_spatial_prior_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/hgb_geo_feats_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/honest_mono_catboost_logcap_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/lgbm_spatial_clusters_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/loghonest_spatial_blend_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/multiscale_spatial_blend_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/sample.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/spatial_stack_01.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/lab.json +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/results.jsonl +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/task.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/validate.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/CLAUDE.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/CODE_OF_CONDUCT.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/CONTRIBUTING.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/FUTURE.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/LICENSE +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/SECURITY.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/USAGE.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/architecture.svg +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/build-task-flow.svg +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/run-experiments-fix.svg +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/run-experiments-happy.svg +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-21-dashboard-overhaul-design.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-21-leanlab-as-a-tool-design.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-22-coding-lab-design.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/.gitignore +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/README.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/index.html +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/package-lock.json +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/postcss.config.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/App.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/AgentChat.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Badge.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Header.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/LoopPanel.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Panel.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Playbook.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/StatRow.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/TasksTable.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Timeline.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/TokensChart.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/hooks/useStream.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/index.css +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/lib/format.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/main.jsx +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/tailwind.config.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/frontend/vite.config.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/handoff.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/__init__.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/__init__.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/__init__.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/claude.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/port.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/protocol.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/__init__.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/assets/index-BBCkNArL.css +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/assets/index-CNGMDAuO.js +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/index.html +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/engineer.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/gate.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/personas.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/playbook.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/spec.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/doctor.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/init.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/loop.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/monitor.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/CLAUDE.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/critic.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/director.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/engineer.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/reviewer.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/techlead.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/skill/SKILL.md +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_board.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_engineer.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_gate.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_playbook.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_spec.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_core.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_doctor.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_init.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_monitor.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_run_experiments_fix.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_run_experiments_happy.py +0 -0
- {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_tooling.py +0 -0
|
@@ -22,19 +22,25 @@ Do this once, before the first release.
|
|
|
22
22
|
|
|
23
23
|
## Cut a release
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
25
|
+
One command does everything — bump (all 3 version spots), roll the CHANGELOG,
|
|
26
|
+
run the tests, commit, tag, and push:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uv run python scripts/release.py patch # 0.2.1 -> 0.2.2 (or: minor | major | X.Y.Z)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
It prints the change, asks before pushing, and on confirmation pushes `main` + the
|
|
33
|
+
tag — which triggers `publish.yml` (build UI → build wheel → publish to PyPI →
|
|
34
|
+
GitHub Release). Watch it under the repo's **Actions** tab.
|
|
35
|
+
|
|
36
|
+
Flags: `--dry-run` (show changes, write nothing) · `--skip-tests` · `--yes`
|
|
37
|
+
(push without the prompt). Before running, write your release notes under
|
|
38
|
+
`## [Unreleased]` in `CHANGELOG.md` — the script moves them into the new version
|
|
39
|
+
section for you.
|
|
40
|
+
|
|
41
|
+
Doing it by hand instead: bump `version` in `pyproject.toml`,
|
|
42
|
+
`frontend/package.json`, and `leanlab/cli.py` (`_version()` fallback); move the
|
|
43
|
+
CHANGELOG notes; commit the changes; then `git tag vX.Y.Z && git push origin main vX.Y.Z`.
|
|
38
44
|
|
|
39
45
|
## Verify
|
|
40
46
|
|
|
@@ -15,9 +15,9 @@ jobs:
|
|
|
15
15
|
name: Lint (ruff)
|
|
16
16
|
runs-on: ubuntu-latest
|
|
17
17
|
steps:
|
|
18
|
-
- uses: actions/checkout@
|
|
18
|
+
- uses: actions/checkout@v7
|
|
19
19
|
- name: Install uv
|
|
20
|
-
uses: astral-sh/setup-uv@
|
|
20
|
+
uses: astral-sh/setup-uv@v7
|
|
21
21
|
- name: Ruff
|
|
22
22
|
run: uvx ruff check leanlab tests
|
|
23
23
|
|
|
@@ -29,10 +29,10 @@ jobs:
|
|
|
29
29
|
matrix:
|
|
30
30
|
python: ["3.11", "3.12", "3.13"]
|
|
31
31
|
steps:
|
|
32
|
-
- uses: actions/checkout@
|
|
32
|
+
- uses: actions/checkout@v7
|
|
33
33
|
|
|
34
34
|
- name: Install uv
|
|
35
|
-
uses: astral-sh/setup-uv@
|
|
35
|
+
uses: astral-sh/setup-uv@v7
|
|
36
36
|
with:
|
|
37
37
|
python-version: ${{ matrix.python }}
|
|
38
38
|
enable-cache: true
|
|
@@ -47,9 +47,9 @@ jobs:
|
|
|
47
47
|
name: Build (wheel + UI)
|
|
48
48
|
runs-on: ubuntu-latest
|
|
49
49
|
steps:
|
|
50
|
-
- uses: actions/checkout@
|
|
50
|
+
- uses: actions/checkout@v7
|
|
51
51
|
|
|
52
|
-
- uses: actions/setup-node@
|
|
52
|
+
- uses: actions/setup-node@v6
|
|
53
53
|
with:
|
|
54
54
|
node-version: "20"
|
|
55
55
|
cache: npm
|
|
@@ -62,7 +62,7 @@ jobs:
|
|
|
62
62
|
npm run build
|
|
63
63
|
|
|
64
64
|
- name: Install uv
|
|
65
|
-
uses: astral-sh/setup-uv@
|
|
65
|
+
uses: astral-sh/setup-uv@v7
|
|
66
66
|
|
|
67
67
|
- name: Build sdist + wheel
|
|
68
68
|
run: uv build
|
|
@@ -73,7 +73,7 @@ jobs:
|
|
|
73
73
|
&& echo "✓ board_dist bundled in the wheel" \
|
|
74
74
|
|| (echo "✗ board_dist missing from the wheel" && exit 1)
|
|
75
75
|
|
|
76
|
-
- uses: actions/upload-artifact@
|
|
76
|
+
- uses: actions/upload-artifact@v7
|
|
77
77
|
with:
|
|
78
78
|
name: dist
|
|
79
79
|
path: dist/*
|
|
@@ -18,10 +18,10 @@ jobs:
|
|
|
18
18
|
id-token: write # OIDC: PyPI Trusted Publishing mints a short-lived token
|
|
19
19
|
steps:
|
|
20
20
|
- name: Checkout
|
|
21
|
-
uses: actions/checkout@
|
|
21
|
+
uses: actions/checkout@v7
|
|
22
22
|
|
|
23
23
|
# The wheel must ship the compiled React board (board_dist/), so build it first.
|
|
24
|
-
- uses: actions/setup-node@
|
|
24
|
+
- uses: actions/setup-node@v6
|
|
25
25
|
with:
|
|
26
26
|
node-version: "20"
|
|
27
27
|
cache: npm
|
|
@@ -33,7 +33,7 @@ jobs:
|
|
|
33
33
|
npm run build
|
|
34
34
|
|
|
35
35
|
- name: Install uv
|
|
36
|
-
uses: astral-sh/setup-uv@
|
|
36
|
+
uses: astral-sh/setup-uv@v7
|
|
37
37
|
|
|
38
38
|
- name: Build sdist + wheel
|
|
39
39
|
run: uv build
|
|
@@ -66,7 +66,7 @@ jobs:
|
|
|
66
66
|
echo "path=/tmp/release-notes.md" >> "$GITHUB_OUTPUT"
|
|
67
67
|
|
|
68
68
|
- name: Create GitHub Release
|
|
69
|
-
uses: softprops/action-gh-release@
|
|
69
|
+
uses: softprops/action-gh-release@v3
|
|
70
70
|
with:
|
|
71
71
|
tag_name: ${{ github.ref_name }}
|
|
72
72
|
name: ${{ github.ref_name }}
|
|
@@ -6,6 +6,16 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.2.2] - 2026-06-26
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- One-command release script (`scripts/release.py`) and a `ruff check` lint job in CI.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- README is now user-facing (PyPI install + quick start for both lab types). The
|
|
16
|
+
project concept, structure, two-lab mapping, and coding-lab flow moved to
|
|
17
|
+
`docs/OVERVIEW.md`.
|
|
18
|
+
|
|
9
19
|
## [0.2.1] - 2026-06-26
|
|
10
20
|
|
|
11
21
|
### Fixed
|
|
@@ -35,7 +45,8 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
35
45
|
evaluator) and coding labs (spec-writer → engineer → gate → reviewer →
|
|
36
46
|
tech-lead), a live dashboard, and a Claude Code skill (`leanlab init --for-agent`).
|
|
37
47
|
|
|
38
|
-
[Unreleased]: https://github.com/bacharSalleh/leanlab/compare/v0.2.
|
|
48
|
+
[Unreleased]: https://github.com/bacharSalleh/leanlab/compare/v0.2.2...HEAD
|
|
49
|
+
[0.2.2]: https://github.com/bacharSalleh/leanlab/compare/v0.2.1...v0.2.2
|
|
39
50
|
[0.2.1]: https://github.com/bacharSalleh/leanlab/compare/v0.2.0...v0.2.1
|
|
40
51
|
[0.2.0]: https://github.com/bacharSalleh/leanlab/compare/v0.1.0...v0.2.0
|
|
41
52
|
[0.1.0]: https://github.com/bacharSalleh/leanlab/releases/tag/v0.1.0
|
leanlab-0.2.2/PKG-INFO
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: leanlab
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: A self-improving lab for AI agents — evolve ML experiments against a frozen metric, or ship coding tasks through a spec → gate → review → merge loop with locked acceptance tests.
|
|
5
|
+
Project-URL: Homepage, https://github.com/bacharSalleh/leanlab
|
|
6
|
+
Project-URL: Repository, https://github.com/bacharSalleh/leanlab
|
|
7
|
+
Project-URL: Issues, https://github.com/bacharSalleh/leanlab/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/bacharSalleh/leanlab/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Bashar <welcomebachar@gmail.com>
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Bashar
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: agents,claude,cli,coding-agent,evaluation,experiment,lab,llm,self-improving
|
|
33
|
+
Classifier: Development Status :: 4 - Beta
|
|
34
|
+
Classifier: Environment :: Console
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
42
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
43
|
+
Classifier: Topic :: Software Development :: Testing
|
|
44
|
+
Requires-Python: >=3.11
|
|
45
|
+
Requires-Dist: questionary>=2
|
|
46
|
+
Requires-Dist: rich>=13
|
|
47
|
+
Description-Content-Type: text/markdown
|
|
48
|
+
|
|
49
|
+
# leanlab
|
|
50
|
+
|
|
51
|
+
[PyPI](https://pypi.org/project/leanlab/)
|
|
52
|
+
[CI](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml)
|
|
53
|
+
[Python versions](https://pypi.org/project/leanlab/)
|
|
54
|
+
[License: MIT](LICENSE)
|
|
55
|
+
|
|
56
|
+
**Self-improving labs for AI agents.** Point leanlab at a task and a team of
|
|
57
|
+
Claude agents iterates toward a goal — evolving ML / optimization experiments
|
|
58
|
+
against a frozen metric, or shipping coding tasks through a
|
|
59
|
+
spec → gate → review → merge loop with locked acceptance tests.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pipx install leanlab # or: pip install leanlab · uvx leanlab
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Requires **Python 3.11+** and the **`claude` CLI** (the agents run on Claude Code).
|
|
68
|
+
|
|
69
|
+
## Quick start
|
|
70
|
+
|
|
71
|
+
leanlab runs **inside your own project** — each lab lives in a `.leanlab/<name>/`
|
|
72
|
+
folder; the engine stays in the installed tool.
|
|
73
|
+
|
|
74
|
+
**Metric lab** — evolve a number (ML, optimization, anything that prints a score):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
cd ~/my-project
|
|
78
|
+
leanlab init iris # describe the task; Claude drafts the lab + scorer
|
|
79
|
+
leanlab check iris # verify it's wired correctly (free)
|
|
80
|
+
leanlab lock iris # freeze the scorer
|
|
81
|
+
leanlab run iris --n 5 # the agents evolve experiments (uses Claude)
|
|
82
|
+
leanlab serve iris # watch the live dashboard
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Coding lab** — ship a coding task with locked acceptance tests:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
cd ~/my-repo # a git repository
|
|
89
|
+
leanlab spec "add a /health endpoint" # spec-writer drafts + locks the tests
|
|
90
|
+
leanlab build add-health # engineer → gate → reviewer → merge
|
|
91
|
+
leanlab board # live board: tasks, timeline, playbook
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Let Claude Code drive it
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
cd ~/my-project && leanlab init --for-agent # installs a Claude Code skill
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Then just ask Claude Code — *"use leanlab to add a /health endpoint"* — and it
|
|
101
|
+
specs, builds, and merges through the honest test gate for you.
|
|
102
|
+
|
|
103
|
+
## Docs
|
|
104
|
+
|
|
105
|
+
- **[docs/USAGE.md](docs/USAGE.md)** — every command, in order, with examples.
|
|
106
|
+
- **[docs/OVERVIEW.md](docs/OVERVIEW.md)** — how it works: the loop, the two lab
|
|
107
|
+
types, the coding-lab flow, and the project structure.
|
|
108
|
+
- **[CONTRIBUTING.md](CONTRIBUTING.md)** — local development (uv, tests, the React board).
|
|
109
|
+
|
|
110
|
+
MIT licensed — see [LICENSE](LICENSE).
|
leanlab-0.2.2/README.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# leanlab
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/leanlab/)
|
|
4
|
+
[CI](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml)
|
|
5
|
+
[Python versions](https://pypi.org/project/leanlab/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
**Self-improving labs for AI agents.** Point leanlab at a task and a team of
|
|
9
|
+
Claude agents iterates toward a goal — evolving ML / optimization experiments
|
|
10
|
+
against a frozen metric, or shipping coding tasks through a
|
|
11
|
+
spec → gate → review → merge loop with locked acceptance tests.
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pipx install leanlab # or: pip install leanlab · uvx leanlab
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Requires **Python 3.11+** and the **`claude` CLI** (the agents run on Claude Code).
|
|
20
|
+
|
|
21
|
+
## Quick start
|
|
22
|
+
|
|
23
|
+
leanlab runs **inside your own project** — each lab lives in a `.leanlab/<name>/`
|
|
24
|
+
folder; the engine stays in the installed tool.
|
|
25
|
+
|
|
26
|
+
**Metric lab** — evolve a number (ML, optimization, anything that prints a score):
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
cd ~/my-project
|
|
30
|
+
leanlab init iris # describe the task; Claude drafts the lab + scorer
|
|
31
|
+
leanlab check iris # verify it's wired correctly (free)
|
|
32
|
+
leanlab lock iris # freeze the scorer
|
|
33
|
+
leanlab run iris --n 5 # the agents evolve experiments (uses Claude)
|
|
34
|
+
leanlab serve iris # watch the live dashboard
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
**Coding lab** — ship a coding task with locked acceptance tests:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cd ~/my-repo # a git repository
|
|
41
|
+
leanlab spec "add a /health endpoint" # spec-writer drafts + locks the tests
|
|
42
|
+
leanlab build add-health # engineer → gate → reviewer → merge
|
|
43
|
+
leanlab board # live board: tasks, timeline, playbook
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Let Claude Code drive it
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cd ~/my-project && leanlab init --for-agent # installs a Claude Code skill
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Then just ask Claude Code — *"use leanlab to add a /health endpoint"* — and it
|
|
53
|
+
specs, builds, and merges through the honest test gate for you.
|
|
54
|
+
|
|
55
|
+
## Docs
|
|
56
|
+
|
|
57
|
+
- **[docs/USAGE.md](docs/USAGE.md)** — every command, in order, with examples.
|
|
58
|
+
- **[docs/OVERVIEW.md](docs/OVERVIEW.md)** — how it works: the loop, the two lab
|
|
59
|
+
types, the coding-lab flow, and the project structure.
|
|
60
|
+
- **[CONTRIBUTING.md](CONTRIBUTING.md)** — local development (uv, tests, the React board).
|
|
61
|
+
|
|
62
|
+
MIT licensed — see [LICENSE](LICENSE).
|
|
@@ -1,91 +1,28 @@
|
|
|
1
|
-
# leanlab
|
|
1
|
+
# How leanlab works
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
[
|
|
6
|
-
[](LICENSE)
|
|
3
|
+
This is the deeper tour — the idea, the two lab types, the coding-lab flow, and
|
|
4
|
+
the project structure. For installation and day-to-day commands, see the
|
|
5
|
+
[README](../README.md) and [USAGE.md](USAGE.md).
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
pipx install leanlab # or: pip install leanlab · uvx leanlab
|
|
10
|
-
```
|
|
7
|
+
## The idea
|
|
11
8
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
drives any task: you just describe the *lab* and Claude builds the scorer.
|
|
9
|
+
leanlab runs a **self-improving loop**: make an attempt → judge it against a
|
|
10
|
+
frozen criterion → keep the best → learn for next time. A team of Claude agents
|
|
11
|
+
drives the loop; you only describe the *lab*.
|
|
16
12
|
|
|
17
|
-
It
|
|
13
|
+
It generalizes the trading "selflearn" idea: **strategy → Experiment**,
|
|
18
14
|
**Manager → Director**, `results.csv → results.jsonl`, and the objective (what to
|
|
19
15
|
maximize or minimize) is configuration, not code.
|
|
20
16
|
|
|
21
17
|
leanlab is used **inside your own project** (like archik): each lab lives in a
|
|
22
|
-
`.leanlab/<name>/` folder; the engine stays in the installed tool
|
|
23
|
-
|
|
24
|
-
## Quick start
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
uv tool install --force --editable /path/to/leanlab # install the `leanlab` tool
|
|
28
|
-
cd ~/my-project && uv init # your project (a uv project)
|
|
29
|
-
|
|
30
|
-
leanlab init iris # describe the task; Claude drafts the lab
|
|
31
|
-
leanlab check iris # verify it's wired correctly (free)
|
|
32
|
-
leanlab lock iris # freeze the scorer
|
|
33
|
-
leanlab run iris --n 5 # the agents evolve experiments (costs Claude)
|
|
34
|
-
leanlab serve iris # watch the live dashboard
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
**Full command guide:** [docs/USAGE.md](docs/USAGE.md) — the flow and what each
|
|
38
|
-
command does exactly.
|
|
39
|
-
|
|
40
|
-
## Anatomy
|
|
41
|
-
|
|
42
|
-
```
|
|
43
|
-
leanlab/ # the installable tool (engine — never copied into your project)
|
|
44
|
-
├── cli.py # commands: init · check · fix · run · serve · list · lock · unlock
|
|
45
|
-
├── core/
|
|
46
|
-
│ ├── loop.py # run N experiments, score, log, wake Director/Critic
|
|
47
|
-
│ ├── monitor.py # live dashboard: stat chips + progress chart + table + stream
|
|
48
|
-
│ ├── init.py # interactive `init` — Claude drafts task + evaluator
|
|
49
|
-
│ ├── doctor.py # preflight checks + Claude-powered `fix`
|
|
50
|
-
│ └── agents/ # ports & adapters — the backend-agnostic agent layer
|
|
51
|
-
└── templates/agents/ # CLAUDE.md (Worker) · director.md · critic.md (injected, not copied)
|
|
18
|
+
`.leanlab/<name>/` folder; the engine stays in the installed tool and is never
|
|
19
|
+
copied into your project.
|
|
52
20
|
|
|
53
|
-
|
|
54
|
-
├── task.md goal + experiment contract
|
|
55
|
-
├── lab.json objective {metric, direction}, commands, cadences
|
|
56
|
-
├── evaluation.py the FROZEN evaluator → prints ONE line of JSON metrics
|
|
57
|
-
├── validate.py structural check the Worker runs (no score)
|
|
58
|
-
├── experiments/ where the Worker writes one file per loop
|
|
59
|
-
└── results.jsonl the book: one JSON record per experiment
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
**How a lab plugs in:** the engine never imports a lab. It runs the lab's
|
|
63
|
-
`validate_cmd` / `eval_cmd` (from `lab.json`) as subprocesses, reads the **JSON
|
|
64
|
-
metrics** the evaluator prints, and ranks by the configured **objective**. So a lab
|
|
65
|
-
can be ML, trading, graphics, optimization — anything that can print a metric.
|
|
66
|
-
|
|
67
|
-
## Make your own lab
|
|
68
|
-
|
|
69
|
-
`leanlab init <name>` is interactive: you describe the task in plain words, Claude
|
|
70
|
-
drafts `task.md` and picks the objective, then proposes an `evaluation.py` you
|
|
71
|
-
approve (or give feedback to revise). It installs the scorer's libraries and
|
|
72
|
-
self-checks the wiring before finishing. Then `leanlab lock <name>` and
|
|
73
|
-
`leanlab run <name>`.
|
|
21
|
+
## Two lab types
|
|
74
22
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
## The example lab: house-prices
|
|
79
|
-
|
|
80
|
-
This repo dogfoods itself — `.leanlab/house-prices` predicts California median
|
|
81
|
-
house value (**minimize RMSE**). Each experiment defines `build_estimator()` (any
|
|
82
|
-
scikit-learn-style model); the evaluator fits it on a fixed split and reports
|
|
83
|
-
`rmse / mae / r2 / overfit_gap / train_secs` on held-out data.
|
|
84
|
-
|
|
85
|
-
## Two lab types — naming map
|
|
86
|
-
|
|
87
|
-
leanlab runs the same loop two ways. A **metric lab** (ML/optimization — evolve a number)
|
|
88
|
-
and a **coding lab** (do coding tasks on a repo — pass tests). Same engine, different words:
|
|
23
|
+
The same loop runs two ways: a **metric lab** (ML / optimization — evolve a
|
|
24
|
+
number) and a **coding lab** (do coding tasks on a repo — pass tests). Same
|
|
25
|
+
engine, different words:
|
|
89
26
|
|
|
90
27
|
**The team (agents)**
|
|
91
28
|
|
|
@@ -119,23 +56,16 @@ and a **coding lab** (do coding tasks on a repo — pass tests). Same engine, di
|
|
|
119
56
|
| `serve` (dashboard) | `board` (dashboard) |
|
|
120
57
|
| `lock` / `unlock` | (lock is automatic in `spec`) |
|
|
121
58
|
|
|
122
|
-
**
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|------------|-----------|
|
|
126
|
-
| `loop` | `engineer` |
|
|
127
|
-
| `evaluator` | `gate-runner` |
|
|
128
|
-
| `results-store` | `playbook` + `coding-results` |
|
|
129
|
-
| `dashboard` | `coding-board` |
|
|
130
|
-
|
|
131
|
-
Same idea both ways: **make an attempt → judge it → keep the best → learn for next time** —
|
|
132
|
-
just "experiment + metric + memory" swapped for "code change + tests + playbook."
|
|
59
|
+
Same idea both ways: **make an attempt → judge it → keep the best → learn for
|
|
60
|
+
next time** — just "experiment + metric + memory" swapped for "code change +
|
|
61
|
+
tests + playbook."
|
|
133
62
|
|
|
134
63
|
## The coding lab flow
|
|
135
64
|
|
|
136
|
-
A coding lab is an **assembly line with quality gates**. Each step hands off to
|
|
137
|
-
any failed gate sends the work back to the engineer — up to
|
|
138
|
-
`main` until the tests pass, the work is proven
|
|
65
|
+
A coding lab is an **assembly line with quality gates**. Each step hands off to
|
|
66
|
+
the next, and any failed gate sends the work back to the engineer — up to
|
|
67
|
+
`--max-attempts`. Nothing reaches `main` until the tests pass, the work is proven
|
|
68
|
+
honest, and every reviewer approves.
|
|
139
69
|
|
|
140
70
|
```
|
|
141
71
|
Developer
|
|
@@ -182,44 +112,63 @@ any failed gate sends the work back to the engineer — up to `--max-attempts`.
|
|
|
182
112
|
| Merge | *automated* | The branch merges into `main` — the change ships. |
|
|
183
113
|
| Playbook | **Tech-lead** | Rewrites `PLAYBOOK.md` so the next task starts with the project's conventions and pitfalls. |
|
|
184
114
|
|
|
185
|
-
Watch
|
|
186
|
-
(every session, with token cost), and the
|
|
115
|
+
Watch it live with `leanlab board`: the four roles, a per-task round-by-round
|
|
116
|
+
timeline, the agent chat (every session, with token cost), and the playbook.
|
|
187
117
|
|
|
188
|
-
**Why it compounds:** every merged task adds its locked tests to `main` (a
|
|
189
|
-
loosens), and the playbook accumulates — so the lab keeps
|
|
118
|
+
**Why it compounds:** every merged task adds its locked tests to `main` (a
|
|
119
|
+
ratchet that never loosens), and the playbook accumulates — so the lab keeps
|
|
120
|
+
getting better at *your* project.
|
|
190
121
|
|
|
191
|
-
##
|
|
122
|
+
## Structure
|
|
192
123
|
|
|
193
|
-
```bash
|
|
194
|
-
uv sync
|
|
195
|
-
uv run pytest # the test suite
|
|
196
|
-
uv run leanlab list # run the tool from the checkout, no install
|
|
197
124
|
```
|
|
125
|
+
leanlab/ # the installable tool (engine — never copied into your project)
|
|
126
|
+
├── cli.py # commands: init · check · fix · run · serve · spec · build · board · list · lock · unlock
|
|
127
|
+
├── core/
|
|
128
|
+
│ ├── loop.py # run N experiments, score, log, wake Director/Critic
|
|
129
|
+
│ ├── monitor.py # metric-lab live dashboard
|
|
130
|
+
│ ├── init.py # interactive `init` — Claude drafts task + evaluator
|
|
131
|
+
│ ├── doctor.py # preflight checks + Claude-powered `fix`
|
|
132
|
+
│ ├── coding/ # the coding lab: spec · engineer · gate · reviewer · tech-lead · board
|
|
133
|
+
│ └── agents/ # ports & adapters — the backend-agnostic agent layer
|
|
134
|
+
├── templates/agents/ # the agent personas (injected into prompts, not copied)
|
|
135
|
+
└── core/coding/board_dist/ # the React board UI, compiled (built from frontend/)
|
|
198
136
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
cd frontend && npm install && npm run build # compile the UI (re-run after editing src/)
|
|
137
|
+
<your project>/.leanlab/<name>/ # a metric lab — only YOUR files
|
|
138
|
+
├── task.md goal + experiment contract
|
|
139
|
+
├── lab.json objective {metric, direction}, commands, cadences
|
|
140
|
+
├── evaluation.py the FROZEN evaluator → prints ONE line of JSON metrics
|
|
141
|
+
├── validate.py structural check the Worker runs (no score)
|
|
142
|
+
├── experiments/ where the Worker writes one file per loop
|
|
143
|
+
└── results.jsonl the book: one JSON record per experiment
|
|
207
144
|
```
|
|
208
145
|
|
|
209
|
-
|
|
210
|
-
|
|
146
|
+
**How a lab plugs in:** the engine never imports a lab. It runs the lab's
|
|
147
|
+
`validate_cmd` / `eval_cmd` (from `lab.json`) as subprocesses, reads the **JSON
|
|
148
|
+
metrics** the evaluator prints, and ranks by the configured **objective**. So a
|
|
149
|
+
lab can be ML, trading, graphics, optimization — anything that can print a
|
|
150
|
+
metric.
|
|
211
151
|
|
|
212
|
-
##
|
|
152
|
+
## Making a metric lab
|
|
213
153
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
154
|
+
`leanlab init <name>` is interactive: you describe the task in plain words, Claude
|
|
155
|
+
drafts `task.md` and picks the objective, then proposes an `evaluation.py` you
|
|
156
|
+
approve (or give feedback to revise). It installs the scorer's libraries and
|
|
157
|
+
self-checks the wiring before finishing. Then `leanlab lock <name>` and
|
|
158
|
+
`leanlab run <name>`. If a lab is mis-wired, `leanlab check` says what's wrong and
|
|
159
|
+
`leanlab fix` has Claude repair it.
|
|
160
|
+
|
|
161
|
+
**Example — house-prices:** this repo dogfoods itself. `.leanlab/house-prices`
|
|
162
|
+
predicts California median house value (**minimize RMSE**). Each experiment
|
|
163
|
+
defines `build_estimator()` (any scikit-learn-style model); the evaluator fits it
|
|
164
|
+
on a fixed split and reports `rmse / mae / r2 / overfit_gap / train_secs` on
|
|
165
|
+
held-out data.
|
|
219
166
|
|
|
220
|
-
##
|
|
167
|
+
## Honesty model
|
|
221
168
|
|
|
222
169
|
- Agents get full tools and are told to be proactive researchers (web, ML, `uv add`).
|
|
223
|
-
- The Worker never runs the evaluator, so scores stay honest; `lock` freezes it.
|
|
224
|
-
-
|
|
225
|
-
|
|
170
|
+
- The Worker never runs the evaluator, so metric scores stay honest; `lock` freezes the evaluator.
|
|
171
|
+
- In coding labs, acceptance tests are locked (sha256, out of the worktree),
|
|
172
|
+
restored before every gate, and re-run in isolation to catch fixture-gaming.
|
|
173
|
+
- The evaluator and agent personas live in the package and are injected into
|
|
174
|
+
prompts — nothing framework-level is copied into your project.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "leanlab"
|
|
3
|
-
version = "0.2.
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "A self-improving lab for AI agents — evolve ML experiments against a frozen metric, or ship coding tasks through a spec → gate → review → merge loop with locked acceptance tests."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|