leanlab 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. {leanlab-0.2.1 → leanlab-0.2.2}/.github/RELEASING.md +19 -13
  2. {leanlab-0.2.1 → leanlab-0.2.2}/.github/workflows/ci.yml +8 -8
  3. {leanlab-0.2.1 → leanlab-0.2.2}/.github/workflows/publish.yml +4 -4
  4. {leanlab-0.2.1 → leanlab-0.2.2}/CHANGELOG.md +12 -1
  5. leanlab-0.2.2/PKG-INFO +110 -0
  6. leanlab-0.2.2/README.md +62 -0
  7. leanlab-0.2.1/README.md → leanlab-0.2.2/docs/OVERVIEW.md +70 -121
  8. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/package.json +1 -1
  9. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/cli.py +1 -1
  10. {leanlab-0.2.1 → leanlab-0.2.2}/pyproject.toml +1 -1
  11. leanlab-0.2.2/scripts/release.py +142 -0
  12. {leanlab-0.2.1 → leanlab-0.2.2}/uv.lock +1 -1
  13. leanlab-0.2.1/PKG-INFO +0 -273
  14. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/.version +0 -0
  15. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/ENGINEERING_LOOP.md +0 -0
  16. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/PRINCIPLES.md +0 -0
  17. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/SUPERPOWERS.md +0 -0
  18. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/actors.archik.actors.yaml +0 -0
  19. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/build-task.archik.seq.yaml +0 -0
  20. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/init-lab.archik.seq.yaml +0 -0
  21. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/main.archik.yaml +0 -0
  22. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/run-experiments-fix.archik.seq.yaml +0 -0
  23. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/run-experiments-happy.archik.seq.yaml +0 -0
  24. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/spec-task.archik.seq.yaml +0 -0
  25. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/build-task.archik.uc.yaml +0 -0
  26. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/diagnose-lab.archik.uc.yaml +0 -0
  27. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/init-lab.archik.uc.yaml +0 -0
  28. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/run-experiments.archik.uc.yaml +0 -0
  29. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/spec-task.archik.uc.yaml +0 -0
  30. {leanlab-0.2.1 → leanlab-0.2.2}/.archik/usecases/watch-progress.archik.uc.yaml +0 -0
  31. {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  32. {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  33. {leanlab-0.2.1 → leanlab-0.2.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  34. {leanlab-0.2.1 → leanlab-0.2.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  35. {leanlab-0.2.1 → leanlab-0.2.2}/.github/dependabot.yml +0 -0
  36. {leanlab-0.2.1 → leanlab-0.2.2}/.gitignore +0 -0
  37. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/Critic_Feedback.md +0 -0
  38. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/Director_Notes.md +0 -0
  39. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/evaluation.py +0 -0
  40. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/catboost_oof_spatial_prior_01.py +0 -0
  41. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/hgb_geo_feats_01.py +0 -0
  42. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/honest_mono_catboost_logcap_01.py +0 -0
  43. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/lgbm_spatial_clusters_01.py +0 -0
  44. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/loghonest_spatial_blend_01.py +0 -0
  45. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/multiscale_spatial_blend_01.py +0 -0
  46. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/sample.py +0 -0
  47. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/experiments/spatial_stack_01.py +0 -0
  48. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/lab.json +0 -0
  49. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/results.jsonl +0 -0
  50. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/task.md +0 -0
  51. {leanlab-0.2.1 → leanlab-0.2.2}/.leanlab/house-prices/validate.py +0 -0
  52. {leanlab-0.2.1 → leanlab-0.2.2}/CLAUDE.md +0 -0
  53. {leanlab-0.2.1 → leanlab-0.2.2}/CODE_OF_CONDUCT.md +0 -0
  54. {leanlab-0.2.1 → leanlab-0.2.2}/CONTRIBUTING.md +0 -0
  55. {leanlab-0.2.1 → leanlab-0.2.2}/FUTURE.md +0 -0
  56. {leanlab-0.2.1 → leanlab-0.2.2}/LICENSE +0 -0
  57. {leanlab-0.2.1 → leanlab-0.2.2}/SECURITY.md +0 -0
  58. {leanlab-0.2.1 → leanlab-0.2.2}/docs/USAGE.md +0 -0
  59. {leanlab-0.2.1 → leanlab-0.2.2}/docs/architecture.svg +0 -0
  60. {leanlab-0.2.1 → leanlab-0.2.2}/docs/build-task-flow.svg +0 -0
  61. {leanlab-0.2.1 → leanlab-0.2.2}/docs/run-experiments-fix.svg +0 -0
  62. {leanlab-0.2.1 → leanlab-0.2.2}/docs/run-experiments-happy.svg +0 -0
  63. {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-21-dashboard-overhaul-design.md +0 -0
  64. {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-21-leanlab-as-a-tool-design.md +0 -0
  65. {leanlab-0.2.1 → leanlab-0.2.2}/docs/superpowers/specs/2026-06-22-coding-lab-design.md +0 -0
  66. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/.gitignore +0 -0
  67. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/README.md +0 -0
  68. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/index.html +0 -0
  69. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/package-lock.json +0 -0
  70. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/postcss.config.js +0 -0
  71. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/App.jsx +0 -0
  72. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/AgentChat.jsx +0 -0
  73. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Badge.jsx +0 -0
  74. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Header.jsx +0 -0
  75. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/LoopPanel.jsx +0 -0
  76. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Panel.jsx +0 -0
  77. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Playbook.jsx +0 -0
  78. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/StatRow.jsx +0 -0
  79. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/TasksTable.jsx +0 -0
  80. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/Timeline.jsx +0 -0
  81. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/components/TokensChart.jsx +0 -0
  82. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/hooks/useStream.js +0 -0
  83. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/index.css +0 -0
  84. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/lib/format.js +0 -0
  85. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/src/main.jsx +0 -0
  86. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/tailwind.config.js +0 -0
  87. {leanlab-0.2.1 → leanlab-0.2.2}/frontend/vite.config.js +0 -0
  88. {leanlab-0.2.1 → leanlab-0.2.2}/handoff.md +0 -0
  89. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/__init__.py +0 -0
  90. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/__init__.py +0 -0
  91. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/__init__.py +0 -0
  92. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/claude.py +0 -0
  93. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/port.py +0 -0
  94. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/agents/protocol.py +0 -0
  95. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/__init__.py +0 -0
  96. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board.py +0 -0
  97. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/assets/index-BBCkNArL.css +0 -0
  98. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/assets/index-CNGMDAuO.js +0 -0
  99. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/board_dist/index.html +0 -0
  100. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/engineer.py +0 -0
  101. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/gate.py +0 -0
  102. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/personas.py +0 -0
  103. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/playbook.py +0 -0
  104. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/coding/spec.py +0 -0
  105. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/doctor.py +0 -0
  106. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/init.py +0 -0
  107. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/loop.py +0 -0
  108. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/core/monitor.py +0 -0
  109. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/CLAUDE.md +0 -0
  110. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/critic.md +0 -0
  111. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/director.md +0 -0
  112. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/engineer.md +0 -0
  113. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/reviewer.md +0 -0
  114. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/agents/techlead.md +0 -0
  115. {leanlab-0.2.1 → leanlab-0.2.2}/leanlab/templates/skill/SKILL.md +0 -0
  116. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_board.py +0 -0
  117. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_engineer.py +0 -0
  118. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_gate.py +0 -0
  119. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_playbook.py +0 -0
  120. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_coding_spec.py +0 -0
  121. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_core.py +0 -0
  122. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_doctor.py +0 -0
  123. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_init.py +0 -0
  124. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_monitor.py +0 -0
  125. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_run_experiments_fix.py +0 -0
  126. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_run_experiments_happy.py +0 -0
  127. {leanlab-0.2.1 → leanlab-0.2.2}/tests/test_tooling.py +0 -0
@@ -22,19 +22,25 @@ Do this once, before the first release.
22
22
 
23
23
  ## Cut a release
24
24
 
25
- 1. Bump the version in **three** places (keep them in sync):
26
- - `pyproject.toml` → `version`
27
- - `frontend/package.json` → `version`
28
- - `leanlab/cli.py` → `_version()` fallback
29
- 2. Move the `## [Unreleased]` notes into a new dated section in `CHANGELOG.md`.
30
- 3. Commit, then tag and push:
31
- ```bash
32
- git commit -am "release: v0.2.2"
33
- git tag v0.2.2
34
- git push origin main --tags
35
- ```
36
- 4. The `Publish to PyPI` workflow runs: build UI → build wheel → publish → release.
37
- Watch it under the repo's **Actions** tab.
25
+ One command does everything — bump (all 3 version spots), roll the CHANGELOG,
26
+ run the tests, commit, tag, and push:
27
+
28
+ ```bash
29
+ uv run python scripts/release.py patch # 0.2.1 -> 0.2.2 (or: minor | major | X.Y.Z)
30
+ ```
31
+
32
+ It prints the change, asks before pushing, and on confirm pushes `main` + the
33
+ tag — which triggers `publish.yml` (build UI → build wheel → publish to PyPI →
34
+ GitHub Release). Watch it under the repo's **Actions** tab.
35
+
36
+ Flags: `--dry-run` (show changes, write nothing) · `--skip-tests` · `--yes`
37
+ (push without the prompt). Before running, write your release notes under
38
+ `## [Unreleased]` in `CHANGELOG.md` — the script moves them into the new version
39
+ section for you.
40
+
41
+ Doing it by hand instead: bump `version` in `pyproject.toml`,
42
+ `frontend/package.json`, and `leanlab/cli.py` (`_version()` fallback); move the
43
+ CHANGELOG notes; then `git tag vX.Y.Z && git push origin main vX.Y.Z`.
38
44
 
39
45
  ## Verify
40
46
 
@@ -15,9 +15,9 @@ jobs:
15
15
  name: Lint (ruff)
16
16
  runs-on: ubuntu-latest
17
17
  steps:
18
- - uses: actions/checkout@v4
18
+ - uses: actions/checkout@v7
19
19
  - name: Install uv
20
- uses: astral-sh/setup-uv@v5
20
+ uses: astral-sh/setup-uv@v7
21
21
  - name: Ruff
22
22
  run: uvx ruff check leanlab tests
23
23
 
@@ -29,10 +29,10 @@ jobs:
29
29
  matrix:
30
30
  python: ["3.11", "3.12", "3.13"]
31
31
  steps:
32
- - uses: actions/checkout@v4
32
+ - uses: actions/checkout@v7
33
33
 
34
34
  - name: Install uv
35
- uses: astral-sh/setup-uv@v5
35
+ uses: astral-sh/setup-uv@v7
36
36
  with:
37
37
  python-version: ${{ matrix.python }}
38
38
  enable-cache: true
@@ -47,9 +47,9 @@ jobs:
47
47
  name: Build (wheel + UI)
48
48
  runs-on: ubuntu-latest
49
49
  steps:
50
- - uses: actions/checkout@v4
50
+ - uses: actions/checkout@v7
51
51
 
52
- - uses: actions/setup-node@v4
52
+ - uses: actions/setup-node@v6
53
53
  with:
54
54
  node-version: "20"
55
55
  cache: npm
@@ -62,7 +62,7 @@ jobs:
62
62
  npm run build
63
63
 
64
64
  - name: Install uv
65
- uses: astral-sh/setup-uv@v5
65
+ uses: astral-sh/setup-uv@v7
66
66
 
67
67
  - name: Build sdist + wheel
68
68
  run: uv build
@@ -73,7 +73,7 @@ jobs:
73
73
  && echo "✓ board_dist bundled in the wheel" \
74
74
  || (echo "✗ board_dist missing from the wheel" && exit 1)
75
75
 
76
- - uses: actions/upload-artifact@v4
76
+ - uses: actions/upload-artifact@v7
77
77
  with:
78
78
  name: dist
79
79
  path: dist/*
@@ -18,10 +18,10 @@ jobs:
18
18
  id-token: write # OIDC: PyPI Trusted Publishing mints a short-lived token
19
19
  steps:
20
20
  - name: Checkout
21
- uses: actions/checkout@v4
21
+ uses: actions/checkout@v7
22
22
 
23
23
  # The wheel must ship the compiled React board (board_dist/), so build it first.
24
- - uses: actions/setup-node@v4
24
+ - uses: actions/setup-node@v6
25
25
  with:
26
26
  node-version: "20"
27
27
  cache: npm
@@ -33,7 +33,7 @@ jobs:
33
33
  npm run build
34
34
 
35
35
  - name: Install uv
36
- uses: astral-sh/setup-uv@v5
36
+ uses: astral-sh/setup-uv@v7
37
37
 
38
38
  - name: Build sdist + wheel
39
39
  run: uv build
@@ -66,7 +66,7 @@ jobs:
66
66
  echo "path=/tmp/release-notes.md" >> "$GITHUB_OUTPUT"
67
67
 
68
68
  - name: Create GitHub Release
69
- uses: softprops/action-gh-release@v2
69
+ uses: softprops/action-gh-release@v3
70
70
  with:
71
71
  tag_name: ${{ github.ref_name }}
72
72
  name: ${{ github.ref_name }}
@@ -6,6 +6,16 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.2] - 2026-06-26
10
+
11
+ ### Added
12
+ - One-command release script (`scripts/release.py`) and a `ruff check` lint job in CI.
13
+
14
+ ### Changed
15
+ - README is now user-facing (PyPI install + quick start for both lab types). The
16
+ project concept, structure, two-lab mapping, and coding-lab flow moved to
17
+ `docs/OVERVIEW.md`.
18
+
9
19
  ## [0.2.1] - 2026-06-26
10
20
 
11
21
  ### Fixed
@@ -35,7 +45,8 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
35
45
  evaluator) and coding labs (spec-writer → engineer → gate → reviewer →
36
46
  tech-lead), a live dashboard, and a Claude Code skill (`leanlab init --for-agent`).
37
47
 
38
- [Unreleased]: https://github.com/bacharSalleh/leanlab/compare/v0.2.1...HEAD
48
+ [Unreleased]: https://github.com/bacharSalleh/leanlab/compare/v0.2.2...HEAD
49
+ [0.2.2]: https://github.com/bacharSalleh/leanlab/compare/v0.2.1...v0.2.2
39
50
  [0.2.1]: https://github.com/bacharSalleh/leanlab/compare/v0.2.0...v0.2.1
40
51
  [0.2.0]: https://github.com/bacharSalleh/leanlab/compare/v0.1.0...v0.2.0
41
52
  [0.1.0]: https://github.com/bacharSalleh/leanlab/releases/tag/v0.1.0
leanlab-0.2.2/PKG-INFO ADDED
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: leanlab
3
+ Version: 0.2.2
4
+ Summary: A self-improving lab for AI agents — evolve ML experiments against a frozen metric, or ship coding tasks through a spec → gate → review → merge loop with locked acceptance tests.
5
+ Project-URL: Homepage, https://github.com/bacharSalleh/leanlab
6
+ Project-URL: Repository, https://github.com/bacharSalleh/leanlab
7
+ Project-URL: Issues, https://github.com/bacharSalleh/leanlab/issues
8
+ Project-URL: Changelog, https://github.com/bacharSalleh/leanlab/blob/main/CHANGELOG.md
9
+ Author-email: Bashar <welcomebachar@gmail.com>
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 Bashar
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: agents,claude,cli,coding-agent,evaluation,experiment,lab,llm,self-improving
33
+ Classifier: Development Status :: 4 - Beta
34
+ Classifier: Environment :: Console
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Operating System :: OS Independent
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Programming Language :: Python :: 3.13
42
+ Classifier: Topic :: Software Development :: Quality Assurance
43
+ Classifier: Topic :: Software Development :: Testing
44
+ Requires-Python: >=3.11
45
+ Requires-Dist: questionary>=2
46
+ Requires-Dist: rich>=13
47
+ Description-Content-Type: text/markdown
48
+
49
+ # leanlab
50
+
51
+ [![PyPI](https://img.shields.io/pypi/v/leanlab.svg)](https://pypi.org/project/leanlab/)
52
+ [![CI](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml/badge.svg)](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml)
53
+ [![Python](https://img.shields.io/pypi/pyversions/leanlab.svg)](https://pypi.org/project/leanlab/)
54
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
55
+
56
+ **Self-improving labs for AI agents.** Point leanlab at a task and a team of
57
+ Claude agents iterates toward a goal — evolving ML / optimization experiments
58
+ against a frozen metric, or shipping coding tasks through a
59
+ spec → gate → review → merge loop with locked acceptance tests.
60
+
61
+ ## Install
62
+
63
+ ```bash
64
+ pipx install leanlab # or: pip install leanlab · uvx leanlab
65
+ ```
66
+
67
+ Requires **Python 3.11+** and the **`claude` CLI** (the agents run on Claude Code).
68
+
69
+ ## Quick start
70
+
71
+ leanlab runs **inside your own project** — each lab lives in a `.leanlab/<name>/`
72
+ folder; the engine stays in the installed tool.
73
+
74
+ **Metric lab** — evolve a number (ML, optimization, anything that prints a score):
75
+
76
+ ```bash
77
+ cd ~/my-project
78
+ leanlab init iris # describe the task; Claude drafts the lab + scorer
79
+ leanlab check iris # verify it's wired correctly (free)
80
+ leanlab lock iris # freeze the scorer
81
+ leanlab run iris --n 5 # the agents evolve experiments (uses Claude)
82
+ leanlab serve iris # watch the live dashboard
83
+ ```
84
+
85
+ **Coding lab** — ship a coding task with locked acceptance tests:
86
+
87
+ ```bash
88
+ cd ~/my-repo # a git repository
89
+ leanlab spec "add a /health endpoint" # spec-writer drafts + locks the tests
90
+ leanlab build add-health # engineer → gate → reviewer → merge
91
+ leanlab board # live board: tasks, timeline, playbook
92
+ ```
93
+
94
+ ## Let Claude Code drive it
95
+
96
+ ```bash
97
+ cd ~/my-project && leanlab init --for-agent # installs a Claude Code skill
98
+ ```
99
+
100
+ Then just ask Claude Code — *"use leanlab to add a /health endpoint"* — and it
101
+ specs, builds, and merges through the honest test gate for you.
102
+
103
+ ## Docs
104
+
105
+ - **[docs/USAGE.md](docs/USAGE.md)** — every command, in order, with examples.
106
+ - **[docs/OVERVIEW.md](docs/OVERVIEW.md)** — how it works: the loop, the two lab
107
+ types, the coding-lab flow, and the project structure.
108
+ - **[CONTRIBUTING.md](CONTRIBUTING.md)** — local development (uv, tests, the React board).
109
+
110
+ MIT licensed — see [LICENSE](LICENSE).
@@ -0,0 +1,62 @@
1
+ # leanlab
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/leanlab.svg)](https://pypi.org/project/leanlab/)
4
+ [![CI](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml/badge.svg)](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml)
5
+ [![Python](https://img.shields.io/pypi/pyversions/leanlab.svg)](https://pypi.org/project/leanlab/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
7
+
8
+ **Self-improving labs for AI agents.** Point leanlab at a task and a team of
9
+ Claude agents iterates toward a goal — evolving ML / optimization experiments
10
+ against a frozen metric, or shipping coding tasks through a
11
+ spec → gate → review → merge loop with locked acceptance tests.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pipx install leanlab # or: pip install leanlab · uvx leanlab
17
+ ```
18
+
19
+ Requires **Python 3.11+** and the **`claude` CLI** (the agents run on Claude Code).
20
+
21
+ ## Quick start
22
+
23
+ leanlab runs **inside your own project** — each lab lives in a `.leanlab/<name>/`
24
+ folder; the engine stays in the installed tool.
25
+
26
+ **Metric lab** — evolve a number (ML, optimization, anything that prints a score):
27
+
28
+ ```bash
29
+ cd ~/my-project
30
+ leanlab init iris # describe the task; Claude drafts the lab + scorer
31
+ leanlab check iris # verify it's wired correctly (free)
32
+ leanlab lock iris # freeze the scorer
33
+ leanlab run iris --n 5 # the agents evolve experiments (uses Claude)
34
+ leanlab serve iris # watch the live dashboard
35
+ ```
36
+
37
+ **Coding lab** — ship a coding task with locked acceptance tests:
38
+
39
+ ```bash
40
+ cd ~/my-repo # a git repository
41
+ leanlab spec "add a /health endpoint" # spec-writer drafts + locks the tests
42
+ leanlab build add-health # engineer → gate → reviewer → merge
43
+ leanlab board # live board: tasks, timeline, playbook
44
+ ```
45
+
46
+ ## Let Claude Code drive it
47
+
48
+ ```bash
49
+ cd ~/my-project && leanlab init --for-agent # installs a Claude Code skill
50
+ ```
51
+
52
+ Then just ask Claude Code — *"use leanlab to add a /health endpoint"* — and it
53
+ specs, builds, and merges through the honest test gate for you.
54
+
55
+ ## Docs
56
+
57
+ - **[docs/USAGE.md](docs/USAGE.md)** — every command, in order, with examples.
58
+ - **[docs/OVERVIEW.md](docs/OVERVIEW.md)** — how it works: the loop, the two lab
59
+ types, the coding-lab flow, and the project structure.
60
+ - **[CONTRIBUTING.md](CONTRIBUTING.md)** — local development (uv, tests, the React board).
61
+
62
+ MIT licensed — see [LICENSE](LICENSE).
@@ -1,91 +1,28 @@
1
- # leanlab
1
+ # How leanlab works
2
2
 
3
- [![PyPI](https://img.shields.io/pypi/v/leanlab.svg)](https://pypi.org/project/leanlab/)
4
- [![CI](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml/badge.svg)](https://github.com/bacharSalleh/leanlab/actions/workflows/ci.yml)
5
- [![Python](https://img.shields.io/pypi/pyversions/leanlab.svg)](https://pypi.org/project/leanlab/)
6
- [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
3
+ This is the deeper tour — the idea, the two lab types, the coding-lab flow, and
4
+ the project structure. For installation and day-to-day commands, see the
5
+ [README](../README.md) and [USAGE.md](USAGE.md).
7
6
 
8
- ```bash
9
- pipx install leanlab # or: pip install leanlab · uvx leanlab
10
- ```
7
+ ## The idea
11
8
 
12
- A small **tool for self-improving experiment labs**. A team of agents —
13
- **Workers** (experimenters), a **Director**, and **HyperCritics** — evolve
14
- solutions against a **frozen evaluator**, one experiment at a time. The same loop
15
- drives any task: you just describe the *lab* and Claude builds the scorer.
9
+ leanlab runs a **self-improving loop**: make an attempt → judge it against a
10
+ frozen criterion → keep the best → learn for next time. A team of Claude agents
11
+ drives the loop; you only describe the *lab*.
16
12
 
17
- It is the trading "selflearn" idea, generalized: **strategy → Experiment**,
13
+ It generalizes the trading "selflearn" idea: **strategy → Experiment**,
18
14
  **Manager → Director**, `results.csv → results.jsonl`, and the objective (what to
19
15
  maximize or minimize) is configuration, not code.
20
16
 
21
17
  leanlab is used **inside your own project** (like archik): each lab lives in a
22
- `.leanlab/<name>/` folder; the engine stays in the installed tool.
23
-
24
- ## Quick start
25
-
26
- ```bash
27
- uv tool install --force --editable /path/to/leanlab # install the `leanlab` tool
28
- cd ~/my-project && uv init # your project (a uv project)
29
-
30
- leanlab init iris # describe the task; Claude drafts the lab
31
- leanlab check iris # verify it's wired correctly (free)
32
- leanlab lock iris # freeze the scorer
33
- leanlab run iris --n 5 # the agents evolve experiments (costs Claude)
34
- leanlab serve iris # watch the live dashboard
35
- ```
36
-
37
- **Full command guide:** [docs/USAGE.md](docs/USAGE.md) — the flow and what each
38
- command does exactly.
39
-
40
- ## Anatomy
41
-
42
- ```
43
- leanlab/ # the installable tool (engine — never copied into your project)
44
- ├── cli.py # commands: init · check · fix · run · serve · list · lock · unlock
45
- ├── core/
46
- │ ├── loop.py # run N experiments, score, log, wake Director/Critic
47
- │ ├── monitor.py # live dashboard: stat chips + progress chart + table + stream
48
- │ ├── init.py # interactive `init` — Claude drafts task + evaluator
49
- │ ├── doctor.py # preflight checks + Claude-powered `fix`
50
- │ └── agents/ # ports & adapters — the backend-agnostic agent layer
51
- └── templates/agents/ # CLAUDE.md (Worker) · director.md · critic.md (injected, not copied)
18
+ `.leanlab/<name>/` folder; the engine stays in the installed tool and is never
19
+ copied into your project.
52
20
 
53
- <your project>/.leanlab/<name>/ # a lab — only YOUR files
54
- ├── task.md goal + experiment contract
55
- ├── lab.json objective {metric, direction}, commands, cadences
56
- ├── evaluation.py the FROZEN evaluator → prints ONE line of JSON metrics
57
- ├── validate.py structural check the Worker runs (no score)
58
- ├── experiments/ where the Worker writes one file per loop
59
- └── results.jsonl the book: one JSON record per experiment
60
- ```
61
-
62
- **How a lab plugs in:** the engine never imports a lab. It runs the lab's
63
- `validate_cmd` / `eval_cmd` (from `lab.json`) as subprocesses, reads the **JSON
64
- metrics** the evaluator prints, and ranks by the configured **objective**. So a lab
65
- can be ML, trading, graphics, optimization — anything that can print a metric.
66
-
67
- ## Make your own lab
68
-
69
- `leanlab init <name>` is interactive: you describe the task in plain words, Claude
70
- drafts `task.md` and picks the objective, then proposes an `evaluation.py` you
71
- approve (or give feedback to revise). It installs the scorer's libraries and
72
- self-checks the wiring before finishing. Then `leanlab lock <name>` and
73
- `leanlab run <name>`.
21
+ ## Two lab types
74
22
 
75
- If a lab is mis-wired, `leanlab check` tells you what's wrong and `leanlab fix`
76
- has Claude repair it.
77
-
78
- ## The example lab: house-prices
79
-
80
- This repo dogfoods itself — `.leanlab/house-prices` predicts California median
81
- house value (**minimize RMSE**). Each experiment defines `build_estimator()` (any
82
- scikit-learn-style model); the evaluator fits it on a fixed split and reports
83
- `rmse / mae / r2 / overfit_gap / train_secs` on held-out data.
84
-
85
- ## Two lab types — naming map
86
-
87
- leanlab runs the same loop two ways. A **metric lab** (ML/optimization — evolve a number)
88
- and a **coding lab** (do coding tasks on a repo — pass tests). Same engine, different words:
23
+ The same loop runs two ways. A **metric lab** (ML / optimization — evolve a
24
+ number) and a **coding lab** (do coding tasks on a repo — pass tests). Same
25
+ engine, different words:
89
26
 
90
27
  **The team (agents)**
91
28
 
@@ -119,23 +56,16 @@ and a **coding lab** (do coding tasks on a repo — pass tests). Same engine, di
119
56
  | `serve` (dashboard) | `board` (dashboard) |
120
57
  | `lock` / `unlock` | (lock is automatic in `spec`) |
121
58
 
122
- **archik nodes**
123
-
124
- | Metric lab | Coding lab |
125
- |------------|-----------|
126
- | `loop` | `engineer` |
127
- | `evaluator` | `gate-runner` |
128
- | `results-store` | `playbook` + `coding-results` |
129
- | `dashboard` | `coding-board` |
130
-
131
- Same idea both ways: **make an attempt → judge it → keep the best → learn for next time** —
132
- just "experiment + metric + memory" swapped for "code change + tests + playbook."
59
+ Same idea both ways: **make an attempt → judge it → keep the best → learn for
60
+ next time** — just "experiment + metric + memory" swapped for "code change +
61
+ tests + playbook."
133
62
 
134
63
  ## The coding lab flow
135
64
 
136
- A coding lab is an **assembly line with quality gates**. Each step hands off to the next, and
137
- any failed gate sends the work back to the engineer — up to `--max-attempts`. Nothing reaches
138
- `main` until the tests pass, the work is proven honest, and every reviewer approves.
65
+ A coding lab is an **assembly line with quality gates**. Each step hands off to
66
+ the next, and any failed gate sends the work back to the engineer — up to
67
+ `--max-attempts`. Nothing reaches `main` until the tests pass, the work is proven
68
+ honest, and every reviewer approves.
139
69
 
140
70
  ```
141
71
  Developer
@@ -182,44 +112,63 @@ any failed gate sends the work back to the engineer — up to `--max-attempts`.
182
112
  | Merge | *automated* | The branch merges into `main` — the change ships. |
183
113
  | Playbook | **Tech-lead** | Rewrites `PLAYBOOK.md` so the next task starts with the project's conventions and pitfalls. |
184
114
 
185
- Watch all of it live with `leanlab board`: the four roles, a per-task timeline, the agent chat
186
- (every session, with token cost), and the growing playbook.
115
+ Watch it live with `leanlab board`: the four roles, a per-task round-by-round
116
+ timeline, the agent chat (every session, with token cost), and the playbook.
187
117
 
188
- **Why it compounds:** every merged task adds its locked tests to `main` (a ratchet that never
189
- loosens), and the playbook accumulates — so the lab keeps getting better at *your* project.
118
+ **Why it compounds:** every merged task adds its locked tests to `main` (a
119
+ ratchet that never loosens), and the playbook accumulates — so the lab keeps
120
+ getting better at *your* project.
190
121
 
191
- ## Develop / test
122
+ ## Structure
192
123
 
193
- ```bash
194
- uv sync
195
- uv run pytest # the test suite
196
- uv run leanlab list # run the tool from the checkout, no install
197
124
  ```
125
+ leanlab/ # the installable tool (engine — never copied into your project)
126
+ ├── cli.py # commands: init · check · fix · run · serve · spec · build · board · list · lock · unlock
127
+ ├── core/
128
+ │ ├── loop.py # run N experiments, score, log, wake Director/Critic
129
+ │ ├── monitor.py # metric-lab live dashboard
130
+ │ ├── init.py # interactive `init` — Claude drafts task + evaluator
131
+ │ ├── doctor.py # preflight checks + Claude-powered `fix`
132
+ │ ├── coding/ # the coding lab: spec · engineer · gate · reviewer · tech-lead · board
133
+ │ └── agents/ # ports & adapters — the backend-agnostic agent layer
134
+ ├── templates/agents/ # the agent personas (injected into prompts, not copied)
135
+ └── core/coding/board_dist/ # the React board UI, compiled (built from frontend/)
198
136
 
199
- ### Board UI (React + Tailwind)
200
-
201
- The `leanlab board` dashboard is a React + Tailwind app in [`frontend/`](frontend/), built
202
- into `leanlab/core/coding/board_dist/` and served by the Python board server. The Python side
203
- exposes the data as `/api/state`, `/api/task`, and `/api/stream` (SSE); React renders it.
204
-
205
- ```bash
206
- cd frontend && npm install && npm run build # compile the UI (re-run after editing src/)
137
+ <your project>/.leanlab/<name>/ # a metric lab — only YOUR files
138
+ ├── task.md goal + experiment contract
139
+ ├── lab.json objective {metric, direction}, commands, cadences
140
+ ├── evaluation.py the FROZEN evaluator — prints ONE line of JSON metrics
141
+ ├── validate.py structural check the Worker runs (no score)
142
+ ├── experiments/ where the Worker writes one file per loop
143
+ └── results.jsonl the book: one JSON record per experiment
207
144
  ```
208
145
 
209
- For live UI work, run `leanlab board --no-open` (API on `:8766`) and `npm run dev` in `frontend/`
210
- (Vite on `:5173`, proxying `/api`). The compiled `board_dist/` ships inside the wheel.
146
+ **How a lab plugs in:** the engine never imports a lab. It runs the lab's
147
+ `validate_cmd` / `eval_cmd` (from `lab.json`) as subprocesses, reads the **JSON
148
+ metrics** the evaluator prints, and ranks by the configured **objective**. So a
149
+ lab can be ML, trading, graphics, optimization — anything that can print a
150
+ metric.
211
151
 
212
- ## Let Claude Code drive it
152
+ ## Making a metric lab
213
153
 
214
- ```bash
215
- cd ~/my-project && leanlab init --for-agent # installs .claude/skills/leanlab/SKILL.md
216
- ```
217
- Then talk to Claude Code — *"use leanlab to add a /health endpoint"* — and it specs, builds, and
218
- merges through the honest test gate (`spec --yes` / `build` run headless). See `docs/USAGE.md`.
154
+ `leanlab init <name>` is interactive: you describe the task in plain words, Claude
155
+ drafts `task.md` and picks the objective, then proposes an `evaluation.py` you
156
+ approve (or give feedback to revise). It installs the scorer's libraries and
157
+ self-checks the wiring before finishing. Then `leanlab lock <name>` and
158
+ `leanlab run <name>`. If a lab is mis-wired, `leanlab check` says what's wrong and
159
+ `leanlab fix` has Claude repair it.
160
+
161
+ **Example — house-prices:** this repo dogfoods itself. `.leanlab/house-prices`
162
+ predicts California median house value (**minimize RMSE**). Each experiment
163
+ defines `build_estimator()` (any scikit-learn-style model); the evaluator fits it
164
+ on a fixed split and reports `rmse / mae / r2 / overfit_gap / train_secs` on
165
+ held-out data.
219
166
 
220
- ## Notes
167
+ ## Honesty model
221
168
 
222
169
  - Agents get full tools and are told to be proactive researchers (web, ML, `uv add`).
223
- - The Worker never runs the evaluator, so scores stay honest; `lock` freezes it.
224
- - The evaluator (and agent specs) live in the package and are injected into prompts —
225
- nothing framework-level is copied into your project.
170
+ - The Worker never runs the evaluator, so metric scores stay honest; `lock` freezes it.
171
+ - In coding labs, acceptance tests are locked (sha256, out of the worktree),
172
+ restored before every gate, and re-run in isolation to catch fixture-gaming.
173
+ - The evaluator and agent personas live in the package and are injected into
174
+ prompts — nothing framework-level is copied into your project.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "leanlab-board",
3
3
  "private": true,
4
- "version": "0.2.1",
4
+ "version": "0.2.2",
5
5
  "type": "module",
6
6
  "description": "React + Tailwind UI for the leanlab coding board (built into the Python wheel).",
7
7
  "scripts": {
@@ -29,7 +29,7 @@ def _version() -> str:
29
29
  from importlib.metadata import PackageNotFoundError, version
30
30
  return version("leanlab")
31
31
  except (ImportError, PackageNotFoundError):
32
- return "0.2.1"
32
+ return "0.2.2"
33
33
 
34
34
 
35
35
  def labs_dir() -> Path:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "leanlab"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  description = "A self-improving lab for AI agents — evolve ML experiments against a frozen metric, or ship coding tasks through a spec → gate → review → merge loop with locked acceptance tests."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"