millstone 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- millstone-0.3.3/LICENSE +21 -0
- millstone-0.3.3/PKG-INFO +346 -0
- millstone-0.3.3/README.md +294 -0
- millstone-0.3.3/pyproject.toml +137 -0
- millstone-0.3.3/setup.cfg +4 -0
- millstone-0.3.3/src/millstone/__init__.py +8 -0
- millstone-0.3.3/src/millstone/agent_providers/__init__.py +22 -0
- millstone-0.3.3/src/millstone/agent_providers/base.py +102 -0
- millstone-0.3.3/src/millstone/agent_providers/implementations.py +572 -0
- millstone-0.3.3/src/millstone/agent_providers/registry.py +29 -0
- millstone-0.3.3/src/millstone/artifact_providers/__init__.py +47 -0
- millstone-0.3.3/src/millstone/artifact_providers/base.py +151 -0
- millstone-0.3.3/src/millstone/artifact_providers/file.py +724 -0
- millstone-0.3.3/src/millstone/artifact_providers/mcp.py +841 -0
- millstone-0.3.3/src/millstone/artifact_providers/protocols.py +49 -0
- millstone-0.3.3/src/millstone/artifact_providers/registry.py +132 -0
- millstone-0.3.3/src/millstone/artifacts/__init__.py +31 -0
- millstone-0.3.3/src/millstone/artifacts/eval_manager.py +2648 -0
- millstone-0.3.3/src/millstone/artifacts/evidence_store.py +239 -0
- millstone-0.3.3/src/millstone/artifacts/models.py +196 -0
- millstone-0.3.3/src/millstone/artifacts/tasklist.py +1441 -0
- millstone-0.3.3/src/millstone/config.py +464 -0
- millstone-0.3.3/src/millstone/loops/__init__.py +25 -0
- millstone-0.3.3/src/millstone/loops/engine.py +154 -0
- millstone-0.3.3/src/millstone/loops/inner.py +572 -0
- millstone-0.3.3/src/millstone/loops/outer.py +2345 -0
- millstone-0.3.3/src/millstone/loops/registry/__init__.py +11 -0
- millstone-0.3.3/src/millstone/loops/registry/loops.py +120 -0
- millstone-0.3.3/src/millstone/loops/registry_adapter.py +50 -0
- millstone-0.3.3/src/millstone/loops/types/__init__.py +38 -0
- millstone-0.3.3/src/millstone/loops/types/core.py +28 -0
- millstone-0.3.3/src/millstone/loops/types/loops.py +170 -0
- millstone-0.3.3/src/millstone/loops/validation.py +70 -0
- millstone-0.3.3/src/millstone/orchestrate.py +9 -0
- millstone-0.3.3/src/millstone/policy/__init__.py +29 -0
- millstone-0.3.3/src/millstone/policy/capability.py +94 -0
- millstone-0.3.3/src/millstone/policy/effects.py +164 -0
- millstone-0.3.3/src/millstone/policy/reference_integrity.py +114 -0
- millstone-0.3.3/src/millstone/policy/schemas.py +542 -0
- millstone-0.3.3/src/millstone/prompts/__init__.py +0 -0
- millstone-0.3.3/src/millstone/prompts/analyze_fix_prompt.md +38 -0
- millstone-0.3.3/src/millstone/prompts/analyze_prompt.md +81 -0
- millstone-0.3.3/src/millstone/prompts/analyze_review_prompt.md +70 -0
- millstone-0.3.3/src/millstone/prompts/commit_prompt.md +32 -0
- millstone-0.3.3/src/millstone/prompts/compact_tasklist.md +68 -0
- millstone-0.3.3/src/millstone/prompts/complexity_prompt.md +47 -0
- millstone-0.3.3/src/millstone/prompts/context_extraction_prompt.md +38 -0
- millstone-0.3.3/src/millstone/prompts/design_fix_prompt.md +28 -0
- millstone-0.3.3/src/millstone/prompts/design_prompt.md +75 -0
- millstone-0.3.3/src/millstone/prompts/plan_fix_prompt.md +25 -0
- millstone-0.3.3/src/millstone/prompts/plan_prompt.md +102 -0
- millstone-0.3.3/src/millstone/prompts/plan_review_prompt.md +49 -0
- millstone-0.3.3/src/millstone/prompts/release_prompt.md +14 -0
- millstone-0.3.3/src/millstone/prompts/research_prompt.md +108 -0
- millstone-0.3.3/src/millstone/prompts/review_design_prompt.md +55 -0
- millstone-0.3.3/src/millstone/prompts/review_diff_prompt.md +22 -0
- millstone-0.3.3/src/millstone/prompts/review_prompt.md +88 -0
- millstone-0.3.3/src/millstone/prompts/sanity_check_impl.md +56 -0
- millstone-0.3.3/src/millstone/prompts/sanity_check_review.md +47 -0
- millstone-0.3.3/src/millstone/prompts/split_task_prompt.md +52 -0
- millstone-0.3.3/src/millstone/prompts/sre_prompt.md +14 -0
- millstone-0.3.3/src/millstone/prompts/task_prompt.md +58 -0
- millstone-0.3.3/src/millstone/prompts/task_split_prompt.md +105 -0
- millstone-0.3.3/src/millstone/prompts/tasklist_prompt.md +82 -0
- millstone-0.3.3/src/millstone/prompts/utils.py +19 -0
- millstone-0.3.3/src/millstone/runtime/__init__.py +5 -0
- millstone-0.3.3/src/millstone/runtime/context.py +248 -0
- millstone-0.3.3/src/millstone/runtime/locks.py +83 -0
- millstone-0.3.3/src/millstone/runtime/merge_pipeline.py +300 -0
- millstone-0.3.3/src/millstone/runtime/orchestrator.py +3887 -0
- millstone-0.3.3/src/millstone/runtime/parallel.py +857 -0
- millstone-0.3.3/src/millstone/runtime/parallel_state.py +116 -0
- millstone-0.3.3/src/millstone/runtime/profile.py +75 -0
- millstone-0.3.3/src/millstone/runtime/scheduler.py +198 -0
- millstone-0.3.3/src/millstone/runtime/worktree.py +212 -0
- millstone-0.3.3/src/millstone/utils.py +367 -0
- millstone-0.3.3/src/millstone.egg-info/PKG-INFO +346 -0
- millstone-0.3.3/src/millstone.egg-info/SOURCES.txt +113 -0
- millstone-0.3.3/src/millstone.egg-info/dependency_links.txt +1 -0
- millstone-0.3.3/src/millstone.egg-info/entry_points.txt +2 -0
- millstone-0.3.3/src/millstone.egg-info/requires.txt +33 -0
- millstone-0.3.3/src/millstone.egg-info/top_level.txt +1 -0
- millstone-0.3.3/tests/test_artifact_models.py +372 -0
- millstone-0.3.3/tests/test_artifact_provider_registry.py +334 -0
- millstone-0.3.3/tests/test_artifact_providers.py +326 -0
- millstone-0.3.3/tests/test_capability_policy.py +71 -0
- millstone-0.3.3/tests/test_cli_providers.py +1107 -0
- millstone-0.3.3/tests/test_complexity_analyzer.py +80 -0
- millstone-0.3.3/tests/test_config.py +200 -0
- millstone-0.3.3/tests/test_effect_gate_wire.py +90 -0
- millstone-0.3.3/tests/test_effect_provider.py +171 -0
- millstone-0.3.3/tests/test_evidence.py +505 -0
- millstone-0.3.3/tests/test_file_artifact_providers.py +1898 -0
- millstone-0.3.3/tests/test_gemini_noise.py +91 -0
- millstone-0.3.3/tests/test_gemini_provider.py +178 -0
- millstone-0.3.3/tests/test_git_add_regression.py +82 -0
- millstone-0.3.3/tests/test_integration.py +1107 -0
- millstone-0.3.3/tests/test_locks.py +65 -0
- millstone-0.3.3/tests/test_loop_engine.py +180 -0
- millstone-0.3.3/tests/test_loop_registry_adapter.py +49 -0
- millstone-0.3.3/tests/test_loops_registry_roles.py +24 -0
- millstone-0.3.3/tests/test_mcp_providers.py +1421 -0
- millstone-0.3.3/tests/test_merge_pipeline.py +402 -0
- millstone-0.3.3/tests/test_no_code_flow.py +129 -0
- millstone-0.3.3/tests/test_orchestrator.py +16159 -0
- millstone-0.3.3/tests/test_outer_loops.py +1972 -0
- millstone-0.3.3/tests/test_parallel.py +1448 -0
- millstone-0.3.3/tests/test_parallel_state.py +74 -0
- millstone-0.3.3/tests/test_profile.py +151 -0
- millstone-0.3.3/tests/test_prompt_placeholders.py +167 -0
- millstone-0.3.3/tests/test_provider_conformance.py +245 -0
- millstone-0.3.3/tests/test_reference_integrity.py +451 -0
- millstone-0.3.3/tests/test_scheduler.py +157 -0
- millstone-0.3.3/tests/test_tasklist.py +87 -0
- millstone-0.3.3/tests/test_worktree.py +151 -0
millstone-0.3.3/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 millstone contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
millstone-0.3.3/PKG-INFO
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: millstone
|
|
3
|
+
Version: 0.3.3
|
|
4
|
+
Summary: Orchestrator for agentic coding tools
|
|
5
|
+
Author: millstone contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/wittekin/millstone
|
|
8
|
+
Project-URL: Repository, https://github.com/wittekin/millstone
|
|
9
|
+
Project-URL: Issues, https://github.com/wittekin/millstone/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/wittekin/millstone/blob/main/CHANGELOG.md
|
|
11
|
+
Project-URL: Documentation, https://wittekin.github.io/millstone/
|
|
12
|
+
Keywords: agentic,automation,cli,developer-tools,orchestration
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
21
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: requests>=2.31.0
|
|
26
|
+
Provides-Extra: test
|
|
27
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.0; extra == "test"
|
|
29
|
+
Provides-Extra: quality
|
|
30
|
+
Requires-Dist: ruff>=0.6.0; extra == "quality"
|
|
31
|
+
Requires-Dist: mypy>=1.10.0; extra == "quality"
|
|
32
|
+
Requires-Dist: types-requests>=2.31.0; extra == "quality"
|
|
33
|
+
Requires-Dist: vulture>=2.11; extra == "quality"
|
|
34
|
+
Provides-Extra: security
|
|
35
|
+
Requires-Dist: pip-audit>=2.7.0; extra == "security"
|
|
36
|
+
Provides-Extra: release
|
|
37
|
+
Requires-Dist: build>=1.2.0; extra == "release"
|
|
38
|
+
Requires-Dist: twine>=5.1.0; extra == "release"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
42
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
43
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
44
|
+
Requires-Dist: types-requests>=2.31.0; extra == "dev"
|
|
45
|
+
Requires-Dist: vulture>=2.11; extra == "dev"
|
|
46
|
+
Provides-Extra: docs
|
|
47
|
+
Requires-Dist: mkdocs>=1.6.0; extra == "docs"
|
|
48
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
49
|
+
Provides-Extra: viz
|
|
50
|
+
Requires-Dist: graphviz>=0.20; extra == "viz"
|
|
51
|
+
Dynamic: license-file
|
|
52
|
+
|
|
53
|
+
# millstone
|
|
54
|
+
[CI](https://github.com/wittekin/millstone/actions/workflows/ci.yml)
|
|
55
|
+
[Quality](https://github.com/wittekin/millstone/actions/workflows/quality.yml)
|
|
56
|
+
[Coverage](https://codecov.io/gh/wittekin/millstone)
|
|
57
|
+
[Docs](https://github.com/wittekin/millstone/actions/workflows/docs.yml)
|
|
58
|
+
[Release](https://github.com/wittekin/millstone/actions/workflows/release.yml)
|
|
59
|
+
[](https://pypi.org/project/millstone/)
|
|
60
|
+
[](https://pypi.org/project/millstone/)
|
|
61
|
+
[License](https://github.com/wittekin/millstone/blob/main/LICENSE)
|
|
62
|
+
|
|
63
|
+
Coding agents produce dramatically better results when they plan before they code, and when their output is reviewed by a second agent — ideally from a different model provider. The catch: manually running that cycle (design → review → revise → approve → plan → review → revise → implement → review → revise → commit) across multiple agents is extremely time-consuming.
|
|
64
|
+
|
|
65
|
+
`millstone` automates that end to end. It wraps any combination of coding CLIs (Claude Code, Codex, Gemini, OpenCode) in a deterministic build-review loop: one agent authors, a second reviews, feedback cycles until the reviewer approves, then the change is committed. The same loop governs designs, plans, and code — with optional autonomous outer loops that discover opportunities, generate designs, and break them into tasks without human prompting.
|
|
66
|
+
|
|
67
|
+
[Documentation](https://wittekin.github.io/millstone/) | [Getting Started](docs/getting-started.md) | [Meta Invoke](docs/prompts/execute.md) | [Contributing](CONTRIBUTING.md) | [Changelog](CHANGELOG.md)
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# 1) Install
|
|
73
|
+
pipx install millstone
|
|
74
|
+
|
|
75
|
+
# 2) Move into the repo you want to run on
|
|
76
|
+
cd /path/to/your/project
|
|
77
|
+
|
|
78
|
+
# 3) Recommended: give your coding agent an operator prompt
|
|
79
|
+
# @docs/prompts/execute.md (run a tasklist)
|
|
80
|
+
# @docs/prompts/design.md (design + plan a new feature)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Manual run to complete one task from your tasklist:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
millstone -n 1
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Highlights
|
|
90
|
+
|
|
91
|
+
- Deterministic inner loop: `Builder -> Sanity -> Reviewer -> Sanity -> Fix -> Commit`.
|
|
92
|
+
- Autonomous outer loops: `analyze`, `design`, `plan`, `cycle` — every authoring step is write/review gated.
|
|
93
|
+
- `--max-cycles` governs both inner build-review iterations and outer-loop authoring loops.
|
|
94
|
+
- Parallel execution via `git worktree` — run multiple tasks concurrently with isolated checkouts and a serialized merge queue.
|
|
95
|
+
- Primary operating mode is coding-agent-invoked execution (`docs/prompts/execute.md`).
|
|
96
|
+
- Built-in evaluation flow with result capture and regression comparison.
|
|
97
|
+
- Multi-provider CLI routing per role (`claude`, `codex`, `gemini`, `opencode`).
|
|
98
|
+
- Stateful runs with logs, evals, and recovery under `.millstone/`.
|
|
99
|
+
|
|
100
|
+
## Usage Patterns
|
|
101
|
+
|
|
102
|
+
| Goal | Command |
|
|
103
|
+
|---|---|
|
|
104
|
+
| Coding agent mediated execution (recommended) | Give your coding agent `docs/prompts/execute.md` |
|
|
105
|
+
| Execute next tasks from tasklist | `millstone` |
|
|
106
|
+
| Limit to one task | `millstone -n 1` |
|
|
107
|
+
| Run custom one-off task | `millstone --task "..."` |
|
|
108
|
+
| Claude Code as author, Codex as reviewer, one task, max of 6 write/review cycles | `millstone --cli claude --cli-reviewer codex -n 1 --max-cycles 6` |
|
|
109
|
+
| Run 4 tasks in parallel (worktree mode) | `millstone --worktrees --concurrency 4` |
|
|
110
|
+
| Dry-run prompt flow without invoking agents | `millstone --dry-run` |
|
|
111
|
+
| Scan codebase for opportunities | `millstone --analyze` |
|
|
112
|
+
| Generate a design doc | `millstone --design "Add caching layer"` |
|
|
113
|
+
| Turn design into atomic tasks | `millstone --plan .millstone/designs/add-caching-layer.md` |
|
|
114
|
+
| Run autonomous cycle end-to-end | `millstone --cycle` |
|
|
115
|
+
|
|
116
|
+
## How It Works
|
|
117
|
+
|
|
118
|
+
Inner loop (delivery):
|
|
119
|
+
|
|
120
|
+
```text
|
|
121
|
+
Builder -> Sanity Check -> Reviewer -> Sanity Check -> Fix Loop -> Commit
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Outer loop (self-direction):
|
|
125
|
+
|
|
126
|
+
```text
|
|
127
|
+
Analyze -> Design -> Plan -> [Inner Loop] -> Eval -> (repeat)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Every authoring step in the outer loop (analyze, design, plan) is write/review gated: a
|
|
131
|
+
reviewer agent checks the output and requests revisions until it approves or `--max-cycles`
|
|
132
|
+
is exhausted. This is the same iterative loop that governs inner-loop code changes.
|
|
133
|
+
|
|
134
|
+
> **Supersedes prior behavior**: `--analyze` previously ran the analysis agent once with no
|
|
135
|
+
> review step. All outer-loop authoring steps (analyze, design, plan) now run an iterative
|
|
136
|
+
> write/review/fix loop identical in structure to the inner build-review loop.
|
|
137
|
+
|
|
138
|
+
## Installation Options
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
# PyPI (recommended when release is available)
|
|
142
|
+
pipx install millstone
|
|
143
|
+
|
|
144
|
+
# GitHub latest
|
|
145
|
+
pipx install git+https://github.com/wittekin/millstone.git
|
|
146
|
+
|
|
147
|
+
# Contributor install
|
|
148
|
+
pip install -e .
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Optional extras:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pip install -e .[test] # pytest + coverage
|
|
155
|
+
pip install -e .[quality] # ruff + mypy
|
|
156
|
+
pip install -e .[security] # pip-audit
|
|
157
|
+
pip install -e .[release] # build + twine
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Minimal Tasklist Format
|
|
161
|
+
|
|
162
|
+
```markdown
|
|
163
|
+
# Tasklist
|
|
164
|
+
|
|
165
|
+
- [ ] First task to implement
|
|
166
|
+
- [ ] Second task
|
|
167
|
+
- [x] Already completed task
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
`millstone` executes the first unchecked `- [ ]` task.
|
|
171
|
+
|
|
172
|
+
## Configuration Snapshot
|
|
173
|
+
|
|
174
|
+
Create `.millstone/config.toml` in the target repo:
|
|
175
|
+
|
|
176
|
+
```toml
|
|
177
|
+
max_cycles = 3
|
|
178
|
+
max_tasks = 5
|
|
179
|
+
tasklist = ".millstone/tasklist.md"
|
|
180
|
+
|
|
181
|
+
cli = "claude"
|
|
182
|
+
cli_builder = "codex"
|
|
183
|
+
cli_reviewer = "claude"
|
|
184
|
+
|
|
185
|
+
eval_on_commit = false
|
|
186
|
+
approve_opportunities = true
|
|
187
|
+
approve_designs = true
|
|
188
|
+
approve_plans = true
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Multi-maintainer setup
|
|
192
|
+
|
|
193
|
+
By default, artifact files (tasklist, designs, opportunities) are written under `.millstone/` and are gitignored — suitable for single-maintainer or local-only workflows.
|
|
194
|
+
|
|
195
|
+
To commit artifacts to the repo and share them with teammates, opt in per artifact type:
|
|
196
|
+
|
|
197
|
+
```toml
|
|
198
|
+
commit_tasklist = true # stores at docs/tasklist.md
|
|
199
|
+
commit_designs = true # stores at designs/
|
|
200
|
+
commit_opportunities = true # stores at opportunities.md
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
For full multi-maintainer collaboration, use an external artifact provider (Jira, Linear, or GitHub Issues) instead of file-backed defaults.
|
|
204
|
+
|
|
205
|
+
### Tasklist filter contract
|
|
206
|
+
|
|
207
|
+
All tasklist providers (Jira, Linear, GitHub Issues) respect a provider-agnostic `[tasklist_filter]` section in `.millstone/config.toml`:
|
|
208
|
+
|
|
209
|
+
```toml
|
|
210
|
+
[tasklist_filter]
|
|
211
|
+
labels = ["sprint-1"] # AND – task must carry ALL listed labels
|
|
212
|
+
assignees = ["alice", "bob"] # OR – task assigned to ANY of these users
|
|
213
|
+
statuses = ["Todo", "In Progress"] # OR – task in ANY of these statuses
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Omit any key (or leave the list empty) to skip filtering on that dimension. The filter is applied when the outer loop fetches the next task from the remote provider. An explicit `filter` key inside `[tasklist_provider_options]` takes precedence over this section.
|
|
217
|
+
|
|
218
|
+
### Scoping remote backlogs
|
|
219
|
+
|
|
220
|
+
When using a remote tasklist provider (Jira, Linear, or GitHub Issues), the default scope is the full open-issue set for the configured project/team/repo. Use `[millstone.tasklist_filter]` to restrict millstone to a specific subset without modifying provider options.
|
|
221
|
+
|
|
222
|
+
**When to use local tasklist vs remote filters**
|
|
223
|
+
|
|
224
|
+
| Situation | Recommendation |
|
|
225
|
+
|---|---|
|
|
226
|
+
| Personal project or solo maintainer | Local `.millstone/tasklist.md` |
|
|
227
|
+
| Team with shared backlog in Jira/Linear/GitHub | Remote provider + `[millstone.tasklist_filter]` |
|
|
228
|
+
| Ad-hoc spike or one-off work | `millstone --task "..."` |
|
|
229
|
+
| Sprint-scoped automation on a shared board | Remote provider + label/cycle/milestone filter |
|
|
230
|
+
|
|
231
|
+
**Quick examples by backend**
|
|
232
|
+
|
|
233
|
+
Jira — current sprint label:
|
|
234
|
+
```toml
|
|
235
|
+
[tasklist_provider_options]
|
|
236
|
+
type = "jira"
|
|
237
|
+
project = "PROJ"
|
|
238
|
+
|
|
239
|
+
[millstone.tasklist_filter]
|
|
240
|
+
label = "sprint-1"
|
|
241
|
+
assignee = "john.doe"
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Linear — active cycle for a team:
|
|
245
|
+
```toml
|
|
246
|
+
[tasklist_provider_options]
|
|
247
|
+
type = "linear"
|
|
248
|
+
team_id = "<uuid>"
|
|
249
|
+
|
|
250
|
+
[millstone.tasklist_filter]
|
|
251
|
+
cycles = ["Cycle 5"]
|
|
252
|
+
label = "millstone"
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
GitHub Issues — label + milestone:
|
|
256
|
+
```toml
|
|
257
|
+
[tasklist_provider_options]
|
|
258
|
+
type = "github"
|
|
259
|
+
owner = "myorg"
|
|
260
|
+
repo = "myrepo"
|
|
261
|
+
|
|
262
|
+
[millstone.tasklist_filter]
|
|
263
|
+
label = "sprint-1"
|
|
264
|
+
milestone = "v1.2"
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
See full filter option reference in the per-backend docs under `docs/providers/`.
|
|
268
|
+
|
|
269
|
+
See full config and CLI options with:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
millstone --help
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## Project Signals
|
|
276
|
+
|
|
277
|
+
- Canonical loop ontology: `docs/architecture/ontology.md`
|
|
278
|
+
- Scope and safety boundaries: `docs/architecture/scope.md`
|
|
279
|
+
- Parallel execution with worktrees: `docs/worktrees.md`
|
|
280
|
+
- CLI providers: `docs/cli-providers/`
|
|
281
|
+
- Artifact providers: `docs/providers/`
|
|
282
|
+
- Release checklist: `docs/maintainer/release_checklist.md`
|
|
283
|
+
|
|
284
|
+
## Build and Release Workflows
|
|
285
|
+
|
|
286
|
+
This repository ships with CI, quality, docs, release, security, CodeQL, dependency review, and weekly maintenance workflows in `.github/workflows/`.
|
|
287
|
+
|
|
288
|
+
Tag release flow:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
|
292
|
+
git push origin vX.Y.Z
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## Star History
|
|
296
|
+
|
|
297
|
+
Planned after initial public release and first community adoption.
|
|
298
|
+
|
|
299
|
+
## Working Directory
|
|
300
|
+
|
|
301
|
+
`millstone` creates a `.millstone/` directory in your repo containing:
|
|
302
|
+
- `runs/` - Timestamped logs of each run
|
|
303
|
+
- `evals/` - JSON eval results for comparison
|
|
304
|
+
- `cycles/` - Logs of autonomous cycle decisions
|
|
305
|
+
- `state.json` - Saved state for `--continue`
|
|
306
|
+
- `config.toml` - Per-repo configuration
|
|
307
|
+
- `STOP.md` - Created by sanity check to halt
|
|
308
|
+
|
|
309
|
+
This directory is auto-added to `.gitignore`.
|
|
310
|
+
|
|
311
|
+
## Safety Checks
|
|
312
|
+
|
|
313
|
+
**Mechanical:**
|
|
314
|
+
- No changes detected -> Warn (proceeds to review)
|
|
315
|
+
- Too many lines changed -> Halt for human review
|
|
316
|
+
- Sensitive files (`.env`, credentials) -> Halt for human review
|
|
317
|
+
- New test failures (with `--eval-on-commit`) -> Halt
|
|
318
|
+
|
|
319
|
+
**Judgment (via LLM):**
|
|
320
|
+
- Builder output is gibberish -> Create `STOP.md` -> Halt
|
|
321
|
+
- Reviewer feedback is nonsensical -> Create `STOP.md` -> Halt
|
|
322
|
+
|
|
323
|
+
## Exit Codes
|
|
324
|
+
|
|
325
|
+
- `0` - Success
|
|
326
|
+
- `1` - Halted (needs human intervention)
|
|
327
|
+
|
|
328
|
+
## Expected Runtime
|
|
329
|
+
|
|
330
|
+
Each task typically takes 2-4 minutes. The orchestrator makes 5+ LLM calls per task: builder, implementation sanity check, reviewer, review sanity check, commit delegation, plus fix cycles if needed.
|
|
331
|
+
|
|
332
|
+
## Requirements
|
|
333
|
+
|
|
334
|
+
- Python 3.10+
|
|
335
|
+
- `claude` CLI installed and authenticated (default), or
|
|
336
|
+
- `codex` CLI installed and authenticated (if using `--cli codex`), or
|
|
337
|
+
- `gemini` CLI installed and authenticated (if using `--cli gemini`), or
|
|
338
|
+
- `opencode` CLI installed and authenticated (if using `--cli opencode`)
|
|
339
|
+
|
|
340
|
+
## Open Source Project Files
|
|
341
|
+
|
|
342
|
+
- License: [LICENSE](LICENSE)
|
|
343
|
+
- Contributing guide: [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
344
|
+
- Code of conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
|
|
345
|
+
- Security policy: [SECURITY.md](SECURITY.md)
|
|
346
|
+
- Changelog: [CHANGELOG.md](CHANGELOG.md)
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
# millstone
|
|
2
|
+
[CI](https://github.com/wittekin/millstone/actions/workflows/ci.yml)
|
|
3
|
+
[Quality](https://github.com/wittekin/millstone/actions/workflows/quality.yml)
|
|
4
|
+
[Coverage](https://codecov.io/gh/wittekin/millstone)
|
|
5
|
+
[Docs](https://github.com/wittekin/millstone/actions/workflows/docs.yml)
|
|
6
|
+
[Release](https://github.com/wittekin/millstone/actions/workflows/release.yml)
|
|
7
|
+
[](https://pypi.org/project/millstone/)
|
|
8
|
+
[](https://pypi.org/project/millstone/)
|
|
9
|
+
[License](https://github.com/wittekin/millstone/blob/main/LICENSE)
|
|
10
|
+
|
|
11
|
+
Coding agents produce dramatically better results when they plan before they code, and when their output is reviewed by a second agent — ideally from a different model provider. The catch: manually running that cycle (design → review → revise → approve → plan → review → revise → implement → review → revise → commit) across multiple agents is extremely time-consuming.
|
|
12
|
+
|
|
13
|
+
`millstone` automates that end to end. It wraps any combination of coding CLIs (Claude Code, Codex, Gemini, OpenCode) in a deterministic build-review loop: one agent authors, a second reviews, feedback cycles until the reviewer approves, then the change is committed. The same loop governs designs, plans, and code — with optional autonomous outer loops that discover opportunities, generate designs, and break them into tasks without human prompting.
|
|
14
|
+
|
|
15
|
+
[Documentation](https://wittekin.github.io/millstone/) | [Getting Started](docs/getting-started.md) | [Meta Invoke](docs/prompts/execute.md) | [Contributing](CONTRIBUTING.md) | [Changelog](CHANGELOG.md)
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# 1) Install
|
|
21
|
+
pipx install millstone
|
|
22
|
+
|
|
23
|
+
# 2) Move into the repo you want to run on
|
|
24
|
+
cd /path/to/your/project
|
|
25
|
+
|
|
26
|
+
# 3) Recommended: give your coding agent an operator prompt
|
|
27
|
+
# @docs/prompts/execute.md (run a tasklist)
|
|
28
|
+
# @docs/prompts/design.md (design + plan a new feature)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Manual run to complete one task from your tasklist:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
millstone -n 1
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Highlights
|
|
38
|
+
|
|
39
|
+
- Deterministic inner loop: `Builder -> Sanity -> Reviewer -> Sanity -> Fix -> Commit`.
|
|
40
|
+
- Autonomous outer loops: `analyze`, `design`, `plan`, `cycle` — every authoring step is write/review gated.
|
|
41
|
+
- `--max-cycles` governs both inner build-review iterations and outer-loop authoring loops.
|
|
42
|
+
- Parallel execution via `git worktree` — run multiple tasks concurrently with isolated checkouts and a serialized merge queue.
|
|
43
|
+
- Primary operating mode is coding-agent-invoked execution (`docs/prompts/execute.md`).
|
|
44
|
+
- Built-in evaluation flow with result capture and regression comparison.
|
|
45
|
+
- Multi-provider CLI routing per role (`claude`, `codex`, `gemini`, `opencode`).
|
|
46
|
+
- Stateful runs with logs, evals, and recovery under `.millstone/`.
|
|
47
|
+
|
|
48
|
+
## Usage Patterns
|
|
49
|
+
|
|
50
|
+
| Goal | Command |
|
|
51
|
+
|---|---|
|
|
52
|
+
| Coding agent mediated execution (recommended) | Give your coding agent `docs/prompts/execute.md` |
|
|
53
|
+
| Execute next tasks from tasklist | `millstone` |
|
|
54
|
+
| Limit to one task | `millstone -n 1` |
|
|
55
|
+
| Run custom one-off task | `millstone --task "..."` |
|
|
56
|
+
| Claude Code as author, Codex as reviewer, one task, max of 6 write/review cycles | `millstone --cli claude --cli-reviewer codex -n 1 --max-cycles 6` |
|
|
57
|
+
| Run 4 tasks in parallel (worktree mode) | `millstone --worktrees --concurrency 4` |
|
|
58
|
+
| Dry-run prompt flow without invoking agents | `millstone --dry-run` |
|
|
59
|
+
| Scan codebase for opportunities | `millstone --analyze` |
|
|
60
|
+
| Generate a design doc | `millstone --design "Add caching layer"` |
|
|
61
|
+
| Turn design into atomic tasks | `millstone --plan .millstone/designs/add-caching-layer.md` |
|
|
62
|
+
| Run autonomous cycle end-to-end | `millstone --cycle` |
|
|
63
|
+
|
|
64
|
+
## How It Works
|
|
65
|
+
|
|
66
|
+
Inner loop (delivery):
|
|
67
|
+
|
|
68
|
+
```text
|
|
69
|
+
Builder -> Sanity Check -> Reviewer -> Sanity Check -> Fix Loop -> Commit
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Outer loop (self-direction):
|
|
73
|
+
|
|
74
|
+
```text
|
|
75
|
+
Analyze -> Design -> Plan -> [Inner Loop] -> Eval -> (repeat)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Every authoring step in the outer loop (analyze, design, plan) is write/review gated: a
|
|
79
|
+
reviewer agent checks the output and requests revisions until it approves or `--max-cycles`
|
|
80
|
+
is exhausted. This is the same iterative loop that governs inner-loop code changes.
|
|
81
|
+
|
|
82
|
+
> **Supersedes prior behavior**: `--analyze` previously ran the analysis agent once with no
|
|
83
|
+
> review step. All outer-loop authoring steps (analyze, design, plan) now run an iterative
|
|
84
|
+
> write/review/fix loop identical in structure to the inner build-review loop.
|
|
85
|
+
|
|
86
|
+
## Installation Options
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# PyPI (recommended when release is available)
|
|
90
|
+
pipx install millstone
|
|
91
|
+
|
|
92
|
+
# GitHub latest
|
|
93
|
+
pipx install git+https://github.com/wittekin/millstone.git
|
|
94
|
+
|
|
95
|
+
# Contributor install
|
|
96
|
+
pip install -e .
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Optional extras:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install -e .[test] # pytest + coverage
|
|
103
|
+
pip install -e .[quality] # ruff + mypy
|
|
104
|
+
pip install -e .[security] # pip-audit
|
|
105
|
+
pip install -e .[release] # build + twine
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Minimal Tasklist Format
|
|
109
|
+
|
|
110
|
+
```markdown
|
|
111
|
+
# Tasklist
|
|
112
|
+
|
|
113
|
+
- [ ] First task to implement
|
|
114
|
+
- [ ] Second task
|
|
115
|
+
- [x] Already completed task
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
`millstone` executes the first unchecked `- [ ]` task.
|
|
119
|
+
|
|
120
|
+
## Configuration Snapshot
|
|
121
|
+
|
|
122
|
+
Create `.millstone/config.toml` in the target repo:
|
|
123
|
+
|
|
124
|
+
```toml
|
|
125
|
+
max_cycles = 3
|
|
126
|
+
max_tasks = 5
|
|
127
|
+
tasklist = ".millstone/tasklist.md"
|
|
128
|
+
|
|
129
|
+
cli = "claude"
|
|
130
|
+
cli_builder = "codex"
|
|
131
|
+
cli_reviewer = "claude"
|
|
132
|
+
|
|
133
|
+
eval_on_commit = false
|
|
134
|
+
approve_opportunities = true
|
|
135
|
+
approve_designs = true
|
|
136
|
+
approve_plans = true
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Multi-maintainer setup
|
|
140
|
+
|
|
141
|
+
By default, artifact files (tasklist, designs, opportunities) are written under `.millstone/` and are gitignored — suitable for single-maintainer or local-only workflows.
|
|
142
|
+
|
|
143
|
+
To commit artifacts to the repo and share them with teammates, opt in per artifact type:
|
|
144
|
+
|
|
145
|
+
```toml
|
|
146
|
+
commit_tasklist = true # stores at docs/tasklist.md
|
|
147
|
+
commit_designs = true # stores at designs/
|
|
148
|
+
commit_opportunities = true # stores at opportunities.md
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
For full multi-maintainer collaboration, use an external artifact provider (Jira, Linear, or GitHub Issues) instead of file-backed defaults.
|
|
152
|
+
|
|
153
|
+
### Tasklist filter contract
|
|
154
|
+
|
|
155
|
+
All tasklist providers (Jira, Linear, GitHub Issues) respect a provider-agnostic `[tasklist_filter]` section in `.millstone/config.toml`:
|
|
156
|
+
|
|
157
|
+
```toml
|
|
158
|
+
[tasklist_filter]
|
|
159
|
+
labels = ["sprint-1"] # AND – task must carry ALL listed labels
|
|
160
|
+
assignees = ["alice", "bob"] # OR – task assigned to ANY of these users
|
|
161
|
+
statuses = ["Todo", "In Progress"] # OR – task in ANY of these statuses
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Omit any key (or leave the list empty) to skip filtering on that dimension. The filter is applied when the outer loop fetches the next task from the remote provider. An explicit `filter` key inside `[tasklist_provider_options]` takes precedence over this section.
|
|
165
|
+
|
|
166
|
+
### Scoping remote backlogs
|
|
167
|
+
|
|
168
|
+
When using a remote tasklist provider (Jira, Linear, or GitHub Issues), the default scope is the full open-issue set for the configured project/team/repo. Use `[millstone.tasklist_filter]` to restrict millstone to a specific subset without modifying provider options.
|
|
169
|
+
|
|
170
|
+
**When to use local tasklist vs remote filters**
|
|
171
|
+
|
|
172
|
+
| Situation | Recommendation |
|
|
173
|
+
|---|---|
|
|
174
|
+
| Personal project or solo maintainer | Local `.millstone/tasklist.md` |
|
|
175
|
+
| Team with shared backlog in Jira/Linear/GitHub | Remote provider + `[millstone.tasklist_filter]` |
|
|
176
|
+
| Ad-hoc spike or one-off work | `millstone --task "..."` |
|
|
177
|
+
| Sprint-scoped automation on a shared board | Remote provider + label/cycle/milestone filter |
|
|
178
|
+
|
|
179
|
+
**Quick examples by backend**
|
|
180
|
+
|
|
181
|
+
Jira — current sprint label:
|
|
182
|
+
```toml
|
|
183
|
+
[tasklist_provider_options]
|
|
184
|
+
type = "jira"
|
|
185
|
+
project = "PROJ"
|
|
186
|
+
|
|
187
|
+
[tasklist_filter]
|
|
188
|
+
label = "sprint-1"
|
|
189
|
+
assignee = "john.doe"
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Linear — active cycle for a team:
|
|
193
|
+
```toml
|
|
194
|
+
[tasklist_provider_options]
|
|
195
|
+
type = "linear"
|
|
196
|
+
team_id = "<uuid>"
|
|
197
|
+
|
|
198
|
+
[tasklist_filter]
|
|
199
|
+
cycles = ["Cycle 5"]
|
|
200
|
+
label = "millstone"
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
GitHub Issues — label + milestone:
|
|
204
|
+
```toml
|
|
205
|
+
[tasklist_provider_options]
|
|
206
|
+
type = "github"
|
|
207
|
+
owner = "myorg"
|
|
208
|
+
repo = "myrepo"
|
|
209
|
+
|
|
210
|
+
[tasklist_filter]
|
|
211
|
+
label = "sprint-1"
|
|
212
|
+
milestone = "v1.2"
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
See the full filter option reference in the per-backend docs under `docs/providers/`.
|
|
216
|
+
|
|
217
|
+
See full config and CLI options with:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
millstone --help
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Project Signals
|
|
224
|
+
|
|
225
|
+
- Canonical loop ontology: `docs/architecture/ontology.md`
|
|
226
|
+
- Scope and safety boundaries: `docs/architecture/scope.md`
|
|
227
|
+
- Parallel execution with worktrees: `docs/worktrees.md`
|
|
228
|
+
- CLI providers: `docs/cli-providers/`
|
|
229
|
+
- Artifact providers: `docs/providers/`
|
|
230
|
+
- Release checklist: `docs/maintainer/release_checklist.md`
|
|
231
|
+
|
|
232
|
+
## Build and Release Workflows
|
|
233
|
+
|
|
234
|
+
This repository ships with CI, quality, docs, release, security, CodeQL, dependency review, and weekly maintenance workflows in `.github/workflows/`.
|
|
235
|
+
|
|
236
|
+
Tag release flow:
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
|
240
|
+
git push origin vX.Y.Z
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Star History
|
|
244
|
+
|
|
245
|
+
Planned after initial public release and first community adoption.
|
|
246
|
+
|
|
247
|
+
## Working Directory
|
|
248
|
+
|
|
249
|
+
millstone creates a `.millstone/` directory in your repo containing:
|
|
250
|
+
- `runs/` - Timestamped logs of each run
|
|
251
|
+
- `evals/` - JSON eval results for comparison
|
|
252
|
+
- `cycles/` - Logs of autonomous cycle decisions
|
|
253
|
+
- `state.json` - Saved state for `--continue`
|
|
254
|
+
- `config.toml` - Per-repo configuration
|
|
255
|
+
- `STOP.md` - Created by a sanity check to halt the run
|
|
256
|
+
|
|
257
|
+
This directory is auto-added to `.gitignore`.
|
|
258
|
+
|
|
259
|
+
## Safety Checks
|
|
260
|
+
|
|
261
|
+
**Mechanical:**
|
|
262
|
+
- No changes detected -> Warn (proceeds to review)
|
|
263
|
+
- Too many lines changed -> Halt for human review
|
|
264
|
+
- Sensitive files (`.env`, credentials) -> Halt for human review
|
|
265
|
+
- New test failures (with `--eval-on-commit`) -> Halt
|
|
266
|
+
|
|
267
|
+
**Judgment (via LLM):**
|
|
268
|
+
- Builder output is gibberish -> Create `STOP.md` -> Halt
|
|
269
|
+
- Reviewer feedback is nonsensical -> Create `STOP.md` -> Halt
|
|
270
|
+
|
|
271
|
+
## Exit Codes
|
|
272
|
+
|
|
273
|
+
- `0` - Success
|
|
274
|
+
- `1` - Halted (needs human intervention)
|
|
275
|
+
|
|
276
|
+
## Expected Runtime
|
|
277
|
+
|
|
278
|
+
Each task typically takes 2-4 minutes. The orchestrator makes 5+ LLM calls per task: builder, implementation sanity check, reviewer, review sanity check, commit delegation, plus fix cycles if needed.
|
|
279
|
+
|
|
280
|
+
## Requirements
|
|
281
|
+
|
|
282
|
+
- Python 3.10+
|
|
283
|
+
- `claude` CLI installed and authenticated (default), or
|
|
284
|
+
- `codex` CLI installed and authenticated (if using `--cli codex`), or
|
|
285
|
+
- `gemini` CLI installed and authenticated (if using `--cli gemini`), or
|
|
286
|
+
- `opencode` CLI installed and authenticated (if using `--cli opencode`)
|
|
287
|
+
|
|
288
|
+
## Open Source Project Files
|
|
289
|
+
|
|
290
|
+
- License: [LICENSE](LICENSE)
|
|
291
|
+
- Contributing guide: [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
292
|
+
- Code of conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
|
|
293
|
+
- Security policy: [SECURITY.md](SECURITY.md)
|
|
294
|
+
- Changelog: [CHANGELOG.md](CHANGELOG.md)
|