claude-autopilot 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. claude_autopilot-0.2.1/.github/workflows/ci.yml +36 -0
  2. claude_autopilot-0.2.1/.github/workflows/docs.yml +43 -0
  3. claude_autopilot-0.2.1/.github/workflows/release.yml +58 -0
  4. claude_autopilot-0.2.1/.gitignore +35 -0
  5. claude_autopilot-0.2.1/CLAUDE.md +91 -0
  6. claude_autopilot-0.2.1/CONTRIBUTING.md +49 -0
  7. claude_autopilot-0.2.1/Makefile +10 -0
  8. claude_autopilot-0.2.1/PKG-INFO +314 -0
  9. claude_autopilot-0.2.1/README.md +286 -0
  10. claude_autopilot-0.2.1/autopilot.example.md +37 -0
  11. claude_autopilot-0.2.1/docs/content/assets/choo-choo.jpg +0 -0
  12. claude_autopilot-0.2.1/docs/content/assets/hero-dark.jpg +0 -0
  13. claude_autopilot-0.2.1/docs/content/assets/hero-light.jpg +0 -0
  14. claude_autopilot-0.2.1/docs/content/assets/train-runner.jpg +0 -0
  15. claude_autopilot-0.2.1/docs/content/css/custom.css +185 -0
  16. claude_autopilot-0.2.1/docs/content/guides/index.md +23 -0
  17. claude_autopilot-0.2.1/docs/content/guides/quick-start.md +107 -0
  18. claude_autopilot-0.2.1/docs/content/guides/scenarios/plan-and-sprint.md +141 -0
  19. claude_autopilot-0.2.1/docs/content/guides/scenarios/portfolio.md +98 -0
  20. claude_autopilot-0.2.1/docs/content/guides/scenarios/ralph.md +137 -0
  21. claude_autopilot-0.2.1/docs/content/guides/scenarios/roadmap-driven.md +138 -0
  22. claude_autopilot-0.2.1/docs/content/guides/scenarios/single-task.md +90 -0
  23. claude_autopilot-0.2.1/docs/content/index.md +101 -0
  24. claude_autopilot-0.2.1/docs/content/js/mermaid-init.js +20 -0
  25. claude_autopilot-0.2.1/docs/content/js/theme-toggle.js +64 -0
  26. claude_autopilot-0.2.1/docs/content/reference/agent-roles.md +111 -0
  27. claude_autopilot-0.2.1/docs/content/reference/cli.md +174 -0
  28. claude_autopilot-0.2.1/docs/content/reference/index.md +11 -0
  29. claude_autopilot-0.2.1/docs/content/reference/manifest-format.md +212 -0
  30. claude_autopilot-0.2.1/docs/overrides/main.html +157 -0
  31. claude_autopilot-0.2.1/docs/overrides/templates/sidebar.html +48 -0
  32. claude_autopilot-0.2.1/mkdocs.yml +58 -0
  33. claude_autopilot-0.2.1/pyproject.toml +62 -0
  34. claude_autopilot-0.2.1/src/autopilot/__init__.py +5 -0
  35. claude_autopilot-0.2.1/src/autopilot/__main__.py +5 -0
  36. claude_autopilot-0.2.1/src/autopilot/agent.py +94 -0
  37. claude_autopilot-0.2.1/src/autopilot/agents/critic.md +94 -0
  38. claude_autopilot-0.2.1/src/autopilot/agents/deep-researcher.md +148 -0
  39. claude_autopilot-0.2.1/src/autopilot/agents/judge.md +81 -0
  40. claude_autopilot-0.2.1/src/autopilot/agents/planner.md +187 -0
  41. claude_autopilot-0.2.1/src/autopilot/agents/portfolio.md +89 -0
  42. claude_autopilot-0.2.1/src/autopilot/agents/researcher.md +101 -0
  43. claude_autopilot-0.2.1/src/autopilot/agents/roadmap.md +189 -0
  44. claude_autopilot-0.2.1/src/autopilot/agents/worker.md +61 -0
  45. claude_autopilot-0.2.1/src/autopilot/cli.py +365 -0
  46. claude_autopilot-0.2.1/src/autopilot/config.py +111 -0
  47. claude_autopilot-0.2.1/src/autopilot/log.py +16 -0
  48. claude_autopilot-0.2.1/src/autopilot/manifest.py +565 -0
  49. claude_autopilot-0.2.1/src/autopilot/models.py +69 -0
  50. claude_autopilot-0.2.1/src/autopilot/orchestrator.py +788 -0
  51. claude_autopilot-0.2.1/src/autopilot/prompts.py +423 -0
  52. claude_autopilot-0.2.1/src/autopilot/runbooks/.gitkeep +0 -0
  53. claude_autopilot-0.2.1/src/autopilot/runbooks/__init__.py +0 -0
  54. claude_autopilot-0.2.1/src/autopilot/runbooks/python-cli.md +38 -0
  55. claude_autopilot-0.2.1/tests/__init__.py +0 -0
  56. claude_autopilot-0.2.1/tests/test_cli.py +85 -0
  57. claude_autopilot-0.2.1/tests/test_manifest.py +299 -0
  58. claude_autopilot-0.2.1/uv.lock +1519 -0
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint-and-test:  # runs once per matrix entry: install, lint, then test
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]  # quoted so YAML keeps them as strings, not floats
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v3
26
+
27
+ - name: Install dependencies
28
+ run: uv pip install -e ".[dev]" --system  # editable install with the dev extra (pytest, ruff, twine)
29
+
30
+ - name: Lint  # same two ruff commands as the Makefile `lint` target
31
+ run: |
32
+ uv run ruff check src/
33
+ uv run ruff format --check src/
34
+
35
+ - name: Test  # same command as the Makefile `test` target
36
+ run: uv run pytest tests/ -v
@@ -0,0 +1,43 @@
1
+ name: Deploy docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:  # only rebuild the site when documentation inputs change
7
+ - 'docs/**'
8
+ - 'mkdocs.yml'
9
+ - 'docs/overrides/**'  # theme overrides live under docs/ (also matched by 'docs/**'; kept explicit)
10
+ workflow_dispatch:  # allow manual runs from the Actions tab
11
+
12
+ permissions:
13
+ contents: read
14
+ pages: write
15
+ id-token: write
16
+
17
+ concurrency:
18
+ group: pages  # all runs of this workflow share one concurrency group
19
+ cancel-in-progress: false  # do not cancel a deployment that is already running
20
+
21
+ jobs:
22
+ build:  # builds the MkDocs site and uploads it as the Pages artifact
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: actions/setup-python@v5
27
+ with:
28
+ python-version: '3.12'
29
+ - run: pip install "mkdocs>=1.6,<2" mkdocs-shadcn  # mkdocs is range-pinned; mkdocs-shadcn is unpinned
30
+ - run: mkdocs build --strict  # --strict turns build warnings into errors
31
+ - uses: actions/upload-pages-artifact@v3
32
+ with:
33
+ path: docs/site  # same directory as the "MkDocs build output" entry in .gitignore
34
+
35
+ deploy:  # publishes the artifact produced by the build job
36
+ needs: build
37
+ runs-on: ubuntu-latest
38
+ environment:
39
+ name: github-pages
40
+ url: ${{ steps.deployment.outputs.page_url }}  # output of the step with id "deployment" below
41
+ steps:
42
+ - id: deployment
43
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,58 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: read
10
+ # id-token: write is granted only on the publish-pypi job, which performs Trusted Publishing
11
+
12
+ jobs:
13
+ build:  # builds the distributions into dist/ and hands them to publish-pypi as an artifact
14
+ name: Build distribution
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.11"
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v3
26
+
27
+ - name: Build package
28
+ run: uv build
29
+
30
+ - name: Check distribution
31
+ run: uvx twine check dist/*
32
+
33
+ - name: Store distribution packages
34
+ uses: actions/upload-artifact@v4
35
+ with:
36
+ name: python-package-distributions  # must match the artifact name in the download step of publish-pypi
37
+ path: dist/
38
+
39
+ publish-pypi:  # runs only after build succeeds; uploads the stored artifact to PyPI
40
+ name: Publish to PyPI
41
+ needs: build
42
+ runs-on: ubuntu-latest
43
+ environment:
44
+ name: release
45
+ url: https://pypi.org/project/claude-autopilot/
46
+ permissions:
47
+ id-token: write  # OIDC token used for PyPI Trusted Publishing
48
+ steps:
49
+ - name: Download distributions
50
+ uses: actions/download-artifact@v4
51
+ with:
52
+ name: python-package-distributions  # same artifact name the build job uploaded
53
+ path: dist/
54
+
55
+ - name: Publish to PyPI
56
+ uses: pypa/gh-action-pypi-publish@release/v1
57
+ with:
58
+ attestations: false  # NOTE(review): turns off the publish action's attestation upload — confirm this is intentional
@@ -0,0 +1,35 @@
1
+ # Autopilot local dev/orchestration state
2
+ .dev/
3
+
4
+ # MkDocs build output
5
+ docs/site/
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ *.egg-info/
12
+ dist/
13
+ build/
14
+ *.egg
15
+ .eggs/
16
+
17
+ # Virtual environments
18
+ .venv/
19
+ venv/
20
+ ENV/
21
+
22
+ # IDE
23
+ .idea/
24
+ .vscode/
25
+ *.swp
26
+ *.swo
27
+ *~
28
+
29
+ # OS
30
+ .DS_Store
31
+ Thumbs.db
32
+
33
+ # Environment variables
34
+ .env
35
+ .env.local
@@ -0,0 +1,91 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code when working with this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Autopilot is an autonomous project session orchestrator for Claude Code. It reads project manifests (`.dev/sprint.md`), evaluates readiness via an LLM judge, and executes tasks sequentially through the Claude Agent SDK (`claude_agent_sdk`). It automates the outer loop of hobby project development — you write the plan, autopilot runs it.
8
+
9
+ ## Architecture
10
+
11
+ Single Python package at `src/autopilot/`. Key modules:
12
+
13
+ | Module | Responsibility |
14
+ |--------|---------------|
15
+ | `cli.py` | Argparse CLI, project discovery, fork-filtering, async entry point |
16
+ | `orchestrator.py` | All agent pipelines: judge/worker/planner/researcher/portfolio/roadmap/ralph |
17
+ | `manifest.py` | Parse/load/write manifests, task dependency resolution, agent config loading, runbook I/O, sprint log I/O |
18
+ | `prompts.py` | Prompt builders for all agent roles; judge verdict parsing |
19
+ | `agent.py` | Thin wrapper around `claude_agent_sdk.query()` — streams messages, tracks cost, names sessions |
20
+ | `models.py` | Dataclasses: `Task`, `Manifest`, `AgentConfig`, `AgentResult`, `SprintResult` |
21
+ | `config.py` | `AutopilotConfig` dataclass with v2 fields; `load_config()` from TOML |
22
+ | `log.py` | Timestamped status logging |
23
+
24
+ **Agent role configs** live in `src/autopilot/agents/*.md` — markdown files with YAML frontmatter defining system prompts, allowed tools, budget, and permission mode for each role: `judge`, `worker`, `planner`, `critic`, `researcher`, `portfolio`, `roadmap`, `roadmap-evaluate`, `deep-researcher`.
25
+
26
+ **Bundled runbooks** live in `src/autopilot/runbooks/*.md` — markdown reference docs loaded at runtime via `load_runbook(archetype, cfg)`. The `python-cli` runbook ships with the package. Custom runbooks can be added to a project-local `runbooks/` directory (configured via `AutopilotConfig.runbooks_dir`).
27
+
28
+ ## Core Pipeline
29
+
30
+ **`sprint`** (`orchestrator.py: execute_sprint()`): Executes an approved `.dev/sprint.md` — loops through pending tasks sequentially. Each task: spawn worker agent → verify marked done → retry on failure up to `max_task_attempts`. Pass `--auto-approve` to bypass the approval check. Pass `--resume` to reset stuck projects and retry failed tasks.
31
+
32
+ **`build`** (`orchestrator.py: build_project()`): One-shot workflow: runs `plan` then `sprint`. Equivalent to `autopilot plan . && autopilot sprint --auto-approve .`. Pass `--context <file>` to seed the planner.
33
+
34
+ **`plan`**: Lazily runs roadmap agent if `.dev/roadmap.md` doesn't exist, then runs the planner agent to write `.dev/sprint.md`. The critic agent always runs if its config exists, followed by a judge loop (up to 2 rounds) that evaluates the plan and revises if needed. On judge READY, sets `approved: true` in sprint.md. Pass `--context <file>` to skip lazy research and seed the planner directly.
35
+
36
+ **`roadmap`**: Runs roadmap agent → writes `.dev/roadmap.md` with `goal:`, `archetype:`, and `validate:` frontmatter plus shipping steps. Uses research summary if available. The roadmap is the authoritative goal+validate artifact. Pass `--deep` to run deep research first. Pass `--topic "question"` or `--topic-file brief.md` to run topic research (writes `.dev/research/{slug}/report.md`, no roadmap written).
37
+
38
+ **`ralph`** (`orchestrator.py: ralph_project()`): Outer loop: `(plan → sprint → evaluate) × N` until GOAL_MET or stuck. Requires `.dev/roadmap.md`. Each iteration calls `plan_project()` (planner + critic + judge), `execute_sprint()` (worker loop), `run_validation_hooks()`, and `evaluate_project()`. If tasks fail, appends a deferred investigation task to `roadmap.md` and stops. Loops until `goal_met=True` or `max_sprints` reached.
39
+
40
+ **`portfolio`**: Runs portfolio agent across all discovered projects → writes `<scan_dir>/.dev/portfolio.md`. Requires `--scan` or explicit paths. Auto-generates `.dev/roadmap.md` for any project that lacks one before building the portfolio (uses deep research if no existing research artifacts). The portfolio agent uses `roadmap.md` as its primary input per project.
41
+
42
+ ## Key Patterns
43
+
44
+ - **Manifest format**: YAML frontmatter + markdown checkboxes at `.dev/sprint.md`. Task metadata persisted inline: `[id: foo]`, `[depends: bar]`, `[attempts: 2]`, `[status: failed]`, `[error: ...]`.
45
+ - **Session naming**: Every `run_agent()` call sets `extra_args={"session-name": "autopilot/{project}/{role}"}` so sessions appear distinctively in Claude Code's `/resume` history.
46
+ - **Project discovery**: `discover_projects()` finds dirs with `.dev/sprint.md`; `discover_all_projects()` finds any project-like dir (git, package.json, pyproject.toml, etc.).
47
+ - **Fork filtering**: In scan mode, non-owned repos are skipped by comparing git remote owner to detected user (`AUTOPILOT_GIT_USER` env → `git config autopilot.user` → `gh api user`). Use `--all` to disable.
48
+ - **Default cwd**: When no path arg is provided, autopilot defaults to the current directory for all modes.
49
+
50
+ ## .dev Convention
51
+
52
+ All autopilot working files within a project live under `.dev/` (which should be in `.gitignore`):
53
+ - `.dev/sprint.md` — task manifest (`plan` output); used by `sprint` for worker loop
54
+ - `.dev/roadmap.md` — roadmap agent output; contains `goal:`, `archetype:`, and `validate:` frontmatter; read by the planner during `plan` and used by `ralph` as the goal + validate definition
55
+ - `.dev/sprint-log.md` — sprint history, append-only, feeds planner context each sprint
56
+ - `.dev/project-summary.md` — researcher agent output
57
+ - `<scan_dir>/.dev/portfolio.md` — portfolio agent output
58
+
59
+ ## Development Commands
60
+
61
+ ```bash
62
+ uv pip install -e . # Install (editable)
63
+ autopilot sprint . # Execute approved sprint plan
64
+ autopilot sprint --auto-approve . # Execute, bypassing approval check
65
+ autopilot sprint --resume . # Reset stuck projects and retry
66
+ autopilot build . # Plan then execute (one-shot)
67
+ autopilot build --context spec.md . # Plan with context, then execute
68
+ autopilot plan . # Generate/improve manifest (plan + critic + judge)
69
+ autopilot roadmap . # Build shipping roadmap (goal + validate)
70
+ autopilot roadmap --deep . # Deep research then build roadmap
71
+ autopilot roadmap --topic "question" . # Research a specific topic
72
+ autopilot ralph . # Outer loop until goal met or stuck
73
+ autopilot sprint --scan ~/Projects # Auto-discover and process all projects
74
+ uv run ruff check src/ # Lint
75
+ uv run ruff format --check src/ # Format check
76
+ ```
77
+
78
+ Tests live in `tests/` (`test_cli.py`, `test_manifest.py`). Run them with `uv run pytest tests/ -v` or `make test`; `make ci` runs lint and tests together.
79
+
80
+ ## Code Style
81
+
82
+ - Python 3.11+, async/await throughout
83
+ - Ruff linter: line length 100, rules `E, F, I, N, W, UP`
84
+ - Dataclasses (not Pydantic)
85
+ - Type hints with `X | None` union syntax (not `Optional`)
86
+
87
+ ## Documentation
88
+
89
+ When adding or changing features that affect CLI usage, agent roles, or the manifest format, update `README.md`. The README is the primary user-facing documentation.
90
+
91
+ Release plans and post-MVP features tracked in `.dev/roadmap.md` and `.dev/plans/`.
@@ -0,0 +1,49 @@
1
+ # Contributing to autopilot
2
+
3
+ ## Setup
4
+
5
+ ```bash
6
+ git clone https://github.com/timainge/autopilot
7
+ cd autopilot
8
+ uv pip install -e ".[dev]"
9
+ ```
10
+
11
+ ## Adding a New Agent Role
12
+
13
+ 1. Create `src/autopilot/agents/<role>.md` with YAML frontmatter:
14
+ ```markdown
15
+ ---
16
+ name: role-name
17
+ description: What this agent does
18
+ allowed_tools:
19
+ - Read
20
+ - Write
21
+ - Bash
22
+ max_turns: 20
23
+ max_budget_usd: 1.00
24
+ permission_mode: default
25
+ ---
26
+
27
+ You are an expert...
28
+ ```
29
+ 2. Add a `build_<role>_prompt()` function in `prompts.py`
30
+ 3. Add a `<role>_project()` function in `orchestrator.py`
31
+ 4. Wire up the CLI flag in `cli.py`
32
+
33
+ ## Running Linter
34
+
35
+ ```bash
36
+ make lint
37
+ ```
38
+
39
+ ## Running Tests
40
+
41
+ ```bash
42
+ make test
43
+ ```
44
+
45
+ ## Submitting PRs
46
+
47
+ - Keep changes focused; one feature per PR
48
+ - Run `make ci` before opening a PR
49
+ - Update `README.md` if you add CLI flags or agent roles
@@ -0,0 +1,10 @@
1
+ .PHONY: test lint ci  # none of these targets produce a file of the same name
2
+
3
+ test:  # run the pytest suite under tests/ with verbose output
4
+ uv run pytest tests/ -v
5
+
6
+ lint:  # ruff lint check plus format check (check only, no rewriting) on src/
7
+ uv run ruff check src/
8
+ uv run ruff format --check src/
9
+
10
+ ci: lint test  # lint first, then tests; CONTRIBUTING.md asks for this before opening a PR
@@ -0,0 +1,314 @@
1
+ Metadata-Version: 2.4
2
+ Name: claude-autopilot
3
+ Version: 0.2.1
4
+ Summary: Autonomous project session orchestrator for Claude Code
5
+ Project-URL: Homepage, https://github.com/timainge/autopilot
6
+ Project-URL: Documentation, https://timainge.github.io/autopilot
7
+ Project-URL: Repository, https://github.com/timainge/autopilot
8
+ Project-URL: Bug Tracker, https://github.com/timainge/autopilot/issues
9
+ Project-URL: Changelog, https://github.com/timainge/autopilot/releases
10
+ License-Expression: MIT
11
+ Keywords: ai,automation,claude,claude-code,developer-tools
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Build Tools
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: claude-agent-sdk>=0.1.0
22
+ Requires-Dist: pyyaml>=6.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Requires-Dist: ruff>=0.4; extra == 'dev'
26
+ Requires-Dist: twine>=5.0; extra == 'dev'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # autopilot
30
+
31
+ [![PyPI](https://img.shields.io/pypi/v/claude-autopilot)](https://pypi.org/project/claude-autopilot/)
32
+ [![CI](https://github.com/timainge/autopilot/actions/workflows/ci.yml/badge.svg)](https://github.com/timainge/autopilot/actions/workflows/ci.yml)
33
+ [![Python](https://img.shields.io/pypi/pyversions/claude-autopilot)](https://pypi.org/project/claude-autopilot/)
34
+ [![Docs](https://img.shields.io/badge/docs-github--pages-blue)](https://timainge.github.io/autopilot)
35
+
36
+ Autopilot is the outer loop for Claude Code. You describe what needs building; autopilot plans it, executes it task by task, and checks whether the goal was met — without you sitting there typing "continue".
37
+
38
+ ---
39
+
40
+ ## How it works
41
+
42
+ Autopilot runs a three-stage cycle:
43
+
44
+ ```
45
+ roadmap → plan → sprint
46
+ ↑ |
47
+ └──── evaluate ←─────┘
48
+ ```
49
+
50
+ **Roadmap** — defines the goal, the shipping target, and what "done" looks like (validation commands). Optional but makes everything downstream sharper.
51
+
52
+ **Plan** — a planner agent reads the roadmap and writes `.dev/sprint.md`: a set of structured tasks. A critic reviews it, a judge approves it. No execution until the plan is approved.
53
+
54
+ **Sprint** — each task in the manifest gets a fresh Claude Code session. The worker implements, commits, and marks the task done. Failed tasks retry up to a configured limit.
55
+
56
+ **Ralph** is the outer loop that drives this cycle autonomously — planning a sprint, executing it, running validation, evaluating whether the goal is met, and repeating until it is.
57
+
58
+ ---
59
+
60
+ ## Install
61
+
62
+ ```bash
63
+ pip install claude-autopilot
64
+ # or
65
+ uv pip install claude-autopilot
66
+ ```
67
+
68
+ Requires an Anthropic API key or a Claude Code subscription token:
69
+
70
+ ```bash
71
+ # Option A: API key
72
+ export ANTHROPIC_API_KEY=your-key-here
73
+
74
+ # Option B: Claude Code subscription (Max/Pro)
75
+ claude setup-token
76
+ export CLAUDE_CODE_OAUTH_TOKEN=<token from above>
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Quick start
82
+
83
+ **One-shot build** — plan then execute in a single command:
84
+
85
+ ```bash
86
+ autopilot build .
87
+ autopilot build --context spec.md . # seed the planner with a spec or TODO list
88
+ ```
89
+
90
+ **Step by step** — more control:
91
+
92
+ ```bash
93
+ autopilot roadmap . # optional: build a goal + validate spec
94
+ autopilot plan . # write + approve the task manifest
95
+ autopilot sprint . # execute the approved manifest
96
+ ```
97
+
98
+ **Fully autonomous loop** — keeps going until the goal is met:
99
+
100
+ ```bash
101
+ autopilot roadmap . # required for ralph: defines the goal and validate commands
102
+ autopilot ralph . # plan → sprint → evaluate, repeat
103
+ ```
104
+
105
+ ---
106
+
107
+ ## Commands
108
+
109
+ ### `roadmap`
110
+
111
+ Writes `.dev/roadmap.md` — the goal, archetype, validation commands, and shipping phases. Used
112
+ as the primary input for planning and as the termination condition for ralph.
113
+
114
+ ```bash
115
+ autopilot roadmap . # assess the project and write a roadmap
116
+ autopilot roadmap --deep . # run deep research (web + ecosystem) first
117
+ autopilot roadmap --topic "question" . # research a specific question → .dev/research/
118
+ autopilot roadmap --topic-file brief.md . # same, from a file
119
+ ```
120
+
121
+ ### `plan`
122
+
123
+ Runs the planner → critic → judge pipeline and writes an approved `.dev/sprint.md`.
124
+
125
+ ```bash
126
+ autopilot plan . # auto-runs roadmap first if it doesn't exist
127
+ autopilot plan --context TODO.md . # seed with a spec or todo list, skip research
128
+ ```
129
+
130
+ The critic reviews the plan adversarially. The judge evaluates readiness: if NOT_READY, the
131
+ planner revises once with the judge's feedback and the judge re-evaluates. When approved,
132
+ `approved: true` is set in the manifest automatically.
133
+
134
+ ### `sprint`
135
+
136
+ Executes the approved `.dev/sprint.md` task manifest. Each task spawns a fresh Claude Code
137
+ session.
138
+
139
+ ```bash
140
+ autopilot sprint .
141
+ autopilot sprint --auto-approve . # skip the approval check
142
+ autopilot sprint --resume . # reset stuck projects, retry failed tasks
143
+ ```
144
+
145
+ ### `build`
146
+
147
+ Shorthand for `plan` + `sprint --auto-approve` in one command.
148
+
149
+ ```bash
150
+ autopilot build .
151
+ autopilot build --context spec.md .
152
+ ```
153
+
154
+ ### `ralph`
155
+
156
+ The fully autonomous outer loop. Requires `.dev/roadmap.md` (run `autopilot roadmap .` first).
157
+
158
+ ```bash
159
+ autopilot ralph .
160
+ autopilot ralph --auto-approve .
161
+ ```
162
+
163
+ Each iteration: plan a sprint → execute tasks → run `validate` commands from roadmap frontmatter
164
+ → evaluate whether the goal is met. Stops when:
165
+ - The evaluator returns `GOAL_MET`
166
+ - Tasks fail (appends a deferred investigation task to `roadmap.md`)
167
+ - `max_sprints` is reached
168
+
169
+ ### `portfolio`
170
+
171
+ Builds a cross-project index — goal, tech stack, current state, and prioritised quick wins.
172
+ Auto-generates `.dev/roadmap.md` for any project that lacks one before building.
173
+
174
+ ```bash
175
+ autopilot portfolio --scan ~/Projects
176
+ autopilot portfolio path/to/proj-a path/to/proj-b
177
+ ```
178
+
179
+ Output: `<scan_dir>/.dev/portfolio.md`.
180
+
181
+ ---
182
+
183
+ ## Multi-project scanning
184
+
185
+ Every command works with `--scan` to operate across a directory of projects:
186
+
187
+ ```bash
188
+ autopilot roadmap --scan ~/Projects
189
+ autopilot plan --scan ~/Projects
190
+ autopilot sprint --auto-approve --scan ~/Projects
191
+ autopilot ralph --scan ~/Projects
192
+ ```
193
+
194
+ Repos you don't own are skipped by default. Autopilot compares the git remote owner against
195
+ your username (checked in order: `AUTOPILOT_GIT_USER` env, `git config autopilot.user`, `gh`
196
+ CLI auth). Use `--all` to include forks and clones.
197
+
198
+ ---
199
+
200
+ ## Configuration
201
+
202
+ ### Per-project: `autopilot.toml`
203
+
204
+ ```toml
205
+ [autopilot]
206
+ max_budget_usd = 10.0
207
+ max_task_attempts = 3
208
+ max_sprints = 5
209
+ ```
210
+
211
+ ### Global: `~/.config/autopilot/config.toml`
212
+
213
+ Same format. Per-project config takes precedence.
214
+
215
+ ### Manifest frontmatter
216
+
217
+ `.dev/sprint.md` and `.dev/roadmap.md` use YAML frontmatter for structured config:
218
+
219
+ | Field | File | Default | Description |
220
+ |-------|------|---------|-------------|
221
+ | `name` | sprint | dir name | Project display name |
222
+ | `approved` | sprint | false | Approval gate — must be true before sprint runs |
223
+ | `status` | sprint | pending | pending / active / stuck / completed |
224
+ | `max_budget_usd` | sprint | 5.0 | Budget cap per sprint |
225
+ | `max_task_attempts` | sprint | 3 | Max retries per failed task |
226
+ | `goal` | roadmap | — | Goal type: launch / publish / complete |
227
+ | `archetype` | roadmap | — | Project archetype (e.g. `python-cli`) |
228
+ | `validate` | roadmap | — | Shell commands that must pass for goal completion |
229
+
230
+ Add `.dev/` to `.gitignore` — it contains orchestration state, not source code.
231
+
232
+ ### Task format
233
+
234
+ Tasks in `.dev/sprint.md` are level-3 headings with a checkbox, a slug ID, and an optional
235
+ body:
236
+
237
+ ```markdown
238
+ ### [ ] create-api-client
239
+
240
+ Implement `src/client.py` with a `get()` and `post()` method using `httpx`.
241
+ Use the base URL from `config.py`. Raise `APIError` on non-2xx responses.
242
+
243
+ **Done**: `pytest tests/test_client.py` passes.
244
+
245
+ ---
246
+
247
+ ### [ ] add-retry-logic [depends: create-api-client]
248
+
249
+ Add exponential backoff to the client using `tenacity`. Max 3 retries,
250
+ starting at 1s. Log each retry attempt at WARNING level.
251
+
252
+ ---
253
+
254
+ ### [x] completed-task
255
+ ```
256
+
257
+ - IDs are the heading text — must be `lowercase-with-dashes`
258
+ - Dependencies: `[depends: task-id]` or `[depends: a, b]` inline in the heading
259
+ - Retry metadata (`[attempts: N]`, `[status: failed]`, `[error: ...]`) is written by autopilot — don't edit manually
260
+
261
+ ---
262
+
263
+ ## Agent roles
264
+
265
+ Agent configs live in `src/autopilot/agents/*.md` — YAML frontmatter + system prompt. Sessions
266
+ appear in Claude Code's `/resume` history as `autopilot/projectname/role`.
267
+
268
+ | Role | Invoked by | What it does |
269
+ |------|-----------|--------------|
270
+ | `planner` | `plan`, `build`, `ralph` | Writes `.dev/sprint.md` |
271
+ | `critic` | `plan`, `build`, `ralph` | Reviews the plan adversarially |
272
+ | `judge` | `plan`, `build`, `ralph` | Approves or rejects the plan |
273
+ | `worker` | `sprint`, `build`, `ralph` | Executes a task, commits |
274
+ | `roadmap` | `roadmap`, `ralph` | Writes `.dev/roadmap.md`; evaluates goal completion |
275
+ | `researcher` | (lazy, before `plan`) | Analyses codebase → `.dev/project-summary.md` |
276
+ | `deep-researcher` | `roadmap --deep` | Extended web research before roadmapping |
277
+ | `portfolio` | `portfolio` | Cross-project index → `.dev/portfolio.md` |
278
+
279
+ ### Custom roles
280
+
281
+ Drop a markdown file into `agents/` (or use `--agents-dir` to point to a custom directory):
282
+
283
+ ```markdown
284
+ ---
285
+ name: reviewer
286
+ description: Reviews completed tasks for quality
287
+ allowed_tools: [Read, Glob, Bash, Grep]
288
+ permission_mode: default
289
+ max_turns: 20
290
+ max_budget_usd: 0.50
291
+ ---
292
+
293
+ You are a code reviewer. You read recently completed tasks and assess quality...
294
+ ```
295
+
296
+ ---
297
+
298
+ ## Design notes
299
+
300
+ **Why the Agent SDK, not CLI pipes?**
301
+ The SDK wraps Claude Code programmatically — same tools, proper message streaming, error
302
+ handling. Each `query()` call is a fresh Claude Code session with clean context.
303
+
304
+ **Why sequential tasks, not parallel?**
305
+ Simpler to debug, cheaper, and avoids merge conflicts. Parallel execution via git worktrees is
306
+ planned for a future release.
307
+
308
+ **Why an approval gate?**
309
+ `sprint` only runs a manifest with `approved: true`. `plan` sets it once the judge approves; for a
310
+ hand-written manifest, set it yourself or pass `--auto-approve`. This prevents runaway execution on half-baked plans.
311
+
312
+ **Why markdown manifests?**
313
+ The manifest doubles as project documentation. YAML frontmatter gives structured config; the
314
+ markdown body gives context that humans and agents can both read naturally.