claude-autopilot 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_autopilot-0.2.1/.github/workflows/ci.yml +36 -0
- claude_autopilot-0.2.1/.github/workflows/docs.yml +43 -0
- claude_autopilot-0.2.1/.github/workflows/release.yml +58 -0
- claude_autopilot-0.2.1/.gitignore +35 -0
- claude_autopilot-0.2.1/CLAUDE.md +91 -0
- claude_autopilot-0.2.1/CONTRIBUTING.md +49 -0
- claude_autopilot-0.2.1/Makefile +10 -0
- claude_autopilot-0.2.1/PKG-INFO +314 -0
- claude_autopilot-0.2.1/README.md +286 -0
- claude_autopilot-0.2.1/autopilot.example.md +37 -0
- claude_autopilot-0.2.1/docs/content/assets/choo-choo.jpg +0 -0
- claude_autopilot-0.2.1/docs/content/assets/hero-dark.jpg +0 -0
- claude_autopilot-0.2.1/docs/content/assets/hero-light.jpg +0 -0
- claude_autopilot-0.2.1/docs/content/assets/train-runner.jpg +0 -0
- claude_autopilot-0.2.1/docs/content/css/custom.css +185 -0
- claude_autopilot-0.2.1/docs/content/guides/index.md +23 -0
- claude_autopilot-0.2.1/docs/content/guides/quick-start.md +107 -0
- claude_autopilot-0.2.1/docs/content/guides/scenarios/plan-and-sprint.md +141 -0
- claude_autopilot-0.2.1/docs/content/guides/scenarios/portfolio.md +98 -0
- claude_autopilot-0.2.1/docs/content/guides/scenarios/ralph.md +137 -0
- claude_autopilot-0.2.1/docs/content/guides/scenarios/roadmap-driven.md +138 -0
- claude_autopilot-0.2.1/docs/content/guides/scenarios/single-task.md +90 -0
- claude_autopilot-0.2.1/docs/content/index.md +101 -0
- claude_autopilot-0.2.1/docs/content/js/mermaid-init.js +20 -0
- claude_autopilot-0.2.1/docs/content/js/theme-toggle.js +64 -0
- claude_autopilot-0.2.1/docs/content/reference/agent-roles.md +111 -0
- claude_autopilot-0.2.1/docs/content/reference/cli.md +174 -0
- claude_autopilot-0.2.1/docs/content/reference/index.md +11 -0
- claude_autopilot-0.2.1/docs/content/reference/manifest-format.md +212 -0
- claude_autopilot-0.2.1/docs/overrides/main.html +157 -0
- claude_autopilot-0.2.1/docs/overrides/templates/sidebar.html +48 -0
- claude_autopilot-0.2.1/mkdocs.yml +58 -0
- claude_autopilot-0.2.1/pyproject.toml +62 -0
- claude_autopilot-0.2.1/src/autopilot/__init__.py +5 -0
- claude_autopilot-0.2.1/src/autopilot/__main__.py +5 -0
- claude_autopilot-0.2.1/src/autopilot/agent.py +94 -0
- claude_autopilot-0.2.1/src/autopilot/agents/critic.md +94 -0
- claude_autopilot-0.2.1/src/autopilot/agents/deep-researcher.md +148 -0
- claude_autopilot-0.2.1/src/autopilot/agents/judge.md +81 -0
- claude_autopilot-0.2.1/src/autopilot/agents/planner.md +187 -0
- claude_autopilot-0.2.1/src/autopilot/agents/portfolio.md +89 -0
- claude_autopilot-0.2.1/src/autopilot/agents/researcher.md +101 -0
- claude_autopilot-0.2.1/src/autopilot/agents/roadmap.md +189 -0
- claude_autopilot-0.2.1/src/autopilot/agents/worker.md +61 -0
- claude_autopilot-0.2.1/src/autopilot/cli.py +365 -0
- claude_autopilot-0.2.1/src/autopilot/config.py +111 -0
- claude_autopilot-0.2.1/src/autopilot/log.py +16 -0
- claude_autopilot-0.2.1/src/autopilot/manifest.py +565 -0
- claude_autopilot-0.2.1/src/autopilot/models.py +69 -0
- claude_autopilot-0.2.1/src/autopilot/orchestrator.py +788 -0
- claude_autopilot-0.2.1/src/autopilot/prompts.py +423 -0
- claude_autopilot-0.2.1/src/autopilot/runbooks/.gitkeep +0 -0
- claude_autopilot-0.2.1/src/autopilot/runbooks/__init__.py +0 -0
- claude_autopilot-0.2.1/src/autopilot/runbooks/python-cli.md +38 -0
- claude_autopilot-0.2.1/tests/__init__.py +0 -0
- claude_autopilot-0.2.1/tests/test_cli.py +85 -0
- claude_autopilot-0.2.1/tests/test_manifest.py +299 -0
- claude_autopilot-0.2.1/uv.lock +1519 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint-and-test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install uv
|
|
25
|
+
uses: astral-sh/setup-uv@v3
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv pip install -e ".[dev]" --system
|
|
29
|
+
|
|
30
|
+
- name: Lint
|
|
31
|
+
run: |
|
|
32
|
+
uv run ruff check src/
|
|
33
|
+
uv run ruff format --check src/
|
|
34
|
+
|
|
35
|
+
- name: Test
|
|
36
|
+
run: uv run pytest tests/ -v
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Deploy docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths:
|
|
7
|
+
- 'docs/**'
|
|
8
|
+
- 'mkdocs.yml'
|
|
9
|
+
- 'overrides/**'
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
pages: write
|
|
15
|
+
id-token: write
|
|
16
|
+
|
|
17
|
+
concurrency:
|
|
18
|
+
group: pages
|
|
19
|
+
cancel-in-progress: false
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
build:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: '3.12'
|
|
29
|
+
- run: pip install "mkdocs>=1.6,<2" mkdocs-shadcn
|
|
30
|
+
- run: mkdocs build --strict
|
|
31
|
+
- uses: actions/upload-pages-artifact@v3
|
|
32
|
+
with:
|
|
33
|
+
path: docs/site
|
|
34
|
+
|
|
35
|
+
deploy:
|
|
36
|
+
needs: build
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
environment:
|
|
39
|
+
name: github-pages
|
|
40
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
41
|
+
steps:
|
|
42
|
+
- id: deployment
|
|
43
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
id-token: write # Required for Trusted Publishing
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
name: Build distribution
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.11"
|
|
23
|
+
|
|
24
|
+
- name: Install uv
|
|
25
|
+
uses: astral-sh/setup-uv@v3
|
|
26
|
+
|
|
27
|
+
- name: Build package
|
|
28
|
+
run: uv build
|
|
29
|
+
|
|
30
|
+
- name: Check distribution
|
|
31
|
+
run: uvx twine check dist/*
|
|
32
|
+
|
|
33
|
+
- name: Store distribution packages
|
|
34
|
+
uses: actions/upload-artifact@v4
|
|
35
|
+
with:
|
|
36
|
+
name: python-package-distributions
|
|
37
|
+
path: dist/
|
|
38
|
+
|
|
39
|
+
publish-pypi:
|
|
40
|
+
name: Publish to PyPI
|
|
41
|
+
needs: build
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
environment:
|
|
44
|
+
name: release
|
|
45
|
+
url: https://pypi.org/project/claude-autopilot/
|
|
46
|
+
permissions:
|
|
47
|
+
id-token: write
|
|
48
|
+
steps:
|
|
49
|
+
- name: Download distributions
|
|
50
|
+
uses: actions/download-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: python-package-distributions
|
|
53
|
+
path: dist/
|
|
54
|
+
|
|
55
|
+
- name: Publish to PyPI
|
|
56
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
57
|
+
with:
|
|
58
|
+
attestations: false
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Autopilot local dev/orchestration state
|
|
2
|
+
.dev/
|
|
3
|
+
|
|
4
|
+
# MkDocs build output
|
|
5
|
+
docs/site/
|
|
6
|
+
|
|
7
|
+
# Python
|
|
8
|
+
__pycache__/
|
|
9
|
+
*.py[cod]
|
|
10
|
+
*$py.class
|
|
11
|
+
*.egg-info/
|
|
12
|
+
dist/
|
|
13
|
+
build/
|
|
14
|
+
*.egg
|
|
15
|
+
.eggs/
|
|
16
|
+
|
|
17
|
+
# Virtual environments
|
|
18
|
+
.venv/
|
|
19
|
+
venv/
|
|
20
|
+
ENV/
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.idea/
|
|
24
|
+
.vscode/
|
|
25
|
+
*.swp
|
|
26
|
+
*.swo
|
|
27
|
+
*~
|
|
28
|
+
|
|
29
|
+
# OS
|
|
30
|
+
.DS_Store
|
|
31
|
+
Thumbs.db
|
|
32
|
+
|
|
33
|
+
# Environment variables
|
|
34
|
+
.env
|
|
35
|
+
.env.local
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code when working with this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
Autopilot is an autonomous project session orchestrator for Claude Code. It reads project manifests (`.dev/sprint.md`), evaluates readiness via an LLM judge, and executes tasks sequentially through the Claude Agent SDK (`claude_agent_sdk`). It automates the outer loop of hobby project development — you write the plan, autopilot runs it.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
Single Python package at `src/autopilot/`. Key modules:
|
|
12
|
+
|
|
13
|
+
| Module | Responsibility |
|
|
14
|
+
|--------|---------------|
|
|
15
|
+
| `cli.py` | Argparse CLI, project discovery, fork-filtering, async entry point |
|
|
16
|
+
| `orchestrator.py` | All agent pipelines: judge/worker/planner/researcher/portfolio/roadmap/ralph |
|
|
17
|
+
| `manifest.py` | Parse/load/write manifests, task dependency resolution, agent config loading, runbook I/O, sprint log I/O |
|
|
18
|
+
| `prompts.py` | Prompt builders for all agent roles; judge verdict parsing |
|
|
19
|
+
| `agent.py` | Thin wrapper around `claude_agent_sdk.query()` — streams messages, tracks cost, names sessions |
|
|
20
|
+
| `models.py` | Dataclasses: `Task`, `Manifest`, `AgentConfig`, `AgentResult`, `SprintResult` |
|
|
21
|
+
| `config.py` | `AutopilotConfig` dataclass with v2 fields; `load_config()` from TOML |
|
|
22
|
+
| `log.py` | Timestamped status logging |
|
|
23
|
+
|
|
24
|
+
**Agent role configs** live in `src/autopilot/agents/*.md` — markdown files with YAML frontmatter defining system prompts, allowed tools, budget, and permission mode for each role: `judge`, `worker`, `planner`, `critic`, `researcher`, `portfolio`, `roadmap`, `roadmap-evaluate`, `deep-researcher`.
|
|
25
|
+
|
|
26
|
+
**Bundled runbooks** live in `src/autopilot/runbooks/*.md` — markdown reference docs loaded at runtime via `load_runbook(archetype, cfg)`. The `python-cli` runbook ships with the package. Custom runbooks can be added to a project-local `runbooks/` directory (configured via `AutopilotConfig.runbooks_dir`).
|
|
27
|
+
|
|
28
|
+
## Core Pipeline
|
|
29
|
+
|
|
30
|
+
**`sprint`** (`orchestrator.py: execute_sprint()`): Executes an approved `.dev/sprint.md` — loops through pending tasks sequentially. Each task: spawn worker agent → verify marked done → retry on failure up to `max_task_attempts`. Pass `--auto-approve` to bypass the approval check. Pass `--resume` to reset stuck projects and retry failed tasks.
|
|
31
|
+
|
|
32
|
+
**`build`** (`orchestrator.py: build_project()`): One-shot workflow: runs `plan` then `sprint`. Equivalent to `autopilot plan . && autopilot sprint --auto-approve .`. Pass `--context <file>` to seed the planner.
|
|
33
|
+
|
|
34
|
+
**`plan`**: Lazily runs roadmap agent if `.dev/roadmap.md` doesn't exist, then runs the planner agent to write `.dev/sprint.md`. The critic agent always runs if its config exists, followed by a judge loop (up to 2 rounds) that evaluates the plan and revises if needed. On judge READY, sets `approved: true` in sprint.md. Pass `--context <file>` to skip lazy research and seed the planner directly.
|
|
35
|
+
|
|
36
|
+
**`roadmap`**: Runs roadmap agent → writes `.dev/roadmap.md` with `goal:`, `archetype:`, and `validate:` frontmatter plus shipping steps. Uses research summary if available. The roadmap is the authoritative goal+validate artifact. Pass `--deep` to run deep research first. Pass `--topic "question"` or `--topic-file brief.md` to run topic research (writes `.dev/research/{slug}/report.md`, no roadmap written).
|
|
37
|
+
|
|
38
|
+
**`ralph`** (`orchestrator.py: ralph_project()`): Outer loop: `(plan → sprint → evaluate) × N` until GOAL_MET or stuck. Requires `.dev/roadmap.md`. Each iteration calls `plan_project()` (planner + critic + judge), `execute_sprint()` (worker loop), `run_validation_hooks()`, and `evaluate_project()`. If tasks fail, appends a deferred investigation task to `roadmap.md` and stops. Loops until `goal_met=True` or `max_sprints` reached.
|
|
39
|
+
|
|
40
|
+
**`portfolio`**: Runs portfolio agent across all discovered projects → writes `<scan_dir>/.dev/portfolio.md`. Requires `--scan` or explicit paths. Auto-generates `.dev/roadmap.md` for any project that lacks one before building the portfolio (uses deep research if no existing research artifacts). The portfolio agent uses `roadmap.md` as its primary input per project.
|
|
41
|
+
|
|
42
|
+
## Key Patterns
|
|
43
|
+
|
|
44
|
+
- **Manifest format**: YAML frontmatter + markdown checkboxes at `.dev/sprint.md`. Task metadata persisted inline: `[id: foo]`, `[depends: bar]`, `[attempts: 2]`, `[status: failed]`, `[error: ...]`.
|
|
45
|
+
- **Session naming**: Every `run_agent()` call sets `extra_args={"session-name": "autopilot/{project}/{role}"}` so sessions appear distinctively in Claude Code's `/resume` history.
|
|
46
|
+
- **Project discovery**: `discover_projects()` finds dirs with `.dev/sprint.md`; `discover_all_projects()` finds any project-like dir (git, package.json, pyproject.toml, etc.).
|
|
47
|
+
- **Fork filtering**: In scan mode, non-owned repos are skipped by comparing git remote owner to detected user (`AUTOPILOT_GIT_USER` env → `git config autopilot.user` → `gh api user`). Use `--all` to disable.
|
|
48
|
+
- **Default cwd**: When no path arg is provided, autopilot defaults to the current directory for all modes.
|
|
49
|
+
|
|
50
|
+
## .dev Convention
|
|
51
|
+
|
|
52
|
+
All autopilot working files within a project live under `.dev/` (which should be in `.gitignore`):
|
|
53
|
+
- `.dev/sprint.md` — task manifest (`plan` output); used by `sprint` for worker loop
|
|
54
|
+
- `.dev/roadmap.md` — roadmap agent output; contains `goal:`, `archetype:`, and `validate:` frontmatter; used by `sprint` as the goal + validate definition
|
|
55
|
+
- `.dev/sprint-log.md` — sprint history, append-only, feeds planner context each sprint
|
|
56
|
+
- `.dev/project-summary.md` — researcher agent output
|
|
57
|
+
- `<scan_dir>/.dev/portfolio.md` — portfolio agent output
|
|
58
|
+
|
|
59
|
+
## Development Commands
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv pip install -e . # Install (editable)
|
|
63
|
+
autopilot sprint . # Execute approved sprint plan
|
|
64
|
+
autopilot sprint --auto-approve . # Execute, bypassing approval check
|
|
65
|
+
autopilot sprint --resume . # Reset stuck projects and retry
|
|
66
|
+
autopilot build . # Plan then execute (one-shot)
|
|
67
|
+
autopilot build --context spec.md . # Plan with context, then execute
|
|
68
|
+
autopilot plan . # Generate/improve manifest (plan + critic + judge)
|
|
69
|
+
autopilot roadmap . # Build shipping roadmap (goal + validate)
|
|
70
|
+
autopilot roadmap --deep . # Deep research then build roadmap
|
|
71
|
+
autopilot roadmap --topic "question" . # Research a specific topic
|
|
72
|
+
autopilot ralph . # Outer loop until goal met or stuck
|
|
73
|
+
autopilot sprint --scan ~/Projects # Auto-discover and process all projects
|
|
74
|
+
uv run ruff check src/ # Lint
|
|
75
|
+
uv run ruff format --check src/ # Format check
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Tests live in `tests/` (`test_cli.py`, `test_manifest.py`); run them with `uv run pytest tests/ -v`.
|
|
79
|
+
|
|
80
|
+
## Code Style
|
|
81
|
+
|
|
82
|
+
- Python 3.11+, async/await throughout
|
|
83
|
+
- Ruff linter: line length 100, rules `E, F, I, N, W, UP`
|
|
84
|
+
- Dataclasses (not Pydantic)
|
|
85
|
+
- Type hints with `X | None` union syntax (not `Optional`)
|
|
86
|
+
|
|
87
|
+
## Documentation
|
|
88
|
+
|
|
89
|
+
When adding or changing features that affect CLI usage, agent roles, or the manifest format, update `README.md`. The README is the primary user-facing documentation.
|
|
90
|
+
|
|
91
|
+
Release plans and post-MVP features tracked in `.dev/roadmap.md` and `.dev/plans/`.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Contributing to autopilot
|
|
2
|
+
|
|
3
|
+
## Setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
git clone https://github.com/timainge/autopilot
|
|
7
|
+
cd autopilot
|
|
8
|
+
uv pip install -e ".[dev]"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Adding a New Agent Role
|
|
12
|
+
|
|
13
|
+
1. Create `src/autopilot/agents/<role>.md` with YAML frontmatter:
|
|
14
|
+
```markdown
|
|
15
|
+
---
|
|
16
|
+
name: role-name
|
|
17
|
+
description: What this agent does
|
|
18
|
+
allowed_tools:
|
|
19
|
+
- Read
|
|
20
|
+
- Write
|
|
21
|
+
- Bash
|
|
22
|
+
max_turns: 20
|
|
23
|
+
max_budget_usd: 1.00
|
|
24
|
+
permission_mode: default
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
You are an expert...
|
|
28
|
+
```
|
|
29
|
+
2. Add a `build_<role>_prompt()` function in `prompts.py`
|
|
30
|
+
3. Add a `<role>_project()` function in `orchestrator.py`
|
|
31
|
+
4. Wire up the CLI flag in `cli.py`
|
|
32
|
+
|
|
33
|
+
## Running Linter
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
make lint
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Running Tests
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
make test
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Submitting PRs
|
|
46
|
+
|
|
47
|
+
- Keep changes focused; one feature per PR
|
|
48
|
+
- Run `make ci` before opening a PR
|
|
49
|
+
- Update `README.md` if you add CLI flags or agent roles
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: claude-autopilot
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Autonomous project session orchestrator for Claude Code
|
|
5
|
+
Project-URL: Homepage, https://github.com/timainge/autopilot
|
|
6
|
+
Project-URL: Documentation, https://timainge.github.io/autopilot
|
|
7
|
+
Project-URL: Repository, https://github.com/timainge/autopilot
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/timainge/autopilot/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/timainge/autopilot/releases
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: ai,automation,claude,claude-code,developer-tools
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: claude-agent-sdk>=0.1.0
|
|
22
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
26
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# autopilot
|
|
30
|
+
|
|
31
|
+
[PyPI](https://pypi.org/project/claude-autopilot/)
|
|
32
|
+
[CI](https://github.com/timainge/autopilot/actions/workflows/ci.yml)
|
|
33
|
+
[Python versions](https://pypi.org/project/claude-autopilot/)
|
|
34
|
+
[Docs](https://timainge.github.io/autopilot)
|
|
35
|
+
|
|
36
|
+
Autopilot is the outer loop for Claude Code. You describe what needs building; autopilot plans it, executes it task by task, and checks whether the goal was met — without you sitting there typing "continue".
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## How it works
|
|
41
|
+
|
|
42
|
+
Autopilot runs a three-stage cycle:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
roadmap → plan → sprint
|
|
46
|
+
↑ |
|
|
47
|
+
└──── evaluate ←─────┘
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Roadmap** — defines the goal, the shipping target, and what "done" looks like (validation commands). Optional but makes everything downstream sharper.
|
|
51
|
+
|
|
52
|
+
**Plan** — a planner agent reads the roadmap and writes `.dev/sprint.md`: a set of structured tasks. A critic reviews it, a judge approves it. No execution until the plan is approved.
|
|
53
|
+
|
|
54
|
+
**Sprint** — each task in the manifest gets a fresh Claude Code session. The worker implements, commits, and marks the task done. Failed tasks retry up to a configured limit.
|
|
55
|
+
|
|
56
|
+
**Ralph** is the outer loop that drives this cycle autonomously — planning a sprint, executing it, running validation, evaluating whether the goal is met, and repeating until it is.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install claude-autopilot
|
|
64
|
+
# or
|
|
65
|
+
uv pip install claude-autopilot
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Requires an Anthropic API key or a Claude Code subscription token:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Option A: API key
|
|
72
|
+
export ANTHROPIC_API_KEY=your-key-here
|
|
73
|
+
|
|
74
|
+
# Option B: Claude Code subscription (Max/Pro)
|
|
75
|
+
claude setup-token
|
|
76
|
+
export CLAUDE_CODE_OAUTH_TOKEN=<token from above>
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick start
|
|
82
|
+
|
|
83
|
+
**One-shot build** — plan then execute in a single command:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
autopilot build .
|
|
87
|
+
autopilot build --context spec.md . # seed the planner with a spec or TODO list
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Step by step** — more control:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
autopilot roadmap . # optional: build a goal + validate spec
|
|
94
|
+
autopilot plan . # write + approve the task manifest
|
|
95
|
+
autopilot sprint . # execute the approved manifest
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Fully autonomous loop** — keeps going until the goal is met:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
autopilot roadmap . # required for ralph: defines the goal and validate commands
|
|
102
|
+
autopilot ralph . # plan → sprint → evaluate, repeat
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Commands
|
|
108
|
+
|
|
109
|
+
### `roadmap`
|
|
110
|
+
|
|
111
|
+
Writes `.dev/roadmap.md` — the goal, archetype, validation commands, and shipping phases. Used
|
|
112
|
+
as the primary input for planning and as the termination condition for ralph.
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
autopilot roadmap . # assess the project and write a roadmap
|
|
116
|
+
autopilot roadmap --deep . # run deep research (web + ecosystem) first
|
|
117
|
+
autopilot roadmap --topic "question" . # research a specific question → .dev/research/
|
|
118
|
+
autopilot roadmap --topic-file brief.md . # same, from a file
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### `plan`
|
|
122
|
+
|
|
123
|
+
Runs the planner → critic → judge pipeline and writes an approved `.dev/sprint.md`.
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
autopilot plan . # auto-runs roadmap first if it doesn't exist
|
|
127
|
+
autopilot plan --context TODO.md . # seed with a spec or todo list, skip research
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
The critic reviews the plan adversarially. The judge evaluates readiness: if NOT_READY, the
|
|
131
|
+
planner revises once with the judge's feedback and the judge re-evaluates. When approved,
|
|
132
|
+
`approved: true` is set in the manifest automatically.
|
|
133
|
+
|
|
134
|
+
### `sprint`
|
|
135
|
+
|
|
136
|
+
Executes the approved `.dev/sprint.md` task manifest. Each task spawns a fresh Claude Code
|
|
137
|
+
session.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
autopilot sprint .
|
|
141
|
+
autopilot sprint --auto-approve . # skip the approval check
|
|
142
|
+
autopilot sprint --resume . # reset stuck projects, retry failed tasks
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### `build`
|
|
146
|
+
|
|
147
|
+
Shorthand for `plan` + `sprint --auto-approve` in one command.
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
autopilot build .
|
|
151
|
+
autopilot build --context spec.md .
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### `ralph`
|
|
155
|
+
|
|
156
|
+
The fully autonomous outer loop. Requires `.dev/roadmap.md` (run `autopilot roadmap .` first).
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
autopilot ralph .
|
|
160
|
+
autopilot ralph --auto-approve .
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Each iteration: plan a sprint → execute tasks → run `validate` commands from roadmap frontmatter
|
|
164
|
+
→ evaluate whether the goal is met. Stops when:
|
|
165
|
+
- The evaluator returns `GOAL_MET`
|
|
166
|
+
- Tasks fail (appends a deferred investigation task to `roadmap.md`)
|
|
167
|
+
- `max_sprints` is reached
|
|
168
|
+
|
|
169
|
+
### `portfolio`
|
|
170
|
+
|
|
171
|
+
Builds a cross-project index — goal, tech stack, current state, and prioritised quick wins.
|
|
172
|
+
Auto-generates `.dev/roadmap.md` for any project that lacks one before building.
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
autopilot portfolio --scan ~/Projects
|
|
176
|
+
autopilot portfolio path/to/proj-a path/to/proj-b
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Output: `<scan_dir>/.dev/portfolio.md`.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Multi-project scanning
|
|
184
|
+
|
|
185
|
+
Every command works with `--scan` to operate across a directory of projects:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
autopilot roadmap --scan ~/Projects
|
|
189
|
+
autopilot plan --scan ~/Projects
|
|
190
|
+
autopilot sprint --auto-approve --scan ~/Projects
|
|
191
|
+
autopilot ralph --scan ~/Projects
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Repos you don't own are skipped by default. Autopilot compares the git remote owner against
|
|
195
|
+
your username (checked in order: `AUTOPILOT_GIT_USER` env, `git config autopilot.user`, `gh`
|
|
196
|
+
CLI auth). Use `--all` to include forks and clones.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Configuration
|
|
201
|
+
|
|
202
|
+
### Per-project: `autopilot.toml`
|
|
203
|
+
|
|
204
|
+
```toml
|
|
205
|
+
[autopilot]
|
|
206
|
+
max_budget_usd = 10.0
|
|
207
|
+
max_task_attempts = 3
|
|
208
|
+
max_sprints = 5
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Global: `~/.config/autopilot/config.toml`
|
|
212
|
+
|
|
213
|
+
Same format. Per-project config takes precedence.
|
|
214
|
+
|
|
215
|
+
### Manifest frontmatter
|
|
216
|
+
|
|
217
|
+
`.dev/sprint.md` and `.dev/roadmap.md` use YAML frontmatter for structured config:
|
|
218
|
+
|
|
219
|
+
| Field | File | Default | Description |
|
|
220
|
+
|-------|------|---------|-------------|
|
|
221
|
+
| `name` | sprint | dir name | Project display name |
|
|
222
|
+
| `approved` | sprint | false | Approval gate — must be true before sprint runs |
|
|
223
|
+
| `status` | sprint | pending | pending / active / stuck / completed |
|
|
224
|
+
| `max_budget_usd` | sprint | 5.0 | Budget cap per sprint |
|
|
225
|
+
| `max_task_attempts` | sprint | 3 | Max retries per failed task |
|
|
226
|
+
| `goal` | roadmap | — | Goal type: launch / publish / complete |
|
|
227
|
+
| `archetype` | roadmap | — | Project archetype (e.g. `python-cli`) |
|
|
228
|
+
| `validate` | roadmap | — | Shell commands that must pass for goal completion |
|
|
229
|
+
|
|
230
|
+
Add `.dev/` to `.gitignore` — it contains orchestration state, not source code.
|
|
231
|
+
|
|
232
|
+
### Task format
|
|
233
|
+
|
|
234
|
+
Tasks in `.dev/sprint.md` are level-3 headings with a checkbox, a slug ID, and an optional
|
|
235
|
+
body:
|
|
236
|
+
|
|
237
|
+
```markdown
|
|
238
|
+
### [ ] create-api-client
|
|
239
|
+
|
|
240
|
+
Implement `src/client.py` with a `get()` and `post()` method using `httpx`.
|
|
241
|
+
Use the base URL from `config.py`. Raise `APIError` on non-2xx responses.
|
|
242
|
+
|
|
243
|
+
**Done**: `pytest tests/test_client.py` passes.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
### [ ] add-retry-logic [depends: create-api-client]
|
|
248
|
+
|
|
249
|
+
Add exponential backoff to the client using `tenacity`. Max 3 retries,
|
|
250
|
+
starting at 1s. Log each retry attempt at WARNING level.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
### [x] completed-task
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
- IDs are the heading text — must be `lowercase-with-dashes`
|
|
258
|
+
- Dependencies: `[depends: task-id]` or `[depends: a, b]` inline in the heading
|
|
259
|
+
- Retry metadata (`[attempts: N]`, `[status: failed]`, `[error: ...]`) is written by autopilot — don't edit manually
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## Agent roles
|
|
264
|
+
|
|
265
|
+
Agent configs live in `src/autopilot/agents/*.md` — YAML frontmatter + system prompt. Sessions
|
|
266
|
+
appear in Claude Code's `/resume` history as `autopilot/projectname/role`.
|
|
267
|
+
|
|
268
|
+
| Role | Invoked by | What it does |
|
|
269
|
+
|------|-----------|--------------|
|
|
270
|
+
| `planner` | `plan`, `build`, `ralph` | Writes `.dev/sprint.md` |
|
|
271
|
+
| `critic` | `plan`, `build`, `ralph` | Reviews the plan adversarially |
|
|
272
|
+
| `judge` | `plan`, `build`, `ralph` | Approves or rejects the plan |
|
|
273
|
+
| `worker` | `sprint`, `build`, `ralph` | Executes a task, commits |
|
|
274
|
+
| `roadmap` | `roadmap`, `ralph` | Writes `.dev/roadmap.md`; evaluates goal completion |
|
|
275
|
+
| `researcher` | (lazy, before `plan`) | Analyses codebase → `.dev/project-summary.md` |
|
|
276
|
+
| `deep-researcher` | `roadmap --deep` | Extended web research before roadmapping |
|
|
277
|
+
| `portfolio` | `portfolio` | Cross-project index → `<scan_dir>/.dev/portfolio.md` |
|
|
278
|
+
|
|
279
|
+
### Custom roles
|
|
280
|
+
|
|
281
|
+
Drop a markdown file into `agents/` (or use `--agents-dir` to point to a custom directory):
|
|
282
|
+
|
|
283
|
+
```markdown
|
|
284
|
+
---
|
|
285
|
+
name: reviewer
|
|
286
|
+
description: Reviews completed tasks for quality
|
|
287
|
+
allowed_tools: [Read, Glob, Bash, Grep]
|
|
288
|
+
permission_mode: default
|
|
289
|
+
max_turns: 20
|
|
290
|
+
max_budget_usd: 0.50
|
|
291
|
+
---
|
|
292
|
+
|
|
293
|
+
You are a code reviewer. You read recently completed tasks and assess quality...
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Design notes
|
|
299
|
+
|
|
300
|
+
**Why the Agent SDK, not CLI pipes?**
|
|
301
|
+
The SDK wraps Claude Code programmatically — same tools, proper message streaming, error
|
|
302
|
+
handling. Each `query()` call is a fresh Claude Code session with clean context.
|
|
303
|
+
|
|
304
|
+
**Why sequential tasks, not parallel?**
|
|
305
|
+
Simpler to debug, cheaper, and avoids merge conflicts. Parallel execution via git worktrees is
|
|
306
|
+
planned for a future release.
|
|
307
|
+
|
|
308
|
+
**Why a human approval gate?**
|
|
309
|
+
The judge evaluates readiness and sets `approved: true` automatically when the plan passes; otherwise a human must set it explicitly (or pass
|
|
310
|
+
`--auto-approve`). This prevents runaway execution on half-baked plans.
|
|
311
|
+
|
|
312
|
+
**Why markdown manifests?**
|
|
313
|
+
The manifest doubles as project documentation. YAML frontmatter gives structured config; the
|
|
314
|
+
markdown body gives context that humans and agents can both read naturally.
|