npm - @jterrats/open-orchestra - Versions diffs - 0.1.0 → 0.3.0 - Mend

@jterrats/open-orchestra 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

package/AGENTS.md +90 -0
package/CHANGELOG.md +104 -0
package/CLAUDE.md +103 -0
package/README.md +173 -22
package/dist/assets/web-console.js +743 -0
package/dist/autonomous-workflow.d.ts +45 -0
package/dist/autonomous-workflow.js +386 -0
package/dist/autonomous-workflow.js.map +1 -0
package/dist/benchmark.d.ts +8 -0
package/dist/benchmark.js +193 -0
package/dist/benchmark.js.map +1 -0
package/dist/burndown.d.ts +3 -0
package/dist/burndown.js +141 -0
package/dist/burndown.js.map +1 -0
package/dist/clarification.d.ts +6 -0
package/dist/clarification.js +88 -0
package/dist/clarification.js.map +1 -0
package/dist/cli.js +221 -4
package/dist/cli.js.map +1 -1
package/dist/collaboration-flows.d.ts +5 -0
package/dist/collaboration-flows.js +256 -0
package/dist/collaboration-flows.js.map +1 -0
package/dist/command-manifest.d.ts +11 -0
package/dist/command-manifest.js +52 -0
package/dist/command-manifest.js.map +1 -0
package/dist/commands.d.ts +39 -0
package/dist/commands.js +1069 -2
package/dist/commands.js.map +1 -1
package/dist/constants.d.ts +4 -0
package/dist/constants.js +22 -0
package/dist/constants.js.map +1 -1
package/dist/defaults.d.ts +7 -11
package/dist/defaults.js +7 -625
package/dist/defaults.js.map +1 -1
package/dist/delegation-decision.d.ts +14 -0
package/dist/delegation-decision.js +391 -0
package/dist/delegation-decision.js.map +1 -0
package/dist/detect-commands.d.ts +3 -0
package/dist/detect-commands.js +28 -0
package/dist/detect-commands.js.map +1 -0
package/dist/diagram-validation.d.ts +36 -0
package/dist/diagram-validation.js +118 -0
package/dist/diagram-validation.js.map +1 -0
package/dist/fs-utils.d.ts +2 -0
package/dist/fs-utils.js +75 -6
package/dist/fs-utils.js.map +1 -1
package/dist/github.d.ts +11 -0
package/dist/github.js +48 -0
package/dist/github.js.map +1 -0
package/dist/health-checks.d.ts +28 -0
package/dist/health-checks.js +219 -0
package/dist/health-checks.js.map +1 -0
package/dist/health-commands.d.ts +2 -0
package/dist/health-commands.js +18 -0
package/dist/health-commands.js.map +1 -0
package/dist/instruction-apply.d.ts +34 -0
package/dist/instruction-apply.js +150 -0
package/dist/instruction-apply.js.map +1 -0
package/dist/instruction-blocks.d.ts +22 -0
package/dist/instruction-blocks.js +120 -0
package/dist/instruction-blocks.js.map +1 -0
package/dist/instruction-imports.d.ts +12 -0
package/dist/instruction-imports.js +45 -0
package/dist/instruction-imports.js.map +1 -0
package/dist/instruction-stale.d.ts +9 -0
package/dist/instruction-stale.js +106 -0
package/dist/instruction-stale.js.map +1 -0
package/dist/instruction-types.d.ts +66 -0
package/dist/instruction-types.js +2 -0
package/dist/instruction-types.js.map +1 -0
package/dist/instruction-updates.d.ts +4 -0
package/dist/instruction-updates.js +5 -0
package/dist/instruction-updates.js.map +1 -0
package/dist/knowledge-base.d.ts +10 -0
package/dist/knowledge-base.js +117 -0
package/dist/knowledge-base.js.map +1 -0
package/dist/mcp-oauth-proxy.d.ts +39 -0
package/dist/mcp-oauth-proxy.js +80 -0
package/dist/mcp-oauth-proxy.js.map +1 -0
package/dist/pr-review.d.ts +20 -0
package/dist/pr-review.js +142 -0
package/dist/pr-review.js.map +1 -0
package/dist/project-detection.d.ts +22 -0
package/dist/project-detection.js +174 -0
package/dist/project-detection.js.map +1 -0
package/dist/prompt-registry.d.ts +56 -0
package/dist/prompt-registry.js +163 -0
package/dist/prompt-registry.js.map +1 -0
package/dist/release-candidate.d.ts +41 -0
package/dist/release-candidate.js +196 -0
package/dist/release-candidate.js.map +1 -0
package/dist/release-commands.d.ts +4 -0
package/dist/release-commands.js +50 -0
package/dist/release-commands.js.map +1 -0
package/dist/roles/ai-support-roles.d.ts +11 -0
package/dist/roles/ai-support-roles.js +67 -0
package/dist/roles/ai-support-roles.js.map +1 -0
package/dist/roles/core-roles.d.ts +11 -0
package/dist/roles/core-roles.js +144 -0
package/dist/roles/core-roles.js.map +1 -0
package/dist/roles/engineering-roles.d.ts +11 -0
package/dist/roles/engineering-roles.js +176 -0
package/dist/roles/engineering-roles.js.map +1 -0
package/dist/roles/governance-roles.d.ts +11 -0
package/dist/roles/governance-roles.js +117 -0
package/dist/roles/governance-roles.js.map +1 -0
package/dist/roles/index.d.ts +11 -0
package/dist/roles/index.js +17 -0
package/dist/roles/index.js.map +1 -0
package/dist/roles/platform-ops-roles.d.ts +11 -0
package/dist/roles/platform-ops-roles.js +158 -0
package/dist/roles/platform-ops-roles.js.map +1 -0
package/dist/roles/qa-ux-roles.d.ts +11 -0
package/dist/roles/qa-ux-roles.js +193 -0
package/dist/roles/qa-ux-roles.js.map +1 -0
package/dist/roles/release-ops-roles.d.ts +11 -0
package/dist/roles/release-ops-roles.js +109 -0
package/dist/roles/release-ops-roles.js.map +1 -0
package/dist/runtime-adapters.d.ts +6 -0
package/dist/runtime-adapters.js +88 -0
package/dist/runtime-adapters.js.map +1 -0
package/dist/runtime-bootstrap.d.ts +12 -0
package/dist/runtime-bootstrap.js +136 -0
package/dist/runtime-bootstrap.js.map +1 -0
package/dist/skills.d.ts +36 -0
package/dist/skills.js +665 -0
package/dist/skills.js.map +1 -0
package/dist/subagent-protocol.d.ts +41 -0
package/dist/subagent-protocol.js +179 -0
package/dist/subagent-protocol.js.map +1 -0
package/dist/telemetry-consent.d.ts +24 -0
package/dist/telemetry-consent.js +95 -0
package/dist/telemetry-consent.js.map +1 -0
package/dist/telemetry-export.d.ts +14 -0
package/dist/telemetry-export.js +126 -0
package/dist/telemetry-export.js.map +1 -0
package/dist/telemetry-records.d.ts +3 -0
package/dist/telemetry-records.js +96 -0
package/dist/telemetry-records.js.map +1 -0
package/dist/telemetry-redaction.d.ts +9 -0
package/dist/telemetry-redaction.js +55 -0
package/dist/telemetry-redaction.js.map +1 -0
package/dist/telemetry-types.d.ts +52 -0
package/dist/telemetry-types.js +2 -0
package/dist/telemetry-types.js.map +1 -0
package/dist/telemetry.d.ts +4 -0
package/dist/telemetry.js +4 -0
package/dist/telemetry.js.map +1 -0
package/dist/types.d.ts +304 -1
package/dist/types.js +1 -1
package/dist/types.js.map +1 -1
package/dist/validation.d.ts +3 -1
package/dist/validation.js +28 -5
package/dist/validation.js.map +1 -1
package/dist/web-api.js +167 -3
package/dist/web-api.js.map +1 -1
package/dist/web-console.js +6 -160
package/dist/web-console.js.map +1 -1
package/dist/workflow-gates.js +4 -2
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-services.js +143 -67
package/dist/workflow-services.js.map +1 -1
package/dist/workflow-templates.d.ts +10 -0
package/dist/workflow-templates.js +141 -0
package/dist/workflow-templates.js.map +1 -0
package/dist/workspace-classification.d.ts +5 -0
package/dist/workspace-classification.js +127 -0
package/dist/workspace-classification.js.map +1 -0
package/dist/workspace-validator.js +11 -1
package/dist/workspace-validator.js.map +1 -1
package/dist/workspace.d.ts +8 -4
package/dist/workspace.js +111 -4
package/dist/workspace.js.map +1 -1
package/docs/autonomous-workflow.md +165 -0
package/docs/benchmark.md +219 -0
package/docs/dev-team-specialist-role-profiles.md +171 -0
package/docs/mcp-oauth-proxy-evaluation.md +44 -0
package/docs/multi-agent-orchestrator-backlog.md +413 -1
package/docs/open-orchestra-dogfooding-findings.md +66 -0
package/docs/orchestra-mvp.md +161 -3
package/docs/runtime-adapters.md +86 -0
package/docs/runtime-llm-flow.md +124 -0
package/docs/setup-agents-dogfooding-findings.md +101 -0
package/docs/skill-loading-strategy.md +114 -0
package/docs/source-of-truth-and-agent-learning.md +83 -0
package/package.json +9 -5
package/rules/agent-roles.mdc +30 -0
package/rules/ai-assisted-development.mdc +22 -0
package/skills/agent-learning/SKILL.md +24 -0
package/skills/agent-learning/manifest.json +40 -0
package/skills/backlog-sync/SKILL.md +24 -0
package/skills/backlog-sync/manifest.json +41 -0
package/skills/diagram-export/SKILL.md +35 -0
package/skills/diagram-export/manifest.json +40 -0
package/skills/model-evaluation/SKILL.md +25 -0
package/skills/model-evaluation/manifest.json +41 -0
package/skills/playwright-evidence/SKILL.md +28 -0
package/skills/playwright-evidence/manifest.json +46 -0
package/skills/pr-review/SKILL.md +23 -0
package/skills/pr-review/manifest.json +43 -0
package/skills/prompt-registry/SKILL.md +24 -0
package/skills/prompt-registry/manifest.json +45 -0
package/skills/release-readiness/SKILL.md +25 -0
package/skills/release-readiness/manifest.json +45 -0
package/skills/source-of-truth/SKILL.md +24 -0
package/skills/source-of-truth/manifest.json +47 -0
package/skills/static-analysis/SKILL.md +26 -0
package/skills/static-analysis/manifest.json +46 -0

package/AGENTS.md CHANGED Viewed

@@ -149,3 +149,93 @@
 - Ask for **Backlog Item ID** first. Use Conventional Commits with backlog scope: `type(ID): short description`.
 - Mark breaking changes with `!` and a `BREAKING CHANGE:` footer.
 - Keep PRs <400 lines. Review your own diff before requesting review.
+<!-- open-orchestra:start block-id="runtime-bootstrap" generator="open-orchestra runtime bootstrap" version="1" target="codex" source-manifest="open-orchestra command-manifest,runtime-bootstrap" content-sha256="c215b82bf89b09651384dbee3c7bd8c4e66cab148adc844b4979788848017be5" updated-at="2026-05-06T22:12:02.623Z" -->
+# Open Orchestra Runtime Bootstrap
+Runtime target: Codex. Reference Open Orchestra from AGENTS.md so local CLI work follows workflow gates.
+Use Open Orchestra as the local control plane when `.agent-workflow/` exists.
+The active LLM runtime is the parent agent. Do not assume automatic real subagent spawning or real provider execution.
+## Orchestra Workflow — Required for All Work
+Every piece of work — feature, bug fix, architecture decision, stack definition, PO refinement, or research spike —
+MUST go through the Orchestra workflow. Do not start any work without a registered task and a running workflow.
+### Step 1 — Register the task
+```
+orchestra task add --id <ID> --title "<title>" --owner <role> --paths "<files>" --goal "<goal>"
+```
+Use the correct owner role for the type of work:
+- Architecture / stack decisions → `architect`
+- Product strategy / roadmap → `product_manager`
+- Backlog refinement / acceptance criteria → `product_owner`
+- Implementation → `developer`
+- Verification / QA → `qa`
+- Release / deploy → `release_manager`
+### Step 2 — Declare effort baseline
+```
+orchestra estimate --task <ID> --sizing <xs|s|m|l|xl> --solo-days <N> --ai-unguided-days <N>
+```
+### Step 3 — Run the autonomous workflow
+```
+orchestra workflow run --task <ID> --gates phase
+```
+The workflow sequences PM → PO → Architect → Developer → QA → Release.
+Gates pause at `po→architect` and `qa→release` for human review.
+The architect phase requires a sizing decision before proceeding:
+```
+orchestra decision add --task <ID> --owner architect --title "Story sizing" \
+  --decision "<xs|s|m|l|xl> [N points]" --context "..." --consequences "..." --status accepted
+```
+### Step 4 — Collaborate through the phases
+Each phase routes work to the right role. Pass your comments, requirements, or context via:
+- `orchestra decision add` — architecture decisions, stack choices, accepted trade-offs
+- `orchestra review` — review findings from any role
+- `orchestra workflow clarify` — blocking questions from developer/QA to PO or architect
+- `orchestra evidence add` — artifacts, commands run, test results
+### Step 5 — Resume after gates
+```
+orchestra workflow run --task <ID> --resume <run-id>
+```
+### Step 6 — Benchmark after completion
+```
+orchestra benchmark --task <ID>
+```
+## Active Work
+- Run `orchestra health --json`.
+- Run `orchestra task list --json` and identify the active task.
+- For the active task, run context, delegation, plan, skills, protocol, and workflow render commands.
+## Task Loop
+- `orchestra health` - Check local tools and workflow readiness.
+- `orchestra task list` - List local workflow tasks.
+- `orchestra context --task <id>` - Read task context bundle.
+- `orchestra delegation decide --task <id>` - Decide whether to delegate.
+- `orchestra plan --task <id>` - Render role execution plan.
+- `orchestra skills plan --task <id>` - Select task-scoped skills.
+- `orchestra skills render --target <target>` - Render skills for a runtime.
+- `orchestra protocol render` - Render subagent protocol.
+- `orchestra workflow render --task <id>` - Render workflow templates.
+- `orchestra evidence add --task <id> --role <role> --type <type> --summary <text>` - Record delivery evidence.
+- `orchestra review --task <id> --role <role> --result <approve|block|changes> --findings <text> --recommendation <text>` - Record reviewer outcome.
+- `orchestra gate --gate <id> --task <id>` - Evaluate workflow gate.
+- `orchestra summary` - Summarize workspace state.
+## Completion
+- Run the project validation gate.
+- Record command/file/browser evidence with `orchestra evidence add`.
+- Record review outcome with `orchestra review`.
+- Update task status only after evidence and review are present.
+## Command Discovery
+- Use `orchestra commands manifest --json` for command metadata.
+- Use `orchestra --help` for human-readable help.
+<!-- open-orchestra:end block-id="runtime-bootstrap" -->

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,104 @@
+# Changelog
+## Unreleased
+## 0.3.0 - 2026-05-06
+### Added
+- **Autonomous workflow engine** (`workflow run`) executes a full story lifecycle as a governed multi-phase sequence: PM → PO → Architect → Developer → QA → Release. State is persisted in `.agent-workflow/workflow-runs.jsonl` (append-only).
+- `--gates none|phase|all` controls how many human approval checkpoints are required. `phase` mode gates at `po→architect` and `qa→release`; `all` gates every transition; `none` runs fully autonomously.
+- `--dry-run` prints the phase graph with gate annotations without creating any records.
+- `--resume <run-id>` continues a paused or clarification-suspended run from the correct phase.
+- **Architect sizing gate**: always enforced regardless of `--gates` mode. The architect must record a sizing decision (`xs/s/m/l/xl`) before the developer phase starts. If missing, the run fails with the exact `orchestra decision add` command to resolve it.
+- `orchestra workflow runs` lists all runs with status and phase trace.
+- **Clarification loop** (`workflow clarify`, `workflow clarify-respond`, `workflow clarify-list`): developer and QA phases can surface blocking questions to the PO or architect mid-phase. The active phase is suspended until the answer is recorded, then resumed with `--resume`. Records persisted in `.agent-workflow/clarifications.jsonl`.
+- **Effectiveness benchmark** (`orchestra estimate` + `orchestra benchmark`): PM declares solo and AI-unguided baselines once at story start; actual cycle time is computed automatically from autonomous run timestamps after the release phase completes. `orchestra benchmark --summary` prints a sortable table across all stories.
+- **Quality signals** are automatically collected from the event log per story: review count, blocking reviews, evidence artifacts, gate blocks, lessons generated, model tokens, and estimated cost. They appear in `orchestra benchmark` output with no additional input.
+- **Sprint burndown** (`orchestra burndown --sprint <task-ids-csv>`): computes ideal vs actual lines from story point estimates. Developer points take priority over architect sizing; the burndown falls back to architect sizing when no developer estimate exists. Prints an ASCII chart with a task breakdown, or a JSON series with `--json`.
+- **Developer story point estimation**: developer records implementation effort via `orchestra decision add` with `actor=developer` and `metadata.points` — same pattern as architect sizing, distinguishable by `actor` field. Used as primary input for burndown; creates a calibration signal when both architect and developer estimates exist.
+- **QA→Developer retry loop**: when a QA phase closes with `qa_fail`, the workflow routes back to the developer phase with failure notes as context. Each retry creates a numbered sub-task (`TASK-developer-<run-suffix>-retry1`). The `maxIterations` cap prevents infinite loops.
+- **GitHub PR auto-creation** (`github.autoCreatePr: true` in config): after the release phase completes, automatically creates a PR via `gh pr create` with title and body generated from `pr-summary` data. PR URL is recorded as an evidence artifact. Failure is non-fatal — the run completes with a warning.
+- **Enriched runtime bootstrap block**: `orchestra init` now injects an "Orchestra Workflow — Required for All Work" section into `CLAUDE.md`, `AGENTS.md`, Cursor rules, Windsurf rules, and VS Code files covering any work type: architecture decisions, stack definition, PO refinement, product strategy, implementation, QA, and release. Guidance on the correct owner role is included for each work type.
+### Fixed
+- `resumePhaseIndex` now scans past `gate_paused` phases instead of returning immediately, preventing re-execution of already-completed phases on runs with multiple sequential gates.
+- `recordDecision` now extracts `sizing` and `points` from the `--decision` text for architect decisions (pattern: `"<xs|s|m|l|xl> [N points]"`), so the architect sizing gate reads them correctly via the standard CLI flow.
+- Phase sub-task IDs are now run-scoped (`TASK-phase-<run-suffix>`) preventing collisions when multiple workflow runs target the same task.
+### Documentation
+- README rewritten to lead with the autonomous workflow feature, clarification loop, benchmark, and burndown sections.
+- `docs/autonomous-workflow.md`: new reference page for the workflow engine, gate modes, sizing gate, clarification loop, and run/phase state reference.
+- `docs/benchmark.md`: new reference page for estimate, benchmark, burndown, quality signals, and developer story points.
+- `docs/orchestra-mvp.md` updated with all new sections, command examples, revised workflow files layout, and current scope.
+### Validation
+- Dogfooded using Orchestra itself: ORCH-017 and ORCH-022 were developed as autonomous workflow runs, surfacing and fixing the sizing extraction and task ID collision bugs during the process.
+## 0.2.2 - 2026-05-06
+### Changed
+- Added `orchestra --version`, `orchestra -v`, and `orchestra version --json`
+  for install and release verification.
+- Release automation now creates GitHub Releases from CI-created tags.
+- CI workflows use Node 24-compatible official GitHub Actions.
+### Validation
+- Validates the automated tag, GitHub Release, and npm publication chain.
+## 0.2.1 - 2026-05-06
+### Fixed
+- Generated runtime bootstrap guidance now uses the portable installed
+  executable form, `orchestra ...`, instead of repo-local
+  `node bin/orchestra.js ...` examples.
+- Skill render no longer creates partial `.agent-workflow/` state before
+  `orchestra init`, preventing CI validation failures in freshly cloned repos.
+- File-lock writes now create missing parent directories before locking, making
+  event recording safer in new workspaces.
+- Concurrent `evidence add` calls now create collision-resistant artifact
+  filenames instead of overwriting evidence recorded in the same millisecond.
+### Validation
+- Added regression coverage for portable bootstrap command examples, missing
+  lock parent directories, avoiding partial workflow state before init, and
+  concurrent evidence artifact creation.
+## 0.2.0 - 2026-05-06
+### Added
+- Runtime adapter catalog for `generic`, `claude`, `cursor`, `codex`, `vscode`,
+  and `windsurf`.
+- `orchestra runtime adapters --json` for clients and LLM runtimes that need
+  stable adapter metadata.
+- Target-specific init support with `orchestra init --target <csv>`.
+- Advisory init support for explicit runtime targets without default root
+  instruction writes.
+- Workspace classification and runtime adapter visibility in the web console.
+- Runtime adapter documentation covering Claude, Codex, Cursor, VS Code,
+  Windsurf, and generic LLM flows.
+### Changed
+- Runtime bootstrap and subagent protocol guidance now read from the runtime
+  adapter catalog instead of hardcoded target text.
+- Workspace classification uses the adapter catalog for supported target lists.
+- VS Code extension package version aligned with the CLI package.
+### Security
+- Extended workspace-local write guards to instruction apply/block paths.
+- Continued blocking unsafe workspace roots before init writes.
+### Validation
+- Added tests for runtime adapters, target-specific init, advisory target init,
+  workspace classification, and instruction path escape rejection.

package/CLAUDE.md CHANGED Viewed

@@ -2,6 +2,19 @@
 > Stack-agnostic guardrails for all projects. Managed by Open Orchestra.
+## Orchestra Workflow — REQUIRED
+This repo uses Open Orchestra for all development work. Every feature or fix MUST go through the workflow:
+1. **Task registration**: `orchestra task add --id <ID> --title "..." --owner <role> --paths "..." --goal "..."` before any code is written.
+2. **Effort baseline**: `orchestra estimate --task <ID> --sizing <xs|s|m|l|xl> --solo-days N --ai-unguided-days N` before starting implementation.
+3. **Autonomous run**: `orchestra workflow run --task <ID> --gates phase` to execute the PM→PO→Architect→Developer→QA→Release phase sequence.
+4. **Sizing decision**: `orchestra decision add --task <ID> --owner architect --title "Story sizing" --decision "<sizing> [N points]" ...` is required before the developer phase proceeds.
+5. **Architect sizing format**: always use `"<xs|s|m|l|xl> [N points]"` in `--decision` so the sizing gate reads it correctly (e.g. `"m [5 points]"`).
+6. **Benchmark**: `orchestra benchmark --task <ID>` after completion to record actual vs declared effort.
+Do NOT start implementation without a registered task and workflow run. Use `orchestra workflow runs` to check active runs. Use `orchestra benchmark --summary` to review effectiveness.
 ## Rule Composition
 - Keep root files concise. Put detailed guidance in focused `rules/*.mdc` files and reference only the operating principles here.
@@ -155,3 +168,93 @@
 - Concise, but detailed in architectural justifications.
 - Correct mistakes directly without apologizing.
 - **No Ninja Edits.** Summarize proposed changes and get agreement before modifying files.
+<!-- open-orchestra:start block-id="runtime-bootstrap" generator="open-orchestra runtime bootstrap" version="1" target="claude" source-manifest="open-orchestra command-manifest,runtime-bootstrap" content-sha256="af9e26eb55d14bc7d504c357f6fe6e996eaa2ea1ffb45a830afa15b1885fff8a" updated-at="2026-05-06T22:12:14.883Z" -->
+# Open Orchestra Runtime Bootstrap
+Runtime target: Claude. Reference compact Open Orchestra blocks from CLAUDE.md and load task skills on demand.
+Use Open Orchestra as the local control plane when `.agent-workflow/` exists.
+The active LLM runtime is the parent agent. Do not assume automatic real subagent spawning or real provider execution.
+## Orchestra Workflow — Required for All Work
+Every piece of work — feature, bug fix, architecture decision, stack definition, PO refinement, or research spike —
+MUST go through the Orchestra workflow. Do not start any work without a registered task and a running workflow.
+### Step 1 — Register the task
+```
+orchestra task add --id <ID> --title "<title>" --owner <role> --paths "<files>" --goal "<goal>"
+```
+Use the correct owner role for the type of work:
+- Architecture / stack decisions → `architect`
+- Product strategy / roadmap → `product_manager`
+- Backlog refinement / acceptance criteria → `product_owner`
+- Implementation → `developer`
+- Verification / QA → `qa`
+- Release / deploy → `release_manager`
+### Step 2 — Declare effort baseline
+```
+orchestra estimate --task <ID> --sizing <xs|s|m|l|xl> --solo-days <N> --ai-unguided-days <N>
+```
+### Step 3 — Run the autonomous workflow
+```
+orchestra workflow run --task <ID> --gates phase
+```
+The workflow sequences PM → PO → Architect → Developer → QA → Release.
+Gates pause at `po→architect` and `qa→release` for human review.
+The architect phase requires a sizing decision before proceeding:
+```
+orchestra decision add --task <ID> --owner architect --title "Story sizing" \
+  --decision "<xs|s|m|l|xl> [N points]" --context "..." --consequences "..." --status accepted
+```
+### Step 4 — Collaborate through the phases
+Each phase routes work to the right role. Pass your comments, requirements, or context via:
+- `orchestra decision add` — architecture decisions, stack choices, accepted trade-offs
+- `orchestra review` — review findings from any role
+- `orchestra workflow clarify` — blocking questions from developer/QA to PO or architect
+- `orchestra evidence add` — artifacts, commands run, test results
+### Step 5 — Resume after gates
+```
+orchestra workflow run --task <ID> --resume <run-id>
+```
+### Step 6 — Benchmark after completion
+```
+orchestra benchmark --task <ID>
+```
+## Active Work
+- Run `orchestra health --json`.
+- Run `orchestra task list --json` and identify the active task.
+- For the active task, run context, delegation, plan, skills, protocol, and workflow render commands.
+## Task Loop
+- `orchestra health` - Check local tools and workflow readiness.
+- `orchestra task list` - List local workflow tasks.
+- `orchestra context --task <id>` - Read task context bundle.
+- `orchestra delegation decide --task <id>` - Decide whether to delegate.
+- `orchestra plan --task <id>` - Render role execution plan.
+- `orchestra skills plan --task <id>` - Select task-scoped skills.
+- `orchestra skills render --target <target>` - Render skills for a runtime.
+- `orchestra protocol render` - Render subagent protocol.
+- `orchestra workflow render --task <id>` - Render workflow templates.
+- `orchestra evidence add --task <id> --role <role> --type <type> --summary <text>` - Record delivery evidence.
+- `orchestra review --task <id> --role <role> --result <approve|block|changes> --findings <text> --recommendation <text>` - Record reviewer outcome.
+- `orchestra gate --gate <id> --task <id>` - Evaluate workflow gate.
+- `orchestra summary` - Summarize workspace state.
+## Completion
+- Run the project validation gate.
+- Record command/file/browser evidence with `orchestra evidence add`.
+- Record review outcome with `orchestra review`.
+- Update task status only after evidence and review are present.
+## Command Discovery
+- Use `orchestra commands manifest --json` for command metadata.
+- Use `orchestra --help` for human-readable help.
+<!-- open-orchestra:end block-id="runtime-bootstrap" -->

package/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 Open Orchestra is a local-first, provider-agnostic framework for governed multi-agent software delivery.
-It coordinates agents through task graphs, durable workflow files, handoffs, reviews, evidence, gates, locks, model routing, model provenance, and budget controls. The public CLI is `orchestra`.
+It coordinates agents through task graphs, durable workflow files, handoffs, reviews, evidence, gates, locks, model routing, model provenance, budget controls, and an autonomous multi-phase workflow engine. The public CLI is `orchestra`.
 ## Quick Start
@@ -14,40 +14,191 @@ node bin/orchestra.js status
 node bin/orchestra.js roles list --json
 ```
+For Claude, Codex, Cursor, VS Code, Windsurf, and generic LLM usage, see
+[docs/runtime-llm-flow.md](docs/runtime-llm-flow.md) and
+[docs/runtime-adapters.md](docs/runtime-adapters.md).
+## Autonomous Workflow
+The `workflow run` command executes a full story lifecycle as a governed multi-phase sequence without manual step-by-step commands. Each phase creates a sub-task, generates handoff artifacts, and persists state in an append-only run log.
+```
+PM → PO [gate] → Architect [sizing gate] → Developer → QA [gate] → Release
+```
+```bash
+# Dry run — inspect the phase graph and gate annotations without persisting state
+orchestra workflow run --task FEAT-001 --dry-run --gates phase
+# Full autonomous run, no human gates
+orchestra workflow run --task FEAT-001 --gates none
+# Run with human approval gates at po→architect and qa→release
+orchestra workflow run --task FEAT-001 --gates phase
+# Resume a paused or clarification-suspended run
+orchestra workflow run --task FEAT-001 --resume <run-id>
+# List all runs with status and phase trace
+orchestra workflow runs
+```
+**Gate modes:**
+| Mode | Gates |
+|------|-------|
+| `none` | Fully autonomous — no human approval required |
+| `phase` | Pauses at `po→architect` and `qa→release` |
+| `all` | Pauses at every phase transition |
+**Architect sizing gate** is always enforced regardless of `--gates` mode. The architect must record a sizing decision (`xs/s/m/l/xl`) before the developer phase starts. If missing, the run fails with the exact command to resolve it.
+### Clarification Loop
+Developers or QA engineers can surface blocking questions to the PO or architect mid-phase without stopping the workflow or making unvalidated assumptions.
+```bash
+# Developer asks PO a question (suspends the current phase)
+orchestra workflow clarify --run <run-id> --from developer --to po \
+  --question "Should empty input return null or throw?"
+# PO answers (unblocks the suspended phase; the run itself is resumed with --resume)
+orchestra workflow clarify-respond --run <run-id> --clarification <id> \
+  --answer "Return null — downstream code handles it."
+# Resume execution after the answer is recorded
+orchestra workflow run --task FEAT-001 --resume <run-id>
+# Inspect all clarifications for a run
+orchestra workflow clarify-list --run <run-id>
+```
+Clarifications are persisted in `.agent-workflow/clarifications.jsonl` and visible in task context.
+## Benchmark & Sprint Burndown
+Open Orchestra measures effectiveness across three development modes — solo, AI-unguided, and the Orchestra workflow run — and generates a sprint burndown from story point estimates.
+### Estimate (declare baselines once, at story start)
+```bash
+orchestra estimate \
+  --task FEAT-001 \
+  --sizing m \
+  --solo-days 5 \
+  --ai-unguided-days 3 \
+  --confidence high
+```
+### Benchmark (auto-computed after run completes)
+```bash
+# Per-story report: cycle time, savings %, quality signals
+orchestra benchmark --task FEAT-001
+# Sprint summary table
+orchestra benchmark --summary
+```
+Example output:
+```
+Benchmark: FEAT-001  [complete]
+  Sizing:      m
+  Solo:        5d  (declared)
+  AI-unguided: 3d  (declared)
+  Actual:      1.4d
+  vs Solo:     -72%
+  vs AI:       -53%
+  QA loops:    1
+  Reviews:     3 (0 blocking)
+  Evidence:    5 artifacts
+  Lessons:     2
+  Tokens:      17500in / 5000out
+  Cost:        $0.0257
+```
+Quality signals (reviews, evidence, lessons, gate blocks, token usage, cost) are read automatically from the event log — no manual input after the initial estimate.
+### Sprint Burndown
+Developer story points take priority over architect sizing; the burndown falls back to architect sizing if the developer hasn't estimated yet.
+```bash
+# ASCII chart + task breakdown
+orchestra burndown --sprint FEAT-001,FEAT-002,FEAT-003
+# JSON series for dashboards
+orchestra burndown --sprint FEAT-001,FEAT-002,FEAT-003 --json
+```
+The developer records their own estimate with:
+```bash
+orchestra decision add \
+  --task FEAT-001 \
+  --owner developer \
+  --title "Dev estimate" \
+  --decision "M / 8 points" \
+  --context "..." --consequences "..." --status accepted
+```
+See [docs/benchmark.md](docs/benchmark.md) for the full reference.
 ## Role Catalog
 Open Orchestra treats roles as capabilities and governance responsibilities, not only human job titles. Projects can keep roles inactive until risk, scope, impact area, or a workflow gate requires them.
-Core delivery roles remain available: Product Manager, Product Owner, Business Analyst, Architect, Developer, QA, Security, DevOps, SRE, DBA, UX/UI Designer, Release Manager, Compliance/Privacy, and Technical Writer.
+Core delivery roles: Product Manager, Product Owner, Business Analyst, Architect, Developer, QA, Security, DevOps, SRE, DBA, Data Engineer, UX/UI Designer, Accessibility Reviewer, Release Manager, Compliance/Privacy, Technical Writer, Tech Lead, SDET, Platform Engineer, Frontend Specialist, Backend Specialist, Mobile Specialist, AI Evaluation Engineer, and Support/Customer Operations.
+Orchestration roles for modern multi-agent systems:
+- Parent Agent / Orchestrator — sequencing, handoffs, locks, escalation, integration.
+- Planner — work breakdown, dependency mapping, role activation rationale.
+- Reviewer / Critic — independent review before gates or handoffs.
+- Toolsmith / Integration Engineer — tools, MCPs, providers, adapters, automation contracts.
+- Context Curator / Memory Manager — decisions, assumptions, stale context, shared memory hygiene.
+- Policy / Governance Agent — approvals, budgets, workflow rules, compliance gates.
+- Observability / Incident Response — telemetry, alerts, runbooks, incident readiness.
+- Data / Privacy Officer — PII, retention, encryption, access, data compliance.
+- Domain Expert — project-specific business or industry judgment.
+- Performance Engineer — load, latency, scalability, caching, concurrency, graceful degradation.
+- Game Designer — gameplay loops, tutorialization, player feedback, balance risk.
+Each default role declares activation criteria, expected evidence, and gate participation so a parent agent can select only the roles needed for a task. See [docs/dev-team-specialist-role-profiles.md](docs/dev-team-specialist-role-profiles.md) for specialist profiles.
+## Workflow Files
+```text
+.agent-workflow/
+  config.json
+  roles.json
+  tasks.json
+  locks.json
+  events.jsonl
+  workflow-runs.jsonl       ← autonomous run state (append-only)
+  clarifications.jsonl      ← clarification loop records (append-only)
+  estimates.jsonl           ← declared effort baselines (append-only)
+  approvals/
+  decisions/
+  handoffs/
+  evidence/
+  reviews/
+  runs/
+```
-State-of-the-art orchestration roles are also included:
+## Skills and Context Loading
-- Parent Agent / Orchestrator for sequencing, handoffs, locks, escalation, and integration.
-- Planner for work breakdown, dependency mapping, and role activation rationale.
-- Reviewer / Critic for independent review before gates or handoffs.
-- Toolsmith / Integration Engineer for tools, MCPs, providers, adapters, and automation contracts.
-- Context Curator / Memory Manager for decisions, assumptions, stale context, and shared memory hygiene.
-- Policy / Governance Agent for approvals, budgets, workflow rules, and compliance gates.
-- Observability / Incident Response for telemetry, alerts, runbooks, and incident readiness.
-- Data / Privacy Officer for PII, retention, encryption, access, and data compliance.
-- Domain Expert for project-specific business or industry judgment.
-- UX Researcher / Accessibility Reviewer for mobile-first UX, accessibility, onboarding, and game/player guidance.
-- Performance Engineer for load, latency, scalability, caching, concurrency, and graceful degradation.
-- Game Designer for gameplay loops, tutorialization, player feedback, and balance risk.
+Primary instruction files should stay short. Detailed procedures live in task-scoped skills loaded only when needed. See [docs/skill-loading-strategy.md](docs/skill-loading-strategy.md) for the manifest, loading flow, and built-in skill candidates.
-Each default role declares activation criteria, expected evidence, and gate participation so a parent agent can select only the roles needed for a task.
+## Prompt Registry
-## VS Code Control Center
+Open Orchestra scaffolds a stack-agnostic `.generated-prompts/` registry during `orchestra init`. Agents use it to preserve prompt intent and generation conventions without bloating main instruction files. The registry is split by artifact type: code, UI, services, tests, CI/CD, docs, diagrams, and evals.
-The first VS Code extension scaffold lives in `extensions/vscode-open-orchestra`. It provides an Open Orchestra activity bar view that consumes stable CLI JSON contracts for status, validation, graph plan, roles, approvals, evidence, config inspection, and Playwright evidence attachment. The CLI remains the source of truth; the extension does not parse human-readable terminal output or duplicate workflow file logic.
+## VS Code Control Center
-Initial local usage:
+The VS Code extension scaffold lives in `extensions/vscode-open-orchestra`. It consumes stable CLI JSON contracts for status, validation, graph plan, roles, approvals, evidence, and config inspection. The CLI remains the source of truth.
 ```bash
-# From this repo, build the CLI first
 npm run build
-# Open the extension folder in VS Code and run the extension host
 code extensions/vscode-open-orchestra
 ```
@@ -57,4 +208,4 @@ code extensions/vscode-open-orchestra
 - Existing `AGENTS.md`, `CLAUDE.md`, Cursor rules, and generated instruction files remain supported.
 - `ORCHESTRA.md` is the intended future primary guide name and can coexist with current agent instruction files.
-See [docs/orchestra-mvp.md](docs/orchestra-mvp.md) for the current command reference.
+See [docs/orchestra-mvp.md](docs/orchestra-mvp.md) for the full command reference.