npm - @jterrats/open-orchestra - Versions diffs - 0.5.5 → 1.0.1 - Mend

@jterrats/open-orchestra 0.5.5 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (310) hide show

package/AGENTS.md +9 -8
package/CLAUDE.md +13 -11
package/README.md +78 -11
package/dist/assets/web-console.js +203 -36
package/dist/automation-evidence.d.ts +23 -0
package/dist/automation-evidence.js +97 -0
package/dist/automation-evidence.js.map +1 -0
package/dist/autonomous-run-state.d.ts +4 -1
package/dist/autonomous-run-state.js +8 -2
package/dist/autonomous-run-state.js.map +1 -1
package/dist/autonomous-run-store.d.ts +3 -1
package/dist/autonomous-run-store.js +9 -3
package/dist/autonomous-run-store.js.map +1 -1
package/dist/autonomous-workflow-constants.js +5 -1
package/dist/autonomous-workflow-constants.js.map +1 -1
package/dist/benchmark.d.ts +4 -1
package/dist/benchmark.js +140 -19
package/dist/benchmark.js.map +1 -1
package/dist/cli.js +88 -2
package/dist/cli.js.map +1 -1
package/dist/collaboration-flows.js +5 -19
package/dist/collaboration-flows.js.map +1 -1
package/dist/collection-utils.d.ts +3 -0
package/dist/collection-utils.js +10 -0
package/dist/collection-utils.js.map +1 -0
package/dist/command-manifest.d.ts +12 -1
package/dist/command-manifest.js +218 -10
package/dist/command-manifest.js.map +1 -1
package/dist/commands.d.ts +14 -6
package/dist/commands.js +78 -28
package/dist/commands.js.map +1 -1
package/dist/config-migrations.d.ts +24 -0
package/dist/config-migrations.js +102 -0
package/dist/config-migrations.js.map +1 -0
package/dist/constants.d.ts +3 -0
package/dist/constants.js +26 -0
package/dist/constants.js.map +1 -1
package/dist/cursor-canvas.d.ts +20 -0
package/dist/cursor-canvas.js +119 -0
package/dist/cursor-canvas.js.map +1 -0
package/dist/dashboard-commands.d.ts +2 -0
package/dist/dashboard-commands.js +14 -0
package/dist/dashboard-commands.js.map +1 -0
package/dist/defaults.d.ts +13 -0
package/dist/defaults.js +13 -0
package/dist/defaults.js.map +1 -1
package/dist/delegation-decision.js +23 -8
package/dist/delegation-decision.js.map +1 -1
package/dist/delivery-commands.js +5 -0
package/dist/delivery-commands.js.map +1 -1
package/dist/delivery-dashboard-charts.d.ts +4 -0
package/dist/delivery-dashboard-charts.js +156 -0
package/dist/delivery-dashboard-charts.js.map +1 -0
package/dist/delivery-dashboard-html.d.ts +2 -0
package/dist/delivery-dashboard-html.js +115 -0
package/dist/delivery-dashboard-html.js.map +1 -0
package/dist/delivery-dashboard-types.d.ts +78 -0
package/dist/delivery-dashboard-types.js +2 -0
package/dist/delivery-dashboard-types.js.map +1 -0
package/dist/delivery-dashboard.d.ts +8 -0
package/dist/delivery-dashboard.js +124 -0
package/dist/delivery-dashboard.js.map +1 -0
package/dist/doc-sync.d.ts +25 -0
package/dist/doc-sync.js +79 -0
package/dist/doc-sync.js.map +1 -0
package/dist/effort-classification.d.ts +7 -0
package/dist/effort-classification.js +72 -0
package/dist/effort-classification.js.map +1 -0
package/dist/extension-commands.d.ts +3 -0
package/dist/extension-commands.js +40 -0
package/dist/extension-commands.js.map +1 -0
package/dist/extensions.d.ts +22 -0
package/dist/extensions.js +126 -0
package/dist/extensions.js.map +1 -0
package/dist/gemini-provider.d.ts +3 -6
package/dist/gemini-provider.js +8 -17
package/dist/gemini-provider.js.map +1 -1
package/dist/github.d.ts +2 -0
package/dist/github.js +15 -3
package/dist/github.js.map +1 -1
package/dist/health-checks.js +51 -0
package/dist/health-checks.js.map +1 -1
package/dist/lucid-story-map.d.ts +73 -0
package/dist/lucid-story-map.js +112 -0
package/dist/lucid-story-map.js.map +1 -0
package/dist/mcp-integrations.d.ts +19 -0
package/dist/mcp-integrations.js +58 -0
package/dist/mcp-integrations.js.map +1 -0
package/dist/mcp-tool-adapter.d.ts +21 -0
package/dist/mcp-tool-adapter.js +56 -0
package/dist/mcp-tool-adapter.js.map +1 -0
package/dist/metrics-commands.js +47 -13
package/dist/metrics-commands.js.map +1 -1
package/dist/model-commands.d.ts +5 -0
package/dist/model-commands.js +95 -1
package/dist/model-commands.js.map +1 -1
package/dist/model-providers.d.ts +5 -12
package/dist/model-providers.js +30 -43
package/dist/model-providers.js.map +1 -1
package/dist/network-policy.d.ts +2 -0
package/dist/network-policy.js +6 -0
package/dist/network-policy.js.map +1 -0
package/dist/ollama-provider.d.ts +3 -6
package/dist/ollama-provider.js +7 -16
package/dist/ollama-provider.js.map +1 -1
package/dist/package-update-check.d.ts +19 -0
package/dist/package-update-check.js +24 -0
package/dist/package-update-check.js.map +1 -1
package/dist/phase-executor.d.ts +1 -0
package/dist/phase-executor.js +401 -9
package/dist/phase-executor.js.map +1 -1
package/dist/phase-playbooks.d.ts +18 -1
package/dist/phase-playbooks.js +146 -2
package/dist/phase-playbooks.js.map +1 -1
package/dist/planning-commands.d.ts +1 -0
package/dist/planning-commands.js +36 -36
package/dist/planning-commands.js.map +1 -1
package/dist/policy-commands.d.ts +2 -0
package/dist/policy-commands.js +29 -0
package/dist/policy-commands.js.map +1 -0
package/dist/policy-defaults.d.ts +2 -0
package/dist/policy-defaults.js +42 -0
package/dist/policy-defaults.js.map +1 -0
package/dist/policy.d.ts +20 -0
package/dist/policy.js +155 -0
package/dist/policy.js.map +1 -0
package/dist/project-detection.js +9 -7
package/dist/project-detection.js.map +1 -1
package/dist/prompt-registry-update.d.ts +2 -0
package/dist/prompt-registry-update.js +5 -1
package/dist/prompt-registry-update.js.map +1 -1
package/dist/prompt-registry-validation.d.ts +3 -0
package/dist/prompt-registry-validation.js +61 -21
package/dist/prompt-registry-validation.js.map +1 -1
package/dist/provider-utils.d.ts +11 -0
package/dist/provider-utils.js +14 -0
package/dist/provider-utils.js.map +1 -1
package/dist/qa-commands.d.ts +2 -0
package/dist/qa-commands.js +18 -0
package/dist/qa-commands.js.map +1 -0
package/dist/qa-coverage.d.ts +24 -0
package/dist/qa-coverage.js +189 -0
package/dist/qa-coverage.js.map +1 -0
package/dist/qa-readiness.d.ts +5 -0
package/dist/qa-readiness.js +26 -0
package/dist/qa-readiness.js.map +1 -0
package/dist/refresh-generated.d.ts +32 -0
package/dist/refresh-generated.js +180 -0
package/dist/refresh-generated.js.map +1 -0
package/dist/release-candidate.d.ts +9 -1
package/dist/release-candidate.js +52 -1
package/dist/release-candidate.js.map +1 -1
package/dist/release-commands.js +161 -8
package/dist/release-commands.js.map +1 -1
package/dist/release-readiness.d.ts +33 -0
package/dist/release-readiness.js +187 -3
package/dist/release-readiness.js.map +1 -1
package/dist/runtime-adapters.d.ts +2 -1
package/dist/runtime-adapters.js +16 -0
package/dist/runtime-adapters.js.map +1 -1
package/dist/runtime-bootstrap.js +1 -1
package/dist/runtime-bootstrap.js.map +1 -1
package/dist/runtime-commands.d.ts +2 -0
package/dist/runtime-commands.js +85 -3
package/dist/runtime-commands.js.map +1 -1
package/dist/runtime-execution-adapters.js +40 -0
package/dist/runtime-execution-adapters.js.map +1 -1
package/dist/runtime-execution-renderer.d.ts +3 -2
package/dist/runtime-execution-renderer.js +46 -8
package/dist/runtime-execution-renderer.js.map +1 -1
package/dist/runtime-execution.d.ts +8 -2
package/dist/runtime-execution.js +109 -11
package/dist/runtime-execution.js.map +1 -1
package/dist/runtime-guardrails.d.ts +26 -0
package/dist/runtime-guardrails.js +168 -0
package/dist/runtime-guardrails.js.map +1 -0
package/dist/setup-agents-import.js +5 -3
package/dist/setup-agents-import.js.map +1 -1
package/dist/skills-catalog.js +1 -0
package/dist/skills-catalog.js.map +1 -1
package/dist/skills-commands.d.ts +5 -0
package/dist/skills-commands.js +79 -2
package/dist/skills-commands.js.map +1 -1
package/dist/skills-memory.d.ts +36 -2
package/dist/skills-memory.js +165 -6
package/dist/skills-memory.js.map +1 -1
package/dist/skills-planning.js +9 -22
package/dist/skills-planning.js.map +1 -1
package/dist/skills-render.js +2 -4
package/dist/skills-render.js.map +1 -1
package/dist/skills.d.ts +1 -1
package/dist/skills.js +1 -1
package/dist/skills.js.map +1 -1
package/dist/sprint-commands.js +2 -1
package/dist/sprint-commands.js.map +1 -1
package/dist/subagent-protocol.js +3 -5
package/dist/subagent-protocol.js.map +1 -1
package/dist/support-commands.d.ts +2 -0
package/dist/support-commands.js +18 -0
package/dist/support-commands.js.map +1 -0
package/dist/support-diagnostics.d.ts +49 -0
package/dist/support-diagnostics.js +86 -0
package/dist/support-diagnostics.js.map +1 -0
package/dist/task-graph-commands.js +6 -14
package/dist/task-graph-commands.js.map +1 -1
package/dist/task-text.d.ts +8 -0
package/dist/task-text.js +18 -0
package/dist/task-text.js.map +1 -0
package/dist/telemetry-redaction.js +8 -1
package/dist/telemetry-redaction.js.map +1 -1
package/dist/tool-commands.d.ts +3 -0
package/dist/tool-commands.js +62 -0
package/dist/tool-commands.js.map +1 -1
package/dist/tracker-adapters.d.ts +71 -0
package/dist/tracker-adapters.js +186 -0
package/dist/tracker-adapters.js.map +1 -0
package/dist/tracker-commands.d.ts +2 -0
package/dist/tracker-commands.js +119 -0
package/dist/tracker-commands.js.map +1 -0
package/dist/types/metrics.d.ts +25 -1
package/dist/types/model-config.d.ts +51 -4
package/dist/types/runtime.d.ts +83 -0
package/dist/types/skills.d.ts +2 -0
package/dist/types/tasks.d.ts +10 -0
package/dist/types/workflow-run.d.ts +35 -0
package/dist/types.d.ts +12 -4
package/dist/types.js.map +1 -1
package/dist/upgrade-commands.js +13 -4
package/dist/upgrade-commands.js.map +1 -1
package/dist/validation.js +2 -2
package/dist/validation.js.map +1 -1
package/dist/visual-validation.d.ts +81 -0
package/dist/visual-validation.js +290 -0
package/dist/visual-validation.js.map +1 -0
package/dist/web-action-security.d.ts +11 -0
package/dist/web-action-security.js +45 -0
package/dist/web-action-security.js.map +1 -0
package/dist/web-api-read-routes.js +115 -3
package/dist/web-api-read-routes.js.map +1 -1
package/dist/web-api.js +507 -5
package/dist/web-api.js.map +1 -1
package/dist/web-artifacts.d.ts +55 -0
package/dist/web-artifacts.js +222 -0
package/dist/web-artifacts.js.map +1 -0
package/dist/web-console/assets/index-C9lx-V42.css +1 -0
package/dist/web-console/assets/index-M3S0g1GK.js +11 -0
package/dist/web-console/index.html +13 -0
package/dist/web-console.js +9 -3
package/dist/web-console.js.map +1 -1
package/dist/web-recovery.d.ts +30 -0
package/dist/web-recovery.js +163 -0
package/dist/web-recovery.js.map +1 -0
package/dist/web-workflow-progress.d.ts +41 -0
package/dist/web-workflow-progress.js +114 -0
package/dist/web-workflow-progress.js.map +1 -0
package/dist/workflow-approval-service.d.ts +2 -1
package/dist/workflow-approval-service.js +83 -4
package/dist/workflow-approval-service.js.map +1 -1
package/dist/workflow-approval-utils.js +13 -3
package/dist/workflow-approval-utils.js.map +1 -1
package/dist/workflow-event-query.d.ts +2 -0
package/dist/workflow-event-query.js +6 -0
package/dist/workflow-event-query.js.map +1 -0
package/dist/workflow-evidence-service.js +18 -9
package/dist/workflow-evidence-service.js.map +1 -1
package/dist/workflow-gates.d.ts +2 -0
package/dist/workflow-gates.js +103 -0
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-markdown.d.ts +6 -0
package/dist/workflow-markdown.js +25 -0
package/dist/workflow-markdown.js.map +1 -0
package/dist/workflow-phase-planner.d.ts +19 -0
package/dist/workflow-phase-planner.js +133 -0
package/dist/workflow-phase-planner.js.map +1 -0
package/dist/workflow-run-commands.d.ts +1 -0
package/dist/workflow-run-commands.js +247 -20
package/dist/workflow-run-commands.js.map +1 -1
package/dist/workflow-services.d.ts +21 -12
package/dist/workflow-services.js +376 -260
package/dist/workflow-services.js.map +1 -1
package/dist/workflow-task-service.d.ts +11 -0
package/dist/workflow-task-service.js +242 -0
package/dist/workflow-task-service.js.map +1 -0
package/dist/workflow-templates.js +2 -14
package/dist/workflow-templates.js.map +1 -1
package/dist/workspace-validator.js +133 -5
package/dist/workspace-validator.js.map +1 -1
package/dist/workspace.js +10 -2
package/dist/workspace.js.map +1 -1
package/docs/adoption-guide.md +147 -0
package/docs/autonomous-workflow.md +146 -28
package/docs/benchmark.md +17 -9
package/docs/command-contracts.md +18 -1
package/docs/core-command-surface.md +62 -13
package/docs/end-to-end-demo.md +1 -0
package/docs/extension-contracts.md +83 -0
package/docs/orchestra-mvp.md +86 -3
package/docs/persona-workflows.md +32 -0
package/docs/release-test-matrix.md +42 -0
package/docs/runtime-adapters.md +113 -0
package/docs/runtime-llm-flow.md +13 -0
package/docs/setup-agents-applicability-review.md +173 -0
package/docs/skill-loading-strategy.md +1 -0
package/docs/source-of-truth-and-agent-learning.md +14 -0
package/docs/traceability-flow.md +5 -1
package/docs/tracker-adapter-contract.md +10 -1
package/docs/web-console-qa.md +35 -0
package/package.json +12 -6
package/rules/development-engineering.mdc +66 -0
package/skills/doc-sync/SKILL.md +2 -0

package/docs/autonomous-workflow.md CHANGED Viewed

@@ -2,20 +2,43 @@
 `orchestra workflow run` executes a full story lifecycle as a governed multi-phase sequence without requiring manual step-by-step commands. Each phase creates a sub-task, generates handoff artifacts, and persists state in an append-only run log at `.agent-workflow/workflow-runs.jsonl`.
+## End-To-End Lifecycle
+Use the autonomous workflow when a task needs product framing, architecture,
+implementation, QA evidence, and release readiness in one governed trace.
+1. Register or sync the backlog item with a task ID, paths, owner, and
+   acceptance criteria.
+2. Record an estimate and an architect sizing decision before implementation
+   work begins.
+3. Start `orchestra workflow run --task <id> --gates phase`.
+4. Review the PO to Architect gate. Approve it when the story is refined enough
+   for technical design and implementation.
+5. Let Architect, Developer, and QA phases produce handoffs, reviews, evidence,
+   and any clarification records needed to finish the work.
+6. Review the QA to Release gate. Approve it only when validation evidence,
+   unresolved risks, release notes, and rollback expectations are acceptable.
+7. Resume into Release, then run `orchestra benchmark --task <id>` to capture
+   actual delivery data for future estimates.
+The run state, gate artifacts, handoffs, evidence, reviews, decisions, and
+clarifications are persisted under `.agent-workflow/` so the delivery story can
+be audited after the fact.
 ## Phase Graph
 ```
 PM → PO [gate] → Architect [sizing gate] → Developer → QA [gate] → Release
 ```
-| Phase | Role | Summary |
-|-------|------|---------|
-| `pm` | product_manager | Product framing, prioritization, and success metrics |
-| `po` | product_owner | Backlog refinement, story sizing, and acceptance criteria |
-| `architect` | architect | Technical tasking, design decisions, and size estimation |
-| `developer` | developer | Implementation against acceptance criteria |
-| `qa` | qa | Verification against acceptance criteria and edge cases |
-| `release` | release_manager | Release candidate validation and PR creation |
+| Phase       | Role            | Output                                                               | Human checkpoint                                          |
+| ----------- | --------------- | -------------------------------------------------------------------- | --------------------------------------------------------- |
+| `pm`        | product_manager | Product framing, trade-offs, sequencing, and success metrics         | None by default                                           |
+| `po`        | product_owner   | Refined scope, acceptance criteria, assumptions, and release value   | `po→architect` when `--gates phase` or `--gates all`      |
+| `architect` | architect       | Technical approach, affected boundaries, sizing decision, and risks  | Architect sizing gate is always required                  |
+| `developer` | developer       | Code/docs changes, implementation notes, and Developer to QA handoff | Optional clarification to PO or Architect                 |
+| `qa`        | qa              | Test plan, validation evidence, gaps, and QA recommendation          | `qa→release` when `--gates phase` or `--gates all`        |
+| `release`   | release_manager | Release readiness, rollback notes, and final completion state        | Release approval is represented by the QA to Release gate |
 ## Gate Modes
@@ -25,14 +48,33 @@ orchestra workflow run --task <id> --gates phase   # gates at po→architect and
 orchestra workflow run --task <id> --gates all     # gate at every transition
 ```
-| Mode | Pauses at |
-|------|-----------|
-| `none` | Never — fully autonomous |
+| Mode    | Pauses at                       |
+| ------- | ------------------------------- |
+| `none`  | Never — fully autonomous        |
 | `phase` | `po→architect` and `qa→release` |
-| `all` | Every phase transition |
+| `all`   | Every phase transition          |
 When a gate is reached, the run writes a review artifact to `.agent-workflow/approvals/` and prints the exact `--resume` command. The run resumes when a human approves and runs that command.
+## Gates Versus Clarifications
+Gates and clarifications solve different problems. A gate is a planned approval
+checkpoint between phases. A clarification is a mid-phase question that prevents
+the active role from continuing safely.
+| Situation                                                           | Use           | Why                                                                   |
+| ------------------------------------------------------------------- | ------------- | --------------------------------------------------------------------- |
+| PO acceptance criteria need human confirmation before design starts | Gate          | This is a planned phase boundary                                      |
+| QA evidence is complete but release risk needs sign-off             | Gate          | This determines whether Release may proceed                           |
+| Developer needs to know whether empty input is valid                | Clarification | The active phase is blocked by a product or architecture question     |
+| QA finds ambiguous expected behavior while writing tests            | Clarification | The answer should unblock QA without inventing a new phase boundary   |
+| Architect chooses between two durable system approaches             | Decision      | This is an architecture record, not a pause by itself                 |
+| Reviewer finds a defect after evidence is attached                  | Review        | This is a quality finding that can approve, block, or request changes |
+Do not use gates for every question. Use `workflow clarify` when the question is
+specific, answerable by PO or Architect, and the current Developer or QA phase
+can continue after the answer is recorded.
 ## Provider-Backed Phase Execution
 By default, workflow phases remain deterministic because the default provider is `none`. When a role or default provider route is configured to a non-`none` provider, each phase builds a prompt from task context, rendered skills, the active phase playbook, and prior handoff content, then writes the provider output to `.agent-workflow/runs/<task>/<phase>/`.
@@ -45,6 +87,18 @@ orchestra model provenance list --task FEAT-001 --json
 Provider execution records `MODEL_PROVENANCE_RECORDED` events. The benchmark layer uses those events to report token and cost signals.
+When every configured provider fails, the workflow prints sanitized per-provider causes and stores them in the failed phase notes and `AUTONOMOUS_RUN_FAILED` event metadata. These diagnostics distinguish DNS/network failures, missing credentials, HTTP status errors, policy blocks, and exhausted fallbacks without exposing API keys, auth headers, or raw secret values. If the cause is still unclear, run that provider's smoke test with the same credential file or environment variables used by `.agent-workflow/config.json`.
+## Runtime Tool Permission Policy
+Runtime briefs and delegation packets include a `Tool Permission Policy` section. This is adapter metadata, not an instruction to bypass runtime safety by default.
+Claude CLI is intentionally brief-only in Open Orchestra today. Direct non-interactive Claude execution must not use `claude --print <prompt>` alone for tool-using tasks, because the process can block waiting for tool approval. If direct Claude CLI execution is added later, it must require explicit user opt-in and choose permission flags from the adapter policy:
+- `--gates none` / fully autonomous mode: use the adapter autonomy flags, but only with explicit opt-in.
+- `--gates phase` or `--gates all`: use read-only allowed tools by default and require separate approval for write or shell tools.
+- Brief/delegation rendering remains the default path and does not grant tool permissions by itself.
 ## Phase Playbooks
 `orchestra init` creates editable phase playbooks in `.agent-workflow/playbooks/`:
@@ -63,6 +117,23 @@ Use playbooks for phase-specific guidance that should not live in always-loaded
 Playbooks are provider-agnostic. They are loaded into provider-backed phase prompts, `workflow render --phase <phase>`, runtime briefs, and runtime delegation packets. Only the active phase playbook is loaded.
+### Authoring Playbooks
+Write playbooks as concise phase instructions, not as a second root
+`AGENTS.md`. A good playbook should include:
+- Role objective for the phase.
+- Inputs the role should read before acting.
+- Required outputs and artifact names.
+- Quality gates or review checks owned by that role.
+- Evidence that must be recorded before handoff.
+- Escalation rules for clarifications, decisions, reviews, or security checks.
+Keep project-specific conventions in the project playbooks and keep stack-wide
+policy in root instructions or `rules/*.mdc`. If a playbook starts duplicating
+guidance that applies to all phases, move the shared rule into `rules/` and
+leave only phase-specific work in the playbook.
 Configuration is convention-over-config by default, with optional overrides:
 ```json
@@ -78,9 +149,27 @@ Configuration is convention-over-config by default, with optional overrides:
 If a playbook file is missing, Orchestra uses deterministic fallback guidance and surfaces a warning in rendered content.
+### Playbook Resolution
+Resolution order is:
+1. `workflow.phasePlaybooks[phase]` in `.agent-workflow/config.json`, when set.
+2. `<playbooksDir>/<phase>.md`, where `playbooksDir` defaults to
+   `.agent-workflow/playbooks`.
+3. Deterministic fallback guidance built into Open Orchestra.
+Use `orchestra workflow render --task <id> --phase <phase>` to inspect the exact
+playbook content that a runtime brief or provider-backed phase will receive.
+Use `orchestra health --json` to confirm the workflow state is readable before a
+run.
 ## Architect Sizing Gate
-Regardless of `--gates` mode, the architect phase always requires a sizing decision before the developer phase starts. In provider-backed mode the architect phase can record that decision automatically; otherwise, if no sizing decision is found, the run stops with the exact command to resolve it:
+Regardless of `--gates` mode, the architect phase always requires a valid sizing decision before the developer phase starts. Provider-backed architect phases record that decision from structured phase output. Deterministic architect phases record a conservative default (`m [3 points]`) when no architect sizing exists, so unattended local runs remain complete and auditable. If the provider returns an unsupported sizing label, Orchestra normalizes it back to the same default before recording the decision.
+The developer phase also records implementation story points when no developer estimate exists. Provider-backed phases can return `developerPoints` in the structured output; deterministic phases use the architect point estimate when available, or `3 points` as a conservative fallback. Burndown uses developer points before architect points.
+When manual correction is needed, record an accepted architect decision:
 ```bash
 orchestra decision add \
@@ -98,6 +187,10 @@ Valid sizing labels: `xs`, `s`, `m`, `l`, `xl`. An optional numeric point estima
 ## Usage
 ```bash
+# Explain recommended phases from project signals and task risk
+orchestra workflow phase-plan --task FEAT-001
+orchestra workflow phase-plan --task FEAT-001 --json
 # Inspect the phase graph without persisting state
 orchestra workflow run --task FEAT-001 --dry-run --gates phase
@@ -127,6 +220,31 @@ orchestra workflow runs
 orchestra workflow runs --json
 ```
+`workflow phase-plan` is advisory. It uses project detection, task text, risks,
+and paths to recommend additional review phases such as `ux_review` for
+frontend accessibility or responsive behavior, and `docs_review` for
+documentation or public-site changes. If `.agent-workflow/config.json` already
+defines `workflow.phaseSequence`, that manual sequence remains authoritative and
+the recommendations are reported without silently changing the run.
+## Web and Runtime Progress
+Workflow progress is available from both the CLI and the local web console:
+```bash
+orchestra workflow runs --json
+orchestra web
+```
+The local web console reads `/api/workflow/progress` and shows the active phase,
+role, provider/model, elapsed time, fallback state, failed reason, paused gates,
+and resumable runs. This is a local web API contract; it does not execute
+runtime agents or call provider APIs by itself.
+Provider-backed phases also print progress in `workflow run` human output. The
+default provider remains `none`, so deterministic workflow runs do not require
+model credentials.
 ## Clarification Loop
 Developers or QA engineers can surface blocking questions to the PO or architect mid-phase. The active phase is suspended until the answer is recorded, then resumed normally.
@@ -159,24 +277,24 @@ Clarification records are persisted in `.agent-workflow/clarifications.jsonl` an
 ## Run States
-| Status | Meaning |
-|--------|---------|
-| `running` | Execution in progress |
-| `paused` | Waiting for human gate approval or clarification answer |
-| `done` | All phases completed successfully |
-| `failed` | Run stopped due to a missing prerequisite (e.g. sizing decision) |
+| Status    | Meaning                                                          |
+| --------- | ---------------------------------------------------------------- |
+| `running` | Execution in progress                                            |
+| `paused`  | Waiting for human gate approval or clarification answer          |
+| `done`    | All phases completed successfully                                |
+| `failed`  | Run stopped due to a missing prerequisite (e.g. sizing decision) |
 ## Phase States
-| Status | Meaning |
-|--------|---------|
-| `pending` | Not yet started |
-| `running` | Currently executing |
-| `done` | Completed and handed off |
-| `gate_paused` | Completed; waiting for human gate approval before next phase |
-| `awaiting_clarification` | Suspended; waiting for a clarification answer |
-| `qa_failed` | QA found issues; developer phase will retry |
-| `blocked` | Blocked by an unresolvable condition |
+| Status                   | Meaning                                                      |
+| ------------------------ | ------------------------------------------------------------ |
+| `pending`                | Not yet started                                              |
+| `running`                | Currently executing                                          |
+| `done`                   | Completed and handed off                                     |
+| `gate_paused`            | Completed; waiting for human gate approval before next phase |
+| `awaiting_clarification` | Suspended; waiting for a clarification answer                |
+| `qa_failed`              | QA found issues; developer phase will retry                  |
+| `blocked`                | Blocked by an unresolvable condition                         |
 ## Gate Pause Notifications

package/docs/benchmark.md CHANGED Viewed

@@ -8,15 +8,16 @@ Open Orchestra measures the effectiveness of AI-assisted development across thre
 |------|-------------|
 | **Solo (no LLM)** | Declared by PM or architect at story start — counterfactual estimate |
 | **AI-unguided** | Declared at story start — how long with a general LLM but no roles, gates, or skills |
+| **AI-guided** | Declared at story start — how long with AI plus Orchestra workflow, roles, gates, memory, and evidence |
 | **AI + Orchestra (actual)** | Measured automatically from `AUTONOMOUS_PHASE_DONE` timestamps in the event log |
-The first two are self-reported. Orchestra only measures the third. The comparison is meaningful even with declared baselines because it creates a consistent, auditable record across many stories.
+The first three are self-reported. Orchestra measures the actual governed run. The comparison is meaningful even with declared baselines because it creates a consistent, auditable record across many stories.
 ## Usage
 ### 1. Declare baselines at story start
-Record the three-mode estimate before work begins — ideally during the architect phase.
+Record the three-mode estimate before work begins — ideally during the architect phase. Autonomous workflow runs also ensure an architect sizing decision exists before developer handoff; deterministic architect phases record `m [3 points]` when no architect sizing exists, while provider-backed phases record the normalized provider output.
 ```bash
 orchestra estimate \
@@ -24,6 +25,7 @@ orchestra estimate \
   --sizing m \
   --solo-days 5 \
   --ai-unguided-days 3 \
+  --ai-guided-days 2 \
   --confidence high \
   --declared-by pm
 ```
@@ -36,6 +38,7 @@ Options:
 | `--sizing` | Yes | — | `xs`, `s`, `m`, `l`, `xl` |
 | `--solo-days` | Yes | — | Estimated days without any AI |
 | `--ai-unguided-days` | Yes | — | Estimated days with a general LLM, no Orchestra |
+| `--ai-guided-days` | Yes | — | Estimated days with AI guided by Orchestra workflow |
 | `--confidence` | No | `medium` | `low`, `medium`, `high` |
 | `--declared-by` | No | `pm` | Role recording the estimate |
 | `--json` | No | — | Structured output |
@@ -66,9 +69,11 @@ Benchmark: FEAT-001  [complete]
   Sizing:      m
   Solo:        5d  (declared)
   AI-unguided: 3d  (declared)
+  AI-guided:   2d  (declared)
   Actual:      1.4d
   vs Solo:     -72%
-  vs AI:       -53%
+  vs AI-U:     -53%
+  vs AI-G:     -30%
   QA loops:    1
   Reviews:     3 (0 blocking)
   Evidence:    5 artifacts
@@ -81,14 +86,15 @@ Benchmark: FEAT-001  [complete]
 Example summary table:
 ```
-Story          Size Solo   AI     Actual   vs Solo  vs AI    QA   Rev  Blk  Ev   Les
-────────────────────────────────────────────────────────────────────────────────────
-TASK-042       m    5d     3d     1.4d     -72%     -53%     1    3    0    5    2
-TASK-089       l    8d     5d     2.1d     -74%     -58%     2    4    1    7    3
-TASK-101       s    2d     1.5d   0.6d     -70%     -60%     0    2    0    3    1
+Story          Size Solo   AI-U   AI-G   Actual   vs Solo  vs AI-U vs AI-G QA   Rev  Blk  Ev   Les
+────────────────────────────────────────────────────────────────────────────────────────────────────
+TASK-042       m    5d     3d     2d     1.4d     -72%     -53%    -30%    1    3    0    5    2
+TASK-089       l    8d     5d     3d     2.1d     -74%     -58%    -30%    2    4    1    7    3
+TASK-101       s    2d     1.5d   1d     0.6d     -70%     -60%    -40%    0    2    0    3    1
 Avg savings vs solo:        -72%
 Avg savings vs AI-unguided: -57%
+Avg savings vs AI-guided:   -33%
 Stories with actuals:       3/3
 ```
@@ -148,7 +154,7 @@ Task breakdown:
 ### Developer Story Point Estimation
-Architect sizing (`xs/s/m/l/xl` + optional points) reflects technical scope. Developer points reflect implementation effort from the developer's perspective. When both exist, the burndown uses developer points — the divergence between the two is a calibration signal worth tracking.
+Architect sizing (`xs/s/m/l/xl` + optional points) reflects technical scope. Developer points reflect implementation effort from the developer's perspective. Autonomous developer phases record implementation points when none have been recorded; provider-backed phases can return `developerPoints`, while deterministic phases use the architect point estimate or a `3 points` fallback. When both exist, the burndown uses developer points — the divergence between the two is a calibration signal worth tracking.
 The developer records their estimate with:
@@ -194,9 +200,11 @@ All benchmark and burndown commands support `--json` for structured output.
   "sizingLabel": "m",
   "soloEstimateDays": 5,
   "aiUnguidedEstimateDays": 3,
+  "aiGuidedEstimateDays": 2,
   "actualDays": 1.4,
   "vsSoloPct": -72,
   "vsAiUnguidedPct": -53,
+  "vsAiGuidedPct": -30,
   "qaIterations": 1,
   "quality": {
     "reviewCount": 3,

package/docs/command-contracts.md CHANGED Viewed

@@ -2,7 +2,8 @@
 `orchestra commands manifest --json` is the supported discovery surface for
 automation. Entries include command text, required and optional flags, JSON
-support, compatibility status, and reusable schema/message references.
+support, surface classification, compatibility status, contract version, exit
+codes, error shape, and reusable schema/message references.
 ## Compatibility
@@ -11,6 +12,22 @@ support, compatibility status, and reusable schema/message references.
 - `experimental`: command is intended for humans or workflow mutation and should
   not be consumed as a stable machine contract unless a future schema is linked.
+## Surfaces
+- `public`: stable automation contract for project workflows, documentation,
+  bootstrap files, and CI usage.
+- `experimental`: supported CLI behavior, but not a frozen machine contract for
+  `1.0.0`.
+- `internal`: implementation or adapter support surface. It may expose `--json`
+  for local tooling, but it is not part of the public 1.0 automation contract.
+Public commands use contract version `1.0`, exit codes `0` and `1`, and the
+generic JSON error contract unless a command-specific schema replaces it.
+Config migration is exposed as a public contract through
+`orchestra config migrate --json`. It is dry-run by default and requires
+`--apply` before writing `.agent-workflow/config.json`.
 ## Reusable Contracts
 - `schemas/commands/generic-json-output.schema.json` defines the baseline JSON

package/docs/core-command-surface.md CHANGED Viewed

@@ -17,16 +17,48 @@ orchestra commands manifest --json
 Core commands are the first screen for a new project or a production delivery
 workflow. They are stable enough to appear in onboarding and public examples.
-| Job | Commands |
-| --- | --- |
-| Install and verify | `orchestra version`, `orchestra upgrade --smoke --json` |
-| Initialize workspace | `orchestra init`, `orchestra health --json`, `orchestra status` |
-| Create and inspect work | `orchestra task add`, `orchestra task list`, `orchestra task show` |
-| Run governed delivery | `orchestra workflow run`, `orchestra workflow runs` |
-| Resolve workflow gates | `orchestra decision add`, `orchestra workflow gate-approve` |
-| Record delivery proof | `orchestra evidence add`, `orchestra review` |
-| Sync tracker state | `orchestra github sync --issue <number>` |
-| Check release readiness | `orchestra release check --json`, `orchestra release candidate --dry-run --json` |
+To show visible value quickly, present this compact sequence before introducing
+every artifact type:
+```bash
+orchestra init
+orchestra health --json
+orchestra task add --id DEMO-001 --title "Ship a governed README update" --owner developer --paths "README.md"
+orchestra workflow run --task DEMO-001 --gates none
+orchestra status
+orchestra release candidate --dry-run --json
+```
+For production delivery, follow with estimates, architecture sizing decisions,
+human gates, evidence, QA reviews, and `orchestra release check --json`.
+| Job                                       | Commands                                                                                                                        |
+| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
+| Install and verify                        | `orchestra version`, `orchestra upgrade --smoke --json`                                                                         |
+| Initialize workspace                      | `orchestra init`, `orchestra health --json`, `orchestra status`                                                                 |
+| Create and inspect work                   | `orchestra task add`, `orchestra task list`, `orchestra task show`                                                              |
+| Run governed delivery                     | `orchestra workflow run`, `orchestra workflow runs`                                                                             |
+| Inspect workflow shape and playbooks      | `orchestra workflow phase-plan --task <id>`, `orchestra workflow render --task <id> --phase <phase>`                            |
+| Resolve workflow gates and clarifications | `orchestra decision add`, `orchestra workflow gate-approve`, `orchestra workflow clarify`, `orchestra workflow clarify-respond` |
+| Record delivery proof                     | `orchestra evidence add`, `orchestra review`                                                                                    |
+| Plan QA automation                        | `orchestra qa coverage --task <id>`, `orchestra playwright plan --task <id>`                                                    |
+| Sync tracker state                        | `orchestra github sync --issue <number>`, `orchestra tracker sync --tracker <provider> --remote <id> --issue-file <file>`       |
+| Check release readiness                   | `orchestra release check --json`, `orchestra release candidate --dry-run --json`                                                |
+## Run Command Guidance
+`orchestra workflow run` is the governed delivery lifecycle. It creates phase
+sub-tasks, handoffs, run state, and gates across PM, PO, Architect, Developer,
+QA, and Release. Use it for product work, issue delivery, QA handoff, release
+readiness, and dogfooding the end-to-end process.
+`orchestra run` executes the task's local execution plan. It is useful for
+lower-level plan execution and budget/fallback validation, but it does not
+replace the autonomous lifecycle or its phase gates.
+For the full workflow narrative, phase matrix, gate-versus-clarify decision
+table, and playbook authoring guidance, see
+[autonomous-workflow.md](autonomous-workflow.md).
 ## Advanced Commands
@@ -39,10 +71,25 @@ should be linked from onboarding, not mixed into first-run copy.
 - Runtime and skills: `orchestra runtime brief`,
   `orchestra runtime delegate-plan`, `orchestra skills plan`,
   `orchestra skills render`, `orchestra protocol render`.
+- Extensions: `orchestra extensions list --json`,
+  `orchestra extensions validate --json`.
+- Provider profiles: `orchestra model profile set`,
+  `orchestra model profile apply`, `orchestra model profile smoke`,
+  `orchestra model providers`.
+- Generated-file operations: `orchestra refresh --check --json`,
+  `orchestra refresh --dry-run`, `orchestra refresh --force`,
+  `orchestra cursor canvas status --json`,
+  `orchestra cursor canvas sync --dry-run --json`.
 - Memory and source selection: `orchestra memory query`,
-  `orchestra memory hook`, `orchestra sources list`, `orchestra lessons list`.
+  `orchestra memory hook`, `orchestra memory governance`,
+  `orchestra sources list`, `orchestra lessons list`,
+  `orchestra lessons archive`, `orchestra lessons redact`,
+  `orchestra lessons prune`.
 - Metrics and cost: `orchestra estimate`, `orchestra benchmark`,
   `orchestra burndown`, `orchestra usage`, `orchestra budget check`.
+- Governance policy: `orchestra policy evaluate` checks routine, elevated,
+  major, and destructive action tiers from `.agent-workflow/policy.json` and
+  creates approval artifacts for major or destructive actions.
 - Local control surfaces: `orchestra web`, `orchestra serve`,
   `orchestra roles list`, `orchestra config show`.
@@ -67,5 +114,7 @@ unless a specific workflow needs them.
 ## Documentation Rule
 Do not duplicate the full command manifest in README or the site. Show the core
-path, then link to `orchestra commands manifest --json` and
-[command-contracts.md](command-contracts.md) for automation consumers.
+path, then link to `orchestra -h` for human onboarding,
+`orchestra help commands` for the full CLI catalog, and
+`orchestra commands manifest --json` plus [command-contracts.md](command-contracts.md)
+for automation consumers.

package/docs/end-to-end-demo.md CHANGED Viewed

@@ -31,6 +31,7 @@ orchestra estimate \
   --sizing s \
   --solo-days 1 \
   --ai-unguided-days 0.5 \
+  --ai-guided-days 0.25 \
   --confidence medium
 ```

package/docs/extension-contracts.md ADDED Viewed

@@ -0,0 +1,83 @@
+# Extension Contracts
+Open Orchestra discovers local extensions from:
+```text
+.agent-workflow/extensions/<extension-id>/manifest.json
+```
+The manifest is metadata only. Discovery and validation do not import or execute
+extension code.
+## Manifest
+```json
+{
+  "id": "acme-tracker",
+  "name": "Acme Tracker Adapter",
+  "version": "1.0.0",
+  "compatibility": { "orchestra": "^1.0.0" },
+  "extensionPoints": ["tracker-adapter"],
+  "entry": "adapter.js",
+  "capabilities": ["normalized-issue-sync"],
+  "riskAreas": ["network", "tracker-state"]
+}
+```
+Supported extension points for the 1.0 contract are:
+- `skill`
+- `tracker-adapter`
+- `provider-adapter`
+- `phase-playbook`
+- `refresh-target`
+`entry` must be a relative path inside the extension directory. Absolute paths
+and path traversal outside that directory are rejected. Provider adapter
+extensions remain metadata-only until runtime provider loading is stabilized.
+## Commands
+```bash
+orchestra extensions list --json
+orchestra extensions validate --json
+```
+## Provider-Like Example
+```json
+{
+  "id": "acme-models",
+  "name": "Acme Models Provider",
+  "version": "1.0.0",
+  "compatibility": { "orchestra": "^1.0.0" },
+  "extensionPoints": ["provider-adapter"],
+  "entry": "provider.js",
+  "capabilities": ["chat-completions", "json-mode"],
+  "riskAreas": ["secrets", "network"]
+}
+```
+## Tracker-Like Example
+```json
+{
+  "id": "acme-tracker",
+  "name": "Acme Tracker Adapter",
+  "version": "1.0.0",
+  "compatibility": { "orchestra": "^1.0.0" },
+  "extensionPoints": ["tracker-adapter"],
+  "entry": "tracker.js",
+  "capabilities": ["normalized-issue-sync"],
+  "riskAreas": ["network", "tracker-state"]
+}
+```
+## Stability
+Stable for 1.0.0: manifest shape, discovery path, extension point names, local
+path safety validation, and JSON output from `extensions list` and
+`extensions validate`.
+Experimental: dynamic code loading, provider runtime registration, tracker live
+transport execution, and generated-file refresh target execution.