@jterrats/open-orchestra 0.5.5 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/AGENTS.md +9 -8
  2. package/CLAUDE.md +13 -11
  3. package/README.md +78 -11
  4. package/dist/assets/web-console.js +203 -36
  5. package/dist/automation-evidence.d.ts +23 -0
  6. package/dist/automation-evidence.js +97 -0
  7. package/dist/automation-evidence.js.map +1 -0
  8. package/dist/autonomous-run-state.d.ts +4 -1
  9. package/dist/autonomous-run-state.js +8 -2
  10. package/dist/autonomous-run-state.js.map +1 -1
  11. package/dist/autonomous-run-store.d.ts +3 -1
  12. package/dist/autonomous-run-store.js +9 -3
  13. package/dist/autonomous-run-store.js.map +1 -1
  14. package/dist/autonomous-workflow-constants.js +5 -1
  15. package/dist/autonomous-workflow-constants.js.map +1 -1
  16. package/dist/benchmark.d.ts +4 -1
  17. package/dist/benchmark.js +140 -19
  18. package/dist/benchmark.js.map +1 -1
  19. package/dist/cli.js +88 -2
  20. package/dist/cli.js.map +1 -1
  21. package/dist/collaboration-flows.js +5 -19
  22. package/dist/collaboration-flows.js.map +1 -1
  23. package/dist/collection-utils.d.ts +3 -0
  24. package/dist/collection-utils.js +10 -0
  25. package/dist/collection-utils.js.map +1 -0
  26. package/dist/command-manifest.d.ts +12 -1
  27. package/dist/command-manifest.js +218 -10
  28. package/dist/command-manifest.js.map +1 -1
  29. package/dist/commands.d.ts +14 -6
  30. package/dist/commands.js +78 -28
  31. package/dist/commands.js.map +1 -1
  32. package/dist/config-migrations.d.ts +24 -0
  33. package/dist/config-migrations.js +102 -0
  34. package/dist/config-migrations.js.map +1 -0
  35. package/dist/constants.d.ts +3 -0
  36. package/dist/constants.js +26 -0
  37. package/dist/constants.js.map +1 -1
  38. package/dist/cursor-canvas.d.ts +20 -0
  39. package/dist/cursor-canvas.js +119 -0
  40. package/dist/cursor-canvas.js.map +1 -0
  41. package/dist/dashboard-commands.d.ts +2 -0
  42. package/dist/dashboard-commands.js +14 -0
  43. package/dist/dashboard-commands.js.map +1 -0
  44. package/dist/defaults.d.ts +13 -0
  45. package/dist/defaults.js +13 -0
  46. package/dist/defaults.js.map +1 -1
  47. package/dist/delegation-decision.js +23 -8
  48. package/dist/delegation-decision.js.map +1 -1
  49. package/dist/delivery-commands.js +5 -0
  50. package/dist/delivery-commands.js.map +1 -1
  51. package/dist/delivery-dashboard-charts.d.ts +4 -0
  52. package/dist/delivery-dashboard-charts.js +156 -0
  53. package/dist/delivery-dashboard-charts.js.map +1 -0
  54. package/dist/delivery-dashboard-html.d.ts +2 -0
  55. package/dist/delivery-dashboard-html.js +115 -0
  56. package/dist/delivery-dashboard-html.js.map +1 -0
  57. package/dist/delivery-dashboard-types.d.ts +78 -0
  58. package/dist/delivery-dashboard-types.js +2 -0
  59. package/dist/delivery-dashboard-types.js.map +1 -0
  60. package/dist/delivery-dashboard.d.ts +8 -0
  61. package/dist/delivery-dashboard.js +124 -0
  62. package/dist/delivery-dashboard.js.map +1 -0
  63. package/dist/doc-sync.d.ts +25 -0
  64. package/dist/doc-sync.js +79 -0
  65. package/dist/doc-sync.js.map +1 -0
  66. package/dist/effort-classification.d.ts +7 -0
  67. package/dist/effort-classification.js +72 -0
  68. package/dist/effort-classification.js.map +1 -0
  69. package/dist/extension-commands.d.ts +3 -0
  70. package/dist/extension-commands.js +40 -0
  71. package/dist/extension-commands.js.map +1 -0
  72. package/dist/extensions.d.ts +22 -0
  73. package/dist/extensions.js +126 -0
  74. package/dist/extensions.js.map +1 -0
  75. package/dist/gemini-provider.d.ts +3 -6
  76. package/dist/gemini-provider.js +8 -17
  77. package/dist/gemini-provider.js.map +1 -1
  78. package/dist/github.d.ts +2 -0
  79. package/dist/github.js +15 -3
  80. package/dist/github.js.map +1 -1
  81. package/dist/health-checks.js +51 -0
  82. package/dist/health-checks.js.map +1 -1
  83. package/dist/lucid-story-map.d.ts +73 -0
  84. package/dist/lucid-story-map.js +112 -0
  85. package/dist/lucid-story-map.js.map +1 -0
  86. package/dist/mcp-integrations.d.ts +19 -0
  87. package/dist/mcp-integrations.js +58 -0
  88. package/dist/mcp-integrations.js.map +1 -0
  89. package/dist/mcp-tool-adapter.d.ts +21 -0
  90. package/dist/mcp-tool-adapter.js +56 -0
  91. package/dist/mcp-tool-adapter.js.map +1 -0
  92. package/dist/metrics-commands.js +47 -13
  93. package/dist/metrics-commands.js.map +1 -1
  94. package/dist/model-commands.d.ts +5 -0
  95. package/dist/model-commands.js +95 -1
  96. package/dist/model-commands.js.map +1 -1
  97. package/dist/model-providers.d.ts +5 -12
  98. package/dist/model-providers.js +30 -43
  99. package/dist/model-providers.js.map +1 -1
  100. package/dist/network-policy.d.ts +2 -0
  101. package/dist/network-policy.js +6 -0
  102. package/dist/network-policy.js.map +1 -0
  103. package/dist/ollama-provider.d.ts +3 -6
  104. package/dist/ollama-provider.js +7 -16
  105. package/dist/ollama-provider.js.map +1 -1
  106. package/dist/package-update-check.d.ts +19 -0
  107. package/dist/package-update-check.js +24 -0
  108. package/dist/package-update-check.js.map +1 -1
  109. package/dist/phase-executor.d.ts +1 -0
  110. package/dist/phase-executor.js +401 -9
  111. package/dist/phase-executor.js.map +1 -1
  112. package/dist/phase-playbooks.d.ts +18 -1
  113. package/dist/phase-playbooks.js +146 -2
  114. package/dist/phase-playbooks.js.map +1 -1
  115. package/dist/planning-commands.d.ts +1 -0
  116. package/dist/planning-commands.js +36 -36
  117. package/dist/planning-commands.js.map +1 -1
  118. package/dist/policy-commands.d.ts +2 -0
  119. package/dist/policy-commands.js +29 -0
  120. package/dist/policy-commands.js.map +1 -0
  121. package/dist/policy-defaults.d.ts +2 -0
  122. package/dist/policy-defaults.js +42 -0
  123. package/dist/policy-defaults.js.map +1 -0
  124. package/dist/policy.d.ts +20 -0
  125. package/dist/policy.js +155 -0
  126. package/dist/policy.js.map +1 -0
  127. package/dist/project-detection.js +9 -7
  128. package/dist/project-detection.js.map +1 -1
  129. package/dist/prompt-registry-update.d.ts +2 -0
  130. package/dist/prompt-registry-update.js +5 -1
  131. package/dist/prompt-registry-update.js.map +1 -1
  132. package/dist/prompt-registry-validation.d.ts +3 -0
  133. package/dist/prompt-registry-validation.js +61 -21
  134. package/dist/prompt-registry-validation.js.map +1 -1
  135. package/dist/provider-utils.d.ts +11 -0
  136. package/dist/provider-utils.js +14 -0
  137. package/dist/provider-utils.js.map +1 -1
  138. package/dist/qa-commands.d.ts +2 -0
  139. package/dist/qa-commands.js +18 -0
  140. package/dist/qa-commands.js.map +1 -0
  141. package/dist/qa-coverage.d.ts +24 -0
  142. package/dist/qa-coverage.js +189 -0
  143. package/dist/qa-coverage.js.map +1 -0
  144. package/dist/qa-readiness.d.ts +5 -0
  145. package/dist/qa-readiness.js +26 -0
  146. package/dist/qa-readiness.js.map +1 -0
  147. package/dist/refresh-generated.d.ts +32 -0
  148. package/dist/refresh-generated.js +180 -0
  149. package/dist/refresh-generated.js.map +1 -0
  150. package/dist/release-candidate.d.ts +9 -1
  151. package/dist/release-candidate.js +52 -1
  152. package/dist/release-candidate.js.map +1 -1
  153. package/dist/release-commands.js +161 -8
  154. package/dist/release-commands.js.map +1 -1
  155. package/dist/release-readiness.d.ts +33 -0
  156. package/dist/release-readiness.js +187 -3
  157. package/dist/release-readiness.js.map +1 -1
  158. package/dist/runtime-adapters.d.ts +2 -1
  159. package/dist/runtime-adapters.js +16 -0
  160. package/dist/runtime-adapters.js.map +1 -1
  161. package/dist/runtime-bootstrap.js +1 -1
  162. package/dist/runtime-bootstrap.js.map +1 -1
  163. package/dist/runtime-commands.d.ts +2 -0
  164. package/dist/runtime-commands.js +85 -3
  165. package/dist/runtime-commands.js.map +1 -1
  166. package/dist/runtime-execution-adapters.js +40 -0
  167. package/dist/runtime-execution-adapters.js.map +1 -1
  168. package/dist/runtime-execution-renderer.d.ts +3 -2
  169. package/dist/runtime-execution-renderer.js +46 -8
  170. package/dist/runtime-execution-renderer.js.map +1 -1
  171. package/dist/runtime-execution.d.ts +8 -2
  172. package/dist/runtime-execution.js +109 -11
  173. package/dist/runtime-execution.js.map +1 -1
  174. package/dist/runtime-guardrails.d.ts +26 -0
  175. package/dist/runtime-guardrails.js +168 -0
  176. package/dist/runtime-guardrails.js.map +1 -0
  177. package/dist/setup-agents-import.js +5 -3
  178. package/dist/setup-agents-import.js.map +1 -1
  179. package/dist/skills-catalog.js +1 -0
  180. package/dist/skills-catalog.js.map +1 -1
  181. package/dist/skills-commands.d.ts +5 -0
  182. package/dist/skills-commands.js +79 -2
  183. package/dist/skills-commands.js.map +1 -1
  184. package/dist/skills-memory.d.ts +36 -2
  185. package/dist/skills-memory.js +165 -6
  186. package/dist/skills-memory.js.map +1 -1
  187. package/dist/skills-planning.js +9 -22
  188. package/dist/skills-planning.js.map +1 -1
  189. package/dist/skills-render.js +2 -4
  190. package/dist/skills-render.js.map +1 -1
  191. package/dist/skills.d.ts +1 -1
  192. package/dist/skills.js +1 -1
  193. package/dist/skills.js.map +1 -1
  194. package/dist/sprint-commands.js +2 -1
  195. package/dist/sprint-commands.js.map +1 -1
  196. package/dist/subagent-protocol.js +3 -5
  197. package/dist/subagent-protocol.js.map +1 -1
  198. package/dist/support-commands.d.ts +2 -0
  199. package/dist/support-commands.js +18 -0
  200. package/dist/support-commands.js.map +1 -0
  201. package/dist/support-diagnostics.d.ts +49 -0
  202. package/dist/support-diagnostics.js +86 -0
  203. package/dist/support-diagnostics.js.map +1 -0
  204. package/dist/task-graph-commands.js +6 -14
  205. package/dist/task-graph-commands.js.map +1 -1
  206. package/dist/task-text.d.ts +8 -0
  207. package/dist/task-text.js +18 -0
  208. package/dist/task-text.js.map +1 -0
  209. package/dist/telemetry-redaction.js +8 -1
  210. package/dist/telemetry-redaction.js.map +1 -1
  211. package/dist/tool-commands.d.ts +3 -0
  212. package/dist/tool-commands.js +62 -0
  213. package/dist/tool-commands.js.map +1 -1
  214. package/dist/tracker-adapters.d.ts +71 -0
  215. package/dist/tracker-adapters.js +186 -0
  216. package/dist/tracker-adapters.js.map +1 -0
  217. package/dist/tracker-commands.d.ts +2 -0
  218. package/dist/tracker-commands.js +119 -0
  219. package/dist/tracker-commands.js.map +1 -0
  220. package/dist/types/metrics.d.ts +25 -1
  221. package/dist/types/model-config.d.ts +51 -4
  222. package/dist/types/runtime.d.ts +83 -0
  223. package/dist/types/skills.d.ts +2 -0
  224. package/dist/types/tasks.d.ts +10 -0
  225. package/dist/types/workflow-run.d.ts +35 -0
  226. package/dist/types.d.ts +12 -4
  227. package/dist/types.js.map +1 -1
  228. package/dist/upgrade-commands.js +13 -4
  229. package/dist/upgrade-commands.js.map +1 -1
  230. package/dist/validation.js +2 -2
  231. package/dist/validation.js.map +1 -1
  232. package/dist/visual-validation.d.ts +81 -0
  233. package/dist/visual-validation.js +290 -0
  234. package/dist/visual-validation.js.map +1 -0
  235. package/dist/web-action-security.d.ts +11 -0
  236. package/dist/web-action-security.js +45 -0
  237. package/dist/web-action-security.js.map +1 -0
  238. package/dist/web-api-read-routes.js +115 -3
  239. package/dist/web-api-read-routes.js.map +1 -1
  240. package/dist/web-api.js +507 -5
  241. package/dist/web-api.js.map +1 -1
  242. package/dist/web-artifacts.d.ts +55 -0
  243. package/dist/web-artifacts.js +222 -0
  244. package/dist/web-artifacts.js.map +1 -0
  245. package/dist/web-console/assets/index-C9lx-V42.css +1 -0
  246. package/dist/web-console/assets/index-M3S0g1GK.js +11 -0
  247. package/dist/web-console/index.html +13 -0
  248. package/dist/web-console.js +9 -3
  249. package/dist/web-console.js.map +1 -1
  250. package/dist/web-recovery.d.ts +30 -0
  251. package/dist/web-recovery.js +163 -0
  252. package/dist/web-recovery.js.map +1 -0
  253. package/dist/web-workflow-progress.d.ts +41 -0
  254. package/dist/web-workflow-progress.js +114 -0
  255. package/dist/web-workflow-progress.js.map +1 -0
  256. package/dist/workflow-approval-service.d.ts +2 -1
  257. package/dist/workflow-approval-service.js +83 -4
  258. package/dist/workflow-approval-service.js.map +1 -1
  259. package/dist/workflow-approval-utils.js +13 -3
  260. package/dist/workflow-approval-utils.js.map +1 -1
  261. package/dist/workflow-event-query.d.ts +2 -0
  262. package/dist/workflow-event-query.js +6 -0
  263. package/dist/workflow-event-query.js.map +1 -0
  264. package/dist/workflow-evidence-service.js +18 -9
  265. package/dist/workflow-evidence-service.js.map +1 -1
  266. package/dist/workflow-gates.d.ts +2 -0
  267. package/dist/workflow-gates.js +103 -0
  268. package/dist/workflow-gates.js.map +1 -1
  269. package/dist/workflow-markdown.d.ts +6 -0
  270. package/dist/workflow-markdown.js +25 -0
  271. package/dist/workflow-markdown.js.map +1 -0
  272. package/dist/workflow-phase-planner.d.ts +19 -0
  273. package/dist/workflow-phase-planner.js +133 -0
  274. package/dist/workflow-phase-planner.js.map +1 -0
  275. package/dist/workflow-run-commands.d.ts +1 -0
  276. package/dist/workflow-run-commands.js +247 -20
  277. package/dist/workflow-run-commands.js.map +1 -1
  278. package/dist/workflow-services.d.ts +21 -12
  279. package/dist/workflow-services.js +376 -260
  280. package/dist/workflow-services.js.map +1 -1
  281. package/dist/workflow-task-service.d.ts +11 -0
  282. package/dist/workflow-task-service.js +242 -0
  283. package/dist/workflow-task-service.js.map +1 -0
  284. package/dist/workflow-templates.js +2 -14
  285. package/dist/workflow-templates.js.map +1 -1
  286. package/dist/workspace-validator.js +133 -5
  287. package/dist/workspace-validator.js.map +1 -1
  288. package/dist/workspace.js +10 -2
  289. package/dist/workspace.js.map +1 -1
  290. package/docs/adoption-guide.md +147 -0
  291. package/docs/autonomous-workflow.md +146 -28
  292. package/docs/benchmark.md +17 -9
  293. package/docs/command-contracts.md +18 -1
  294. package/docs/core-command-surface.md +62 -13
  295. package/docs/end-to-end-demo.md +1 -0
  296. package/docs/extension-contracts.md +83 -0
  297. package/docs/orchestra-mvp.md +86 -3
  298. package/docs/persona-workflows.md +32 -0
  299. package/docs/release-test-matrix.md +42 -0
  300. package/docs/runtime-adapters.md +113 -0
  301. package/docs/runtime-llm-flow.md +13 -0
  302. package/docs/setup-agents-applicability-review.md +173 -0
  303. package/docs/skill-loading-strategy.md +1 -0
  304. package/docs/source-of-truth-and-agent-learning.md +14 -0
  305. package/docs/traceability-flow.md +5 -1
  306. package/docs/tracker-adapter-contract.md +10 -1
  307. package/docs/web-console-qa.md +35 -0
  308. package/package.json +12 -6
  309. package/rules/development-engineering.mdc +66 -0
  310. package/skills/doc-sync/SKILL.md +2 -0
@@ -2,20 +2,43 @@
2
2
 
3
3
  `orchestra workflow run` executes a full story lifecycle as a governed multi-phase sequence without requiring manual step-by-step commands. Each phase creates a sub-task, generates handoff artifacts, and persists state in an append-only run log at `.agent-workflow/workflow-runs.jsonl`.
4
4
 
5
+ ## End-To-End Lifecycle
6
+
7
+ Use the autonomous workflow when a task needs product framing, architecture,
8
+ implementation, QA evidence, and release readiness in one governed trace.
9
+
10
+ 1. Register or sync the backlog item with a task ID, paths, owner, and
11
+ acceptance criteria.
12
+ 2. Record an estimate and an architect sizing decision before implementation
13
+ work begins.
14
+ 3. Start `orchestra workflow run --task <id> --gates phase`.
15
+ 4. Review the PO to Architect gate. Approve it when the story is refined enough
16
+ for technical design and implementation.
17
+ 5. Let Architect, Developer, and QA phases produce handoffs, reviews, evidence,
18
+ and any clarification records needed to finish the work.
19
+ 6. Review the QA to Release gate. Approve it only when validation evidence,
20
+ unresolved risks, release notes, and rollback expectations are acceptable.
21
+ 7. Resume into Release, then run `orchestra benchmark --task <id>` to capture
22
+ actual delivery data for future estimates.
23
+
24
+ The run state, gate artifacts, handoffs, evidence, reviews, decisions, and
25
+ clarifications are persisted under `.agent-workflow/` so the delivery story can
26
+ be audited after the fact.
27
+
5
28
  ## Phase Graph
6
29
 
7
30
  ```
8
31
  PM → PO [gate] → Architect [sizing gate] → Developer → QA [gate] → Release
9
32
  ```
10
33
 
11
- | Phase | Role | Summary |
12
- |-------|------|---------|
13
- | `pm` | product_manager | Product framing, prioritization, and success metrics |
14
- | `po` | product_owner | Backlog refinement, story sizing, and acceptance criteria |
15
- | `architect` | architect | Technical tasking, design decisions, and size estimation |
16
- | `developer` | developer | Implementation against acceptance criteria |
17
- | `qa` | qa | Verification against acceptance criteria and edge cases |
18
- | `release` | release_manager | Release candidate validation and PR creation |
34
+ | Phase | Role | Output | Human checkpoint |
35
+ | ----------- | --------------- | -------------------------------------------------------------------- | --------------------------------------------------------- |
36
+ | `pm` | product_manager | Product framing, trade-offs, sequencing, and success metrics | None by default |
37
+ | `po` | product_owner | Refined scope, acceptance criteria, assumptions, and release value | `po→architect` when `--gates phase` or `--gates all` |
38
+ | `architect` | architect | Technical approach, affected boundaries, sizing decision, and risks | Architect sizing gate is always required |
39
+ | `developer` | developer | Code/docs changes, implementation notes, and Developer to QA handoff | Optional clarification to PO or Architect |
40
+ | `qa` | qa | Test plan, validation evidence, gaps, and QA recommendation | `qa→release` when `--gates phase` or `--gates all` |
41
+ | `release` | release_manager | Release readiness, rollback notes, and final completion state | Release approval is represented by the QA to Release gate |
19
42
 
20
43
  ## Gate Modes
21
44
 
@@ -25,14 +48,33 @@ orchestra workflow run --task <id> --gates phase # gates at po→architect and
25
48
  orchestra workflow run --task <id> --gates all # gate at every transition
26
49
  ```
27
50
 
28
- | Mode | Pauses at |
29
- |------|-----------|
30
- | `none` | Never — fully autonomous |
51
+ | Mode | Pauses at |
52
+ | ------- | ------------------------------- |
53
+ | `none` | Never — fully autonomous |
31
54
  | `phase` | `po→architect` and `qa→release` |
32
- | `all` | Every phase transition |
55
+ | `all` | Every phase transition |
33
56
 
34
57
  When a gate is reached, the run writes a review artifact to `.agent-workflow/approvals/` and prints the exact `--resume` command. The run resumes when a human approves and runs that command.
35
58
 
59
+ ## Gates Versus Clarifications
60
+
61
+ Gates and clarifications solve different problems. A gate is a planned approval
62
+ checkpoint between phases. A clarification is a mid-phase question that prevents
63
+ the active role from continuing safely.
64
+
65
+ | Situation | Use | Why |
66
+ | ------------------------------------------------------------------- | ------------- | --------------------------------------------------------------------- |
67
+ | PO acceptance criteria need human confirmation before design starts | Gate | This is a planned phase boundary |
68
+ | QA evidence is complete but release risk needs sign-off | Gate | This determines whether Release may proceed |
69
+ | Developer needs to know whether empty input is valid | Clarification | The active phase is blocked by a product or architecture question |
70
+ | QA finds ambiguous expected behavior while writing tests | Clarification | The answer should unblock QA without inventing a new phase boundary |
71
+ | Architect chooses between two durable system approaches | Decision | This is an architecture record, not a pause by itself |
72
+ | Reviewer finds a defect after evidence is attached | Review | This is a quality finding that can approve, block, or request changes |
73
+
74
+ Do not use gates for every question. Use `workflow clarify` when the question is
75
+ specific, answerable by PO or Architect, and the current Developer or QA phase
76
+ can continue after the answer is recorded.
77
+
36
78
  ## Provider-Backed Phase Execution
37
79
 
38
80
  By default, workflow phases remain deterministic because the default provider is `none`. When a role or default provider route is configured to a non-`none` provider, each phase builds a prompt from task context, rendered skills, the active phase playbook, and prior handoff content, then writes the provider output to `.agent-workflow/runs/<task>/<phase>/`.
@@ -45,6 +87,18 @@ orchestra model provenance list --task FEAT-001 --json
45
87
 
46
88
  Provider execution records `MODEL_PROVENANCE_RECORDED` events. The benchmark layer uses those events to report token and cost signals.
47
89
 
90
+ When every configured provider fails, the workflow prints sanitized per-provider causes and stores them in the failed phase notes and `AUTONOMOUS_RUN_FAILED` event metadata. These diagnostics distinguish DNS/network failures, missing credentials, HTTP status errors, policy blocks, and exhausted fallbacks without exposing API keys, auth headers, or raw secret values. If the cause is still unclear, run that provider's smoke test with the same credential file or environment variables used by `.agent-workflow/config.json`.
91
+
92
+ ## Runtime Tool Permission Policy
93
+
94
+ Runtime briefs and delegation packets include a `Tool Permission Policy` section. This is adapter metadata, not an instruction to bypass runtime safety by default.
95
+
96
+ Claude CLI is intentionally brief-only in Open Orchestra today. Direct non-interactive Claude execution must not use `claude --print <prompt>` alone for tool-using tasks, because the process can block waiting for tool approval. If direct Claude CLI execution is added later, it must require explicit user opt-in and choose permission flags from the adapter policy:
97
+
98
+ - `--gates none` / fully autonomous mode: only with explicit opt-in, use the adapter autonomy flags.
99
+ - `--gates phase` or `--gates all`: use read-only allowed tools by default and require separate approval for write or shell tools.
100
+ - Brief/delegation rendering remains the default path and does not grant tool permissions by itself.
101
+
48
102
  ## Phase Playbooks
49
103
 
50
104
  `orchestra init` creates editable phase playbooks in `.agent-workflow/playbooks/`:
@@ -63,6 +117,23 @@ Use playbooks for phase-specific guidance that should not live in always-loaded
63
117
 
64
118
  Playbooks are provider-agnostic. They are loaded into provider-backed phase prompts, `workflow render --phase <phase>`, runtime briefs, and runtime delegation packets. Only the active phase playbook is loaded.
65
119
 
120
+ ### Authoring Playbooks
121
+
122
+ Write playbooks as concise phase instructions, not as a second root
123
+ `AGENTS.md`. A good playbook should include:
124
+
125
+ - Role objective for the phase.
126
+ - Inputs the role should read before acting.
127
+ - Required outputs and artifact names.
128
+ - Quality gates or review checks owned by that role.
129
+ - Evidence that must be recorded before handoff.
130
+ - Escalation rules for clarifications, decisions, reviews, or security checks.
131
+
132
+ Keep project-specific conventions in the project playbooks and keep stack-wide
133
+ policy in root instructions or `rules/*.mdc`. If a playbook starts duplicating
134
+ guidance that applies to every phase, move the shared rule into `rules/` and leave only phase-specific
135
+ work in the playbook.
136
+
66
137
  Configuration is convention-over-config by default, with optional overrides:
67
138
 
68
139
  ```json
@@ -78,9 +149,27 @@ Configuration is convention-over-config by default, with optional overrides:
78
149
 
79
150
  If a playbook file is missing, Orchestra uses deterministic fallback guidance and surfaces a warning in rendered content.
80
151
 
152
+ ### Playbook Resolution
153
+
154
+ Resolution order is:
155
+
156
+ 1. `workflow.phasePlaybooks[phase]` in `.agent-workflow/config.json`, when set.
157
+ 2. `<playbooksDir>/<phase>.md`, where `playbooksDir` defaults to
158
+ `.agent-workflow/playbooks`.
159
+ 3. Deterministic fallback guidance built into Open Orchestra.
160
+
161
+ Use `orchestra workflow render --task <id> --phase <phase>` to inspect the exact
162
+ playbook content that a runtime brief or provider-backed phase will receive.
163
+ Use `orchestra health --json` to confirm the workflow state is readable before a
164
+ run.
165
+
81
166
  ## Architect Sizing Gate
82
167
 
83
- Regardless of `--gates` mode, the architect phase always requires a sizing decision before the developer phase starts. In provider-backed mode the architect phase can record that decision automatically; otherwise, if no sizing decision is found, the run stops with the exact command to resolve it:
168
+ Regardless of `--gates` mode, the architect phase always requires a valid sizing decision before the developer phase starts. Provider-backed architect phases record that decision from structured phase output. Deterministic architect phases record a conservative default (`m [3 points]`) when no architect sizing exists, so unattended local runs remain complete and auditable. If the provider returns an unsupported sizing label, Orchestra normalizes it back to the same default before recording the decision.
169
+
170
+ The developer phase also records implementation story points when no developer estimate exists. Provider-backed phases can return `developerPoints` in the structured output; deterministic phases use the architect point estimate when available, or `3 points` as a conservative fallback. Burndown prefers developer points and falls back to architect points when no developer estimate is recorded.
171
+
172
+ When manual correction is needed, record an accepted architect decision:
84
173
 
85
174
  ```bash
86
175
  orchestra decision add \
@@ -98,6 +187,10 @@ Valid sizing labels: `xs`, `s`, `m`, `l`, `xl`. An optional numeric point estima
98
187
  ## Usage
99
188
 
100
189
  ```bash
190
+ # Explain recommended phases from project signals and task risk
191
+ orchestra workflow phase-plan --task FEAT-001
192
+ orchestra workflow phase-plan --task FEAT-001 --json
193
+
101
194
  # Inspect the phase graph without persisting state
102
195
  orchestra workflow run --task FEAT-001 --dry-run --gates phase
103
196
 
@@ -127,6 +220,31 @@ orchestra workflow runs
127
220
  orchestra workflow runs --json
128
221
  ```
129
222
 
223
+ `workflow phase-plan` is advisory. It uses project detection, task text, risks,
224
+ and paths to recommend additional review phases such as `ux_review` for
225
+ frontend accessibility or responsive behavior, and `docs_review` for
226
+ documentation or public-site changes. If `.agent-workflow/config.json` already
227
+ defines `workflow.phaseSequence`, that manual sequence remains authoritative and
228
+ the recommendations are reported without silently changing the run.
229
+
230
+ ## Web and Runtime Progress
231
+
232
+ Workflow progress is available from both the CLI and the local web console:
233
+
234
+ ```bash
235
+ orchestra workflow runs --json
236
+ orchestra web
237
+ ```
238
+
239
+ The local web console reads `/api/workflow/progress` and shows the active phase,
240
+ role, provider/model, elapsed time, fallback state, failed reason, paused gates,
241
+ and resumable runs. This is a local web API contract; it does not execute
242
+ runtime agents or call provider APIs by itself.
243
+
244
+ Provider-backed phases also print progress in `workflow run` human output. The
245
+ default provider remains `none`, so deterministic workflow runs do not require
246
+ model credentials.
247
+
130
248
  ## Clarification Loop
131
249
 
132
250
  Developers or QA engineers can surface blocking questions to the PO or architect mid-phase. The active phase is suspended until the answer is recorded, then resumed normally.
@@ -159,24 +277,24 @@ Clarification records are persisted in `.agent-workflow/clarifications.jsonl` an
159
277
 
160
278
  ## Run States
161
279
 
162
- | Status | Meaning |
163
- |--------|---------|
164
- | `running` | Execution in progress |
165
- | `paused` | Waiting for human gate approval or clarification answer |
166
- | `done` | All phases completed successfully |
167
- | `failed` | Run stopped due to a missing prerequisite (e.g. sizing decision) |
280
+ | Status | Meaning |
281
+ | --------- | ---------------------------------------------------------------- |
282
+ | `running` | Execution in progress |
283
+ | `paused` | Waiting for human gate approval or clarification answer |
284
+ | `done` | All phases completed successfully |
285
+ | `failed` | Run stopped due to a missing prerequisite (e.g. sizing decision) |
168
286
 
169
287
  ## Phase States
170
288
 
171
- | Status | Meaning |
172
- |--------|---------|
173
- | `pending` | Not yet started |
174
- | `running` | Currently executing |
175
- | `done` | Completed and handed off |
176
- | `gate_paused` | Completed; waiting for human gate approval before next phase |
177
- | `awaiting_clarification` | Suspended; waiting for a clarification answer |
178
- | `qa_failed` | QA found issues; developer phase will retry |
179
- | `blocked` | Blocked by an unresolvable condition |
289
+ | Status | Meaning |
290
+ | ------------------------ | ------------------------------------------------------------ |
291
+ | `pending` | Not yet started |
292
+ | `running` | Currently executing |
293
+ | `done` | Completed and handed off |
294
+ | `gate_paused` | Completed; waiting for human gate approval before next phase |
295
+ | `awaiting_clarification` | Suspended; waiting for a clarification answer |
296
+ | `qa_failed` | QA found issues; developer phase will retry |
297
+ | `blocked` | Blocked by an unresolvable condition |
180
298
 
181
299
  ## Gate Pause Notifications
182
300
 
package/docs/benchmark.md CHANGED
@@ -8,15 +8,16 @@ Open Orchestra measures the effectiveness of AI-assisted development across thre
8
8
  |------|-------------|
9
9
  | **Solo (no LLM)** | Declared by PM or architect at story start — counterfactual estimate |
10
10
  | **AI-unguided** | Declared at story start — how long with a general LLM but no roles, gates, or skills |
11
+ | **AI-guided** | Declared at story start — how long with AI plus Orchestra workflow, roles, gates, memory, and evidence |
11
12
  | **AI + Orchestra (actual)** | Measured automatically from `AUTONOMOUS_PHASE_DONE` timestamps in the event log |
12
13
 
13
- The first two are self-reported. Orchestra only measures the third. The comparison is meaningful even with declared baselines because it creates a consistent, auditable record across many stories.
14
+ The first three are self-reported. Orchestra measures the actual governed run. The comparison is meaningful even with declared baselines because it creates a consistent, auditable record across many stories.
14
15
 
15
16
  ## Usage
16
17
 
17
18
  ### 1. Declare baselines at story start
18
19
 
19
- Record the three-mode estimate before work begins — ideally during the architect phase.
20
+ Record the three-mode estimate before work begins — ideally during the architect phase. Autonomous workflow runs also ensure an architect sizing decision exists before developer handoff; deterministic architect phases record `m [3 points]` when no architect sizing exists, while provider-backed phases record the normalized provider output.
20
21
 
21
22
  ```bash
22
23
  orchestra estimate \
@@ -24,6 +25,7 @@ orchestra estimate \
24
25
  --sizing m \
25
26
  --solo-days 5 \
26
27
  --ai-unguided-days 3 \
28
+ --ai-guided-days 2 \
27
29
  --confidence high \
28
30
  --declared-by pm
29
31
  ```
@@ -36,6 +38,7 @@ Options:
36
38
  | `--sizing` | Yes | — | `xs`, `s`, `m`, `l`, `xl` |
37
39
  | `--solo-days` | Yes | — | Estimated days without any AI |
38
40
  | `--ai-unguided-days` | Yes | — | Estimated days with a general LLM, no Orchestra |
41
+ | `--ai-guided-days` | Yes | — | Estimated days with AI guided by Orchestra workflow |
39
42
  | `--confidence` | No | `medium` | `low`, `medium`, `high` |
40
43
  | `--declared-by` | No | `pm` | Role recording the estimate |
41
44
  | `--json` | No | — | Structured output |
@@ -66,9 +69,11 @@ Benchmark: FEAT-001 [complete]
66
69
  Sizing: m
67
70
  Solo: 5d (declared)
68
71
  AI-unguided: 3d (declared)
72
+ AI-guided: 2d (declared)
69
73
  Actual: 1.4d
70
74
  vs Solo: -72%
71
- vs AI: -53%
75
+ vs AI-U: -53%
76
+ vs AI-G: -30%
72
77
  QA loops: 1
73
78
  Reviews: 3 (0 blocking)
74
79
  Evidence: 5 artifacts
@@ -81,14 +86,15 @@ Benchmark: FEAT-001 [complete]
81
86
  Example summary table:
82
87
 
83
88
  ```
84
- Story Size Solo AI Actual vs Solo vs AI QA Rev Blk Ev Les
85
- ────────────────────────────────────────────────────────────────────────────────────
86
- TASK-042 m 5d 3d 1.4d -72% -53% 1 3 0 5 2
87
- TASK-089 l 8d 5d 2.1d -74% -58% 2 4 1 7 3
88
- TASK-101 s 2d 1.5d 0.6d -70% -60% 0 2 0 3 1
89
+ Story Size Solo AI-U AI-G Actual vs Solo vs AI-U vs AI-G QA Rev Blk Ev Les
90
+ ────────────────────────────────────────────────────────────────────────────────────────────────────
91
+ TASK-042 m 5d 3d 2d 1.4d -72% -53% -30% 1 3 0 5 2
92
+ TASK-089 l 8d 5d 3d 2.1d -74% -58% -30% 2 4 1 7 3
93
+ TASK-101 s 2d 1.5d 1d 0.6d -70% -60% -40% 0 2 0 3 1
89
94
 
90
95
  Avg savings vs solo: -72%
91
96
  Avg savings vs AI-unguided: -57%
97
+ Avg savings vs AI-guided: -33%
92
98
  Stories with actuals: 3/3
93
99
  ```
94
100
 
@@ -148,7 +154,7 @@ Task breakdown:
148
154
 
149
155
  ### Developer Story Point Estimation
150
156
 
151
- Architect sizing (`xs/s/m/l/xl` + optional points) reflects technical scope. Developer points reflect implementation effort from the developer's perspective. When both exist, the burndown uses developer points — the divergence between the two is a calibration signal worth tracking.
157
+ Architect sizing (`xs/s/m/l/xl` + optional points) reflects technical scope. Developer points reflect implementation effort from the developer's perspective. Autonomous developer phases record implementation points when missing; provider-backed phases can return `developerPoints`, while deterministic phases use the architect point estimate or a `3 points` fallback. When both exist, the burndown uses developer points — the divergence between the two is a calibration signal worth tracking.
152
158
 
153
159
  Developer records their estimate with:
154
160
 
@@ -194,9 +200,11 @@ All benchmark and burndown commands support `--json` for structured output.
194
200
  "sizingLabel": "m",
195
201
  "soloEstimateDays": 5,
196
202
  "aiUnguidedEstimateDays": 3,
203
+ "aiGuidedEstimateDays": 2,
197
204
  "actualDays": 1.4,
198
205
  "vsSoloPct": -72,
199
206
  "vsAiUnguidedPct": -53,
207
+ "vsAiGuidedPct": -30,
200
208
  "qaIterations": 1,
201
209
  "quality": {
202
210
  "reviewCount": 3,
@@ -2,7 +2,8 @@
2
2
 
3
3
  `orchestra commands manifest --json` is the supported discovery surface for
4
4
  automation. Entries include command text, required and optional flags, JSON
5
- support, compatibility status, and reusable schema/message references.
5
+ support, surface classification, compatibility status, contract version, exit
6
+ codes, error shape, and reusable schema/message references.
6
7
 
7
8
  ## Compatibility
8
9
 
@@ -11,6 +12,22 @@ support, compatibility status, and reusable schema/message references.
11
12
  - `experimental`: command is intended for humans or workflow mutation and should
12
13
  not be consumed as a stable machine contract unless a future schema is linked.
13
14
 
15
+ ## Surfaces
16
+
17
+ - `public`: stable automation contract for project workflows, documentation,
18
+ bootstrap files, and CI usage.
19
+ - `experimental`: supported CLI behavior, but not a frozen machine contract for
20
+ `1.0.0`.
21
+ - `internal`: implementation or adapter support surface. It may expose `--json`
22
+ for local tooling, but it is not part of the public 1.0 automation contract.
23
+
24
+ Public commands use contract version `1.0`, exit codes `0` and `1`, and the
25
+ generic JSON error contract unless a command-specific schema replaces it.
26
+
27
+ Config migration is exposed as a public contract through
28
+ `orchestra config migrate --json`. It is dry-run by default and requires
29
+ `--apply` before writing `.agent-workflow/config.json`.
30
+
14
31
  ## Reusable Contracts
15
32
 
16
33
  - `schemas/commands/generic-json-output.schema.json` defines the baseline JSON
@@ -17,16 +17,48 @@ orchestra commands manifest --json
17
17
  Core commands are the first screen for a new project or a production delivery
18
18
  workflow. They are stable enough to appear in onboarding and public examples.
19
19
 
20
- | Job | Commands |
21
- | --- | --- |
22
- | Install and verify | `orchestra version`, `orchestra upgrade --smoke --json` |
23
- | Initialize workspace | `orchestra init`, `orchestra health --json`, `orchestra status` |
24
- | Create and inspect work | `orchestra task add`, `orchestra task list`, `orchestra task show` |
25
- | Run governed delivery | `orchestra workflow run`, `orchestra workflow runs` |
26
- | Resolve workflow gates | `orchestra decision add`, `orchestra workflow gate-approve` |
27
- | Record delivery proof | `orchestra evidence add`, `orchestra review` |
28
- | Sync tracker state | `orchestra github sync --issue <number>` |
29
- | Check release readiness | `orchestra release check --json`, `orchestra release candidate --dry-run --json` |
20
+ For first visible value, show this compact sequence before introducing every
21
+ artifact type:
22
+
23
+ ```bash
24
+ orchestra init
25
+ orchestra health --json
26
+ orchestra task add --id DEMO-001 --title "Ship a governed README update" --owner developer --paths "README.md"
27
+ orchestra workflow run --task DEMO-001 --gates none
28
+ orchestra status
29
+ orchestra release candidate --dry-run --json
30
+ ```
31
+
32
+ For production delivery, follow with estimates, architecture sizing decisions,
33
+ human gates, evidence, QA reviews, and `orchestra release check --json`.
34
+
35
+ | Job | Commands |
36
+ | ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
37
+ | Install and verify | `orchestra version`, `orchestra upgrade --smoke --json` |
38
+ | Initialize workspace | `orchestra init`, `orchestra health --json`, `orchestra status` |
39
+ | Create and inspect work | `orchestra task add`, `orchestra task list`, `orchestra task show` |
40
+ | Run governed delivery | `orchestra workflow run`, `orchestra workflow runs` |
41
+ | Inspect workflow shape and playbooks | `orchestra workflow phase-plan --task <id>`, `orchestra workflow render --task <id> --phase <phase>` |
42
+ | Resolve workflow gates and clarifications | `orchestra decision add`, `orchestra workflow gate-approve`, `orchestra workflow clarify`, `orchestra workflow clarify-respond` |
43
+ | Record delivery proof | `orchestra evidence add`, `orchestra review` |
44
+ | Plan QA automation | `orchestra qa coverage --task <id>`, `orchestra playwright plan --task <id>` |
45
+ | Sync tracker state | `orchestra github sync --issue <number>`, `orchestra tracker sync --tracker <provider> --remote <id> --issue-file <file>` |
46
+ | Check release readiness | `orchestra release check --json`, `orchestra release candidate --dry-run --json` |
47
+
48
+ ## Run Command Guidance
49
+
50
+ `orchestra workflow run` is the governed delivery lifecycle. It creates phase
51
+ sub-tasks, handoffs, run state, and gates across PM, PO, Architect, Developer,
52
+ QA, and Release. Use it for product work, issue delivery, QA handoff, release
53
+ readiness, and dogfooding the end-to-end process.
54
+
55
+ `orchestra run` executes the task's local execution plan. It is useful for
56
+ lower-level plan execution and budget/fallback validation, but it does not
57
+ replace the autonomous lifecycle or its phase gates.
58
+
59
+ For the full workflow narrative, phase matrix, gate-versus-clarify decision
60
+ table, and playbook authoring guidance, see
61
+ [autonomous-workflow.md](autonomous-workflow.md).
30
62
 
31
63
  ## Advanced Commands
32
64
 
@@ -39,10 +71,25 @@ should be linked from onboarding, not mixed into first-run copy.
39
71
  - Runtime and skills: `orchestra runtime brief`,
40
72
  `orchestra runtime delegate-plan`, `orchestra skills plan`,
41
73
  `orchestra skills render`, `orchestra protocol render`.
74
+ - Extensions: `orchestra extensions list --json`,
75
+ `orchestra extensions validate --json`.
76
+ - Provider profiles: `orchestra model profile set`,
77
+ `orchestra model profile apply`, `orchestra model profile smoke`,
78
+ `orchestra model providers`.
79
+ - Generated-file operations: `orchestra refresh --check --json`,
80
+ `orchestra refresh --dry-run`, `orchestra refresh --force`,
81
+ `orchestra cursor canvas status --json`,
82
+ `orchestra cursor canvas sync --dry-run --json`.
42
83
  - Memory and source selection: `orchestra memory query`,
43
- `orchestra memory hook`, `orchestra sources list`, `orchestra lessons list`.
84
+ `orchestra memory hook`, `orchestra memory governance`,
85
+ `orchestra sources list`, `orchestra lessons list`,
86
+ `orchestra lessons archive`, `orchestra lessons redact`,
87
+ `orchestra lessons prune`.
44
88
  - Metrics and cost: `orchestra estimate`, `orchestra benchmark`,
45
89
  `orchestra burndown`, `orchestra usage`, `orchestra budget check`.
90
+ - Governance policy: `orchestra policy evaluate` checks routine, elevated,
91
+ major, and destructive action tiers from `.agent-workflow/policy.json` and
92
+ creates approval artifacts for major or destructive actions.
46
93
  - Local control surfaces: `orchestra web`, `orchestra serve`,
47
94
  `orchestra roles list`, `orchestra config show`.
48
95
 
@@ -67,5 +114,7 @@ unless a specific workflow needs them.
67
114
  ## Documentation Rule
68
115
 
69
116
  Do not duplicate the full command manifest in README or the site. Show the core
70
- path, then link to `orchestra commands manifest --json` and
71
- [command-contracts.md](command-contracts.md) for automation consumers.
117
+ path, then link to `orchestra -h` for human onboarding,
118
+ `orchestra help commands` for the full CLI catalog, and
119
+ `orchestra commands manifest --json` plus [command-contracts.md](command-contracts.md)
120
+ for automation consumers.
@@ -31,6 +31,7 @@ orchestra estimate \
31
31
  --sizing s \
32
32
  --solo-days 1 \
33
33
  --ai-unguided-days 0.5 \
34
+ --ai-guided-days 0.25 \
34
35
  --confidence medium
35
36
  ```
36
37
 
@@ -0,0 +1,83 @@
1
+ # Extension Contracts
2
+
3
+ Open Orchestra discovers local extensions from:
4
+
5
+ ```text
6
+ .agent-workflow/extensions/<extension-id>/manifest.json
7
+ ```
8
+
9
+ The manifest is metadata only. Discovery and validation do not import or execute
10
+ extension code.
11
+
12
+ ## Manifest
13
+
14
+ ```json
15
+ {
16
+ "id": "acme-tracker",
17
+ "name": "Acme Tracker Adapter",
18
+ "version": "1.0.0",
19
+ "compatibility": { "orchestra": "^1.0.0" },
20
+ "extensionPoints": ["tracker-adapter"],
21
+ "entry": "adapter.js",
22
+ "capabilities": ["normalized-issue-sync"],
23
+ "riskAreas": ["network", "tracker-state"]
24
+ }
25
+ ```
26
+
27
+ Supported extension points for the 1.0 contract are:
28
+
29
+ - `skill`
30
+ - `tracker-adapter`
31
+ - `provider-adapter`
32
+ - `phase-playbook`
33
+ - `refresh-target`
34
+
35
+ `entry` must be a relative path inside the extension directory. Absolute paths
36
+ and path traversal are rejected. Provider adapter extensions are currently
37
+ metadata-only until runtime provider loading is stabilized.
38
+
39
+ ## Commands
40
+
41
+ ```bash
42
+ orchestra extensions list --json
43
+ orchestra extensions validate --json
44
+ ```
45
+
46
+ ## Provider-Like Example
47
+
48
+ ```json
49
+ {
50
+ "id": "acme-models",
51
+ "name": "Acme Models Provider",
52
+ "version": "1.0.0",
53
+ "compatibility": { "orchestra": "^1.0.0" },
54
+ "extensionPoints": ["provider-adapter"],
55
+ "entry": "provider.js",
56
+ "capabilities": ["chat-completions", "json-mode"],
57
+ "riskAreas": ["secrets", "network"]
58
+ }
59
+ ```
60
+
61
+ ## Tracker-Like Example
62
+
63
+ ```json
64
+ {
65
+ "id": "acme-tracker",
66
+ "name": "Acme Tracker Adapter",
67
+ "version": "1.0.0",
68
+ "compatibility": { "orchestra": "^1.0.0" },
69
+ "extensionPoints": ["tracker-adapter"],
70
+ "entry": "tracker.js",
71
+ "capabilities": ["normalized-issue-sync"],
72
+ "riskAreas": ["network", "tracker-state"]
73
+ }
74
+ ```
75
+
76
+ ## Stability
77
+
78
+ Stable for 1.0.0: manifest shape, discovery path, extension point names, local
79
+ path safety validation, and JSON output from `extensions list` and
80
+ `extensions validate`.
81
+
82
+ Experimental: dynamic code loading, provider runtime registration, tracker live
83
+ transport execution, and generated-file refresh target execution.