@jterrats/open-orchestra 1.1.2 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. package/AGENTS.md +14 -1
  2. package/CHANGELOG.md +48 -0
  3. package/CLAUDE.md +14 -1
  4. package/README.md +21 -6
  5. package/dist/autonomous-phase-lifecycle.js +52 -2
  6. package/dist/autonomous-phase-lifecycle.js.map +1 -1
  7. package/dist/autonomous-run-state.d.ts +3 -1
  8. package/dist/autonomous-run-state.js +28 -2
  9. package/dist/autonomous-run-state.js.map +1 -1
  10. package/dist/autonomous-run-store.js +9 -0
  11. package/dist/autonomous-run-store.js.map +1 -1
  12. package/dist/benchmark.js +54 -6
  13. package/dist/benchmark.js.map +1 -1
  14. package/dist/capability-commands.d.ts +2 -0
  15. package/dist/capability-commands.js +82 -0
  16. package/dist/capability-commands.js.map +1 -0
  17. package/dist/capture-commands.d.ts +16 -0
  18. package/dist/capture-commands.js +70 -0
  19. package/dist/capture-commands.js.map +1 -0
  20. package/dist/cli-capability-catalog.d.ts +10 -0
  21. package/dist/cli-capability-catalog.js +110 -0
  22. package/dist/cli-capability-catalog.js.map +1 -0
  23. package/dist/cli-capability-data.d.ts +2 -0
  24. package/dist/cli-capability-data.js +233 -0
  25. package/dist/cli-capability-data.js.map +1 -0
  26. package/dist/cli-capability-renderer.d.ts +2 -0
  27. package/dist/cli-capability-renderer.js +43 -0
  28. package/dist/cli-capability-renderer.js.map +1 -0
  29. package/dist/cli-capability-types.d.ts +30 -0
  30. package/dist/cli-capability-types.js +2 -0
  31. package/dist/cli-capability-types.js.map +1 -0
  32. package/dist/command-manifest.js +11 -5
  33. package/dist/command-manifest.js.map +1 -1
  34. package/dist/command-routes-integrations.js +2 -1
  35. package/dist/command-routes-integrations.js.map +1 -1
  36. package/dist/command-routes.js +8 -1
  37. package/dist/command-routes.js.map +1 -1
  38. package/dist/commands.d.ts +4 -2
  39. package/dist/commands.js +7 -2
  40. package/dist/commands.js.map +1 -1
  41. package/dist/constants.js +2 -0
  42. package/dist/constants.js.map +1 -1
  43. package/dist/delivery-commands.js +1 -0
  44. package/dist/delivery-commands.js.map +1 -1
  45. package/dist/delivery-dashboard-charts.js +7 -3
  46. package/dist/delivery-dashboard-charts.js.map +1 -1
  47. package/dist/delivery-dashboard-types.d.ts +4 -0
  48. package/dist/delivery-dashboard.js +6 -0
  49. package/dist/delivery-dashboard.js.map +1 -1
  50. package/dist/effort-classification.d.ts +1 -0
  51. package/dist/effort-classification.js +15 -0
  52. package/dist/effort-classification.js.map +1 -1
  53. package/dist/fs-utils.js +6 -5
  54. package/dist/fs-utils.js.map +1 -1
  55. package/dist/mcp-integrations.d.ts +9 -2
  56. package/dist/mcp-integrations.js +50 -13
  57. package/dist/mcp-integrations.js.map +1 -1
  58. package/dist/mcp-oauth-proxy.d.ts +8 -0
  59. package/dist/mcp-oauth-proxy.js +25 -0
  60. package/dist/mcp-oauth-proxy.js.map +1 -1
  61. package/dist/mcp-runtime-config.d.ts +55 -0
  62. package/dist/mcp-runtime-config.js +252 -0
  63. package/dist/mcp-runtime-config.js.map +1 -0
  64. package/dist/mcp-tool-adapter.js +2 -4
  65. package/dist/mcp-tool-adapter.js.map +1 -1
  66. package/dist/model-providers.d.ts +1 -1
  67. package/dist/model-providers.js +1 -1
  68. package/dist/model-providers.js.map +1 -1
  69. package/dist/ollama-provider.d.ts +7 -0
  70. package/dist/ollama-provider.js +41 -15
  71. package/dist/ollama-provider.js.map +1 -1
  72. package/dist/phase-playbooks.js +17 -0
  73. package/dist/phase-playbooks.js.map +1 -1
  74. package/dist/planning-commands.js +5 -0
  75. package/dist/planning-commands.js.map +1 -1
  76. package/dist/qa-evidence-validation.d.ts +1 -1
  77. package/dist/qa-evidence-validation.js +3 -1
  78. package/dist/qa-evidence-validation.js.map +1 -1
  79. package/dist/quality-contracts.js +1 -1
  80. package/dist/quality-contracts.js.map +1 -1
  81. package/dist/release-inclusion.js +1 -1
  82. package/dist/release-inclusion.js.map +1 -1
  83. package/dist/runtime-bootstrap.js +13 -0
  84. package/dist/runtime-bootstrap.js.map +1 -1
  85. package/dist/runtime-child-prompt.js +25 -0
  86. package/dist/runtime-child-prompt.js.map +1 -1
  87. package/dist/runtime-commands.d.ts +3 -0
  88. package/dist/runtime-commands.js +96 -0
  89. package/dist/runtime-commands.js.map +1 -1
  90. package/dist/runtime-events.d.ts +48 -0
  91. package/dist/runtime-events.js +255 -0
  92. package/dist/runtime-events.js.map +1 -0
  93. package/dist/runtime-execution-renderer.js +8 -0
  94. package/dist/runtime-execution-renderer.js.map +1 -1
  95. package/dist/runtime-parent-actions.js +3 -0
  96. package/dist/runtime-parent-actions.js.map +1 -1
  97. package/dist/runtime-spawn-bridge.js +15 -5
  98. package/dist/runtime-spawn-bridge.js.map +1 -1
  99. package/dist/sonar-preflight.js +21 -1
  100. package/dist/sonar-preflight.js.map +1 -1
  101. package/dist/sprint-metrics.js +7 -1
  102. package/dist/sprint-metrics.js.map +1 -1
  103. package/dist/task-graph-commands.js +48 -0
  104. package/dist/task-graph-commands.js.map +1 -1
  105. package/dist/tool-commands.d.ts +1 -0
  106. package/dist/tool-commands.js +129 -14
  107. package/dist/tool-commands.js.map +1 -1
  108. package/dist/types/metrics.d.ts +5 -1
  109. package/dist/types/runtime.d.ts +3 -0
  110. package/dist/types/tasks.d.ts +24 -1
  111. package/dist/types/workflow-run.d.ts +16 -0
  112. package/dist/types.d.ts +5 -3
  113. package/dist/types.js.map +1 -1
  114. package/dist/validation.js +6 -0
  115. package/dist/validation.js.map +1 -1
  116. package/dist/verifier-contracts.d.ts +29 -0
  117. package/dist/verifier-contracts.js +184 -0
  118. package/dist/verifier-contracts.js.map +1 -0
  119. package/dist/web-api-read-routes.d.ts +1 -0
  120. package/dist/web-api-read-routes.js +6 -1
  121. package/dist/web-api-read-routes.js.map +1 -1
  122. package/dist/web-api.js +58 -1
  123. package/dist/web-api.js.map +1 -1
  124. package/dist/web-console/assets/index-Cip-y4WE.css +1 -0
  125. package/dist/web-console/assets/index-CuWjFxss.js +11 -0
  126. package/dist/web-console/index.html +2 -2
  127. package/dist/workflow-approval-service.js +36 -2
  128. package/dist/workflow-approval-service.js.map +1 -1
  129. package/dist/workflow-continuation-policy.d.ts +3 -0
  130. package/dist/workflow-continuation-policy.js +20 -0
  131. package/dist/workflow-continuation-policy.js.map +1 -0
  132. package/dist/workflow-correction-context.d.ts +30 -0
  133. package/dist/workflow-correction-context.js +117 -0
  134. package/dist/workflow-correction-context.js.map +1 -0
  135. package/dist/workflow-evidence-service.js +31 -22
  136. package/dist/workflow-evidence-service.js.map +1 -1
  137. package/dist/workflow-gates.js +6 -0
  138. package/dist/workflow-gates.js.map +1 -1
  139. package/dist/workflow-handoff-assessment.js +10 -2
  140. package/dist/workflow-handoff-assessment.js.map +1 -1
  141. package/dist/workflow-handoff-contract.d.ts +7 -0
  142. package/dist/workflow-handoff-contract.js +18 -0
  143. package/dist/workflow-handoff-contract.js.map +1 -1
  144. package/dist/workflow-phase-planner.js +56 -22
  145. package/dist/workflow-phase-planner.js.map +1 -1
  146. package/dist/workflow-return-routing.d.ts +14 -0
  147. package/dist/workflow-return-routing.js +61 -0
  148. package/dist/workflow-return-routing.js.map +1 -0
  149. package/dist/workflow-run-commands.js +173 -52
  150. package/dist/workflow-run-commands.js.map +1 -1
  151. package/dist/workflow-services.js +6 -3
  152. package/dist/workflow-services.js.map +1 -1
  153. package/dist/workflow-task-service.js +15 -7
  154. package/dist/workflow-task-service.js.map +1 -1
  155. package/docs/autonomous-workflow.md +10 -0
  156. package/docs/ci-self-hosted-runners.md +82 -0
  157. package/docs/command-contracts.md +21 -0
  158. package/docs/e2e-test-batteries.md +115 -25
  159. package/docs/runtime-adapters.md +4 -0
  160. package/docs/runtime-llm-flow.md +6 -3
  161. package/docs/security-env-vars.md +1 -0
  162. package/docs/sonar-quality-gates.md +19 -9
  163. package/docs/verifier-contracts.md +87 -0
  164. package/package.json +7 -3
  165. package/site/dist/assets/{index-Bi8l6tCE.js → index-B1Xsl_Kg.js} +1 -1
  166. package/site/dist/index.html +1 -1
  167. package/dist/web-console/assets/index--_RLc7Zp.js +0 -11
  168. package/dist/web-console/assets/index-Cxo3REa4.css +0 -1
@@ -16,41 +16,92 @@ entry points a user or CI runner actually executes.
16
16
  flows.
17
17
  - External provider, GitHub, Sonar, or network-dependent paths must be opt-in
18
18
  and must report skipped or deferred evidence when offline.
19
+ - Local provider smoke tests must default to mock or loopback/private endpoints,
20
+ reject public internet egress unless a trusted-provider policy explicitly
21
+ allows it, and record redacted provider provenance instead of credentials,
22
+ raw prompts, hostnames, or ports.
23
+ - Stubbed provider/MCP batteries are contract evidence only. They may block PRs
24
+ when CLI/API/workflow contracts regress, but they must not satisfy
25
+ real-provider acceptance criteria for hosted auth, latency, rate limits,
26
+ provider-side failures, or production model behavior.
19
27
  - A release can ship only when P0 batteries pass or a release-manager accepted
20
28
  risk records the unavailable environment and compensating evidence.
21
29
 
30
+ ## Product Journey Coverage Matrix
31
+
32
+ | Journey | Primary surface | Environment model | Expected result | Evidence strategy | Current battery | Child story recommendation |
33
+ | ------------------------------- | ---------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | ------------------------------------- |
34
+ | First workspace setup | CLI | Local `/tmp` workspace, source and packaged binary | User can initialize a project, preserve existing files, and see target runtime guidance | Assert exit codes, generated files, managed block boundaries, package version, and human/JSON output | Local and installed CLI onboarding | GH-533-A source/package setup parity |
35
+ | Runtime-specific guidance regen | CLI + generated docs | Simulated Codex, Claude, Cursor, VS Code, Windsurf, generic workspaces | `init --force` regenerates missing managed guidance without mixing target-specific instructions | Filesystem diffs plus QA coverage JSON proving regenerated artifacts map back to acceptance criteria | Init refresh environments | GH-533-B runtime regen matrix |
36
+ | Workflow delivery lifecycle | CLI workflow | Isolated local workspace | Task, estimate, phase handoffs, gate pause/resume, QA failback, evidence, and release readiness work end to end | JSON event stream, handoff artifacts, review/evidence records, and release-readiness before/after assertions | Workflow lifecycle CLI | GH-533-C workflow evidence lifecycle |
37
+ | Runtime delegation | CLI runtime actions | Local runtime queue with detached sessions | Manual and multi-squad delegation queue safely, preserve parent availability, and reconcile lifecycle state | Spawn request artifacts, lifecycle commands, runtime session lists, queue state, and completion events | Runtime manual queue, multi-squad runtime | GH-533-D runtime delegation contracts |
38
+ | Web console operator flow | Browser app + API | Local web server and persisted test workspace | Operator can inspect tasks, costs, providers, delegation, workflow progress, recovery, and artifacts | Playwright visible assertions, API persistence checks, responsive/keyboard coverage, screenshots/traces on failure | Browser console | GH-533-E web console operator journey |
39
+ | Chat and provider flow | Browser app + API + provider | Stubbed provider by default, opt-in local provider | Chat sends scoped messages, streams usable status, records provider provenance, and handles failures safely | API response assertions, SSE/event trace, DOM state, provider request fixture, redacted logs | Browser console, provider opt-in | GH-533-F chat provider E2E |
40
+ | Stubbed provider + MCP contract | CLI + web API + workflow | Built-in fake provider and scoped MCP config in an isolated workspace | PR-safe provider/MCP contract proves fake-provider routing, scoped MCP status, redacted evidence, and hosted provider fail-closed policy | CLI/API JSON comparisons, workflow executor provenance, redacted contract evidence report, hosted-provider negative assertion | Stubbed provider MCP | GH-551 GH-528A PR-safe contract E2E |
41
+ | Public docs and site discovery | Browser site | Local static/site build | Users can find docs, navigate core concepts, inspect architecture, and read mobile-safe pages | Playwright navigation, search, docs catalog, responsive text-fit, and no raw GitHub dead-end assertions | Public site, docs/site content source | GH-533-G docs discovery E2E |
42
+ | Security-sensitive operations | CLI + API + browser | Local sandbox with hostile fixtures | Unsafe file paths, shell patterns, secrets, stack traces, and telemetry leaks are blocked or redacted | Hostile fixture assertions, redacted command/API evidence, browser-visible safe errors | Security-sensitive operations | GH-533-H security boundary E2E |
43
+ | Packaged renderer behavior | CLI package path | Built package artifacts | Diagram/report commands use packaged paths and produce user-safe output without stale source-only assumptions | Compiled command output, package file list, XML/SVG/HTML/source invariant checks, and explicit missing messaging | Renderer packaged paths | GH-540 packaged artifact E2E |
44
+ | Benchmark/dashboard reporting | CLI metrics + dashboard JSON | Isolated workflow telemetry with completed and measurable stories | Velocity, calibration, and dashboard distinguish completed stories from measurable actuals without fake duration | CLI JSON assertions, dashboard metric assertions, actual source checks, and calibration sample-size checks | Benchmark dashboard duration | GH-541 benchmark dashboard E2E |
45
+ | MCP scoped OAuth diagnostics | CLI + web API | Runtime-scoped MCP config in an isolated workspace | CLI and web API report matching runtime OAuth state, redact secrets, and hide cross-workspace MCP config | CLI/API JSON comparisons, redaction assertions, scoped workspace config checks | MCP scoped OAuth smoke | GH-542 MCP OAuth E2E |
46
+ | Release candidate readiness | CLI + CI artifacts | Local dry run, CI/self-hosted runner | Candidate package contents, version/tag policy, release matrix, and quality gates are release-ready | `npm pack --dry-run`, release check JSON, CI run links/logs, and accepted-risk records for unavailable environments | Package release dry run, source quality | GH-533-J release candidate E2E |
47
+
48
+ ## Regeneration Plan
49
+
50
+ Regenerate E2E scenarios when product behavior changes one of these contracts:
51
+
52
+ 1. Public CLI command shape, command manifest, JSON output, generated files, or
53
+ package entry points change.
54
+ 2. Runtime profiles, phase playbooks, agent skills/rules, spawn request shape,
55
+ or managed guidance files change.
56
+ 3. Web console routes, visible task/provider/workflow states, API persistence,
57
+ SSE events, or recovery flows change.
58
+ 4. Provider integrations, MCP configuration, local/cloud execution policy,
59
+ secrets handling, or redaction boundaries change.
60
+ 5. Release, CI, package, tag, or self-hosted runner policy changes.
61
+
62
+ For each regeneration event, QA must update the matrix row, add or revise the
63
+ expected result, choose the real evidence surface, and record why lower-level
64
+ tests are or are not enough. Stubbed tests can stay P0 only when the product
65
+ contract is deterministic without external services; cloud, GitHub, Sonar,
66
+ provider, mobile, or desktop-dependent evidence must be P1/P2 opt-in with a
67
+ clear skip/deferred signal and owner.
68
+
22
69
  ## P0 Release-Blocking Batteries
23
70
 
24
- | Battery | Scope | Command | Minimum Assertions | Evidence |
25
- | ------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------ |
26
- | Source quality | Static checks, build, unit tests, workflow validation, secret scan, security audit | `npm run precommit` | exit code 0, no leaks, no audit blockers, workflow valid | command log |
27
- | Local CLI onboarding | Current source CLI in `/tmp` workspaces | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init` | `--version`, `init`, `status`, `validate`, first-use task, handoff, evidence, release readiness | stdout/stderr, JSON output, filesystem assertions |
28
- | Installed CLI onboarding | Installed or packaged CLI in `/tmp` workspaces | `npm run test:e2e:init` after installing the candidate package | same assertions as local CLI onboarding, proving the packaged binary matches source behavior | stdout/stderr, JSON output, filesystem assertions, package version |
29
- | Browser console | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e` | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior | Playwright report, screenshots/traces on failure |
30
- | Public site | Documentation/site navigation, docs catalog, architecture viewer, mobile fit | `npm run test:e2e` | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit | Playwright report |
31
- | Runtime manual queue | Manual runtime delegation in a `/tmp` workspace | `npm run test:e2e:runtime` | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content |
32
- | Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces | `npm run test:e2e:init` | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON |
33
- | Workflow lifecycle CLI | CLI workflow run, gate, resume, QA failback, release readiness | `node --test e2e/workflow-lifecycle-cli.test.js` | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence | JSON output, events, handoffs |
71
+ | Battery | Scope | Command | Minimum Assertions | Evidence |
72
+ | ------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
73
+ | Source quality | Static checks, build, unit tests, workflow validation, secret scan, security audit | `npm run ci:quality` | exit code 0, no leaks, no audit blockers, workflow valid | command log |
74
+ | Local CLI onboarding | Current source CLI in `/tmp` workspaces | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init` | `--version`, `init`, `status`, `validate`, first-use task, handoff, evidence, release readiness | stdout/stderr, JSON output, filesystem assertions |
75
+ | Installed CLI onboarding | Installed or packaged CLI in `/tmp` workspaces | `npm run test:e2e:init` after installing the candidate package | same assertions as local CLI onboarding, proving the packaged binary matches source behavior | stdout/stderr, JSON output, filesystem assertions, package version |
76
+ | Browser console | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e` | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior | Playwright report, screenshots/traces on failure |
77
+ | Public site | Documentation/site navigation, docs catalog, architecture viewer, mobile fit | `npm run test:e2e` | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit | Playwright report |
78
+ | Runtime manual queue | Manual runtime delegation in a `/tmp` workspace | `npm run test:e2e:runtime` | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content |
79
+ | Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces | `npm run test:e2e:init` | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON |
80
+ | Workflow lifecycle CLI | CLI workflow run, gate, resume, QA failback, release readiness | `node --test e2e/workflow-lifecycle-cli.test.js` | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence | JSON output, events, handoffs |
81
+ | Renderer packaged paths | Diagram lint command, artifact previews, and report renderer package contracts | `npm run build && node --test test/renderer-compiled-path.test.js e2e/packaged-renderer-smoke.test.js` | compiled CLI path emits Mermaid install guidance without stack traces, candidate package includes compiled renderer modules, previews preserve draw.io/Mermaid/ERD/TSX invariants, report render command absence is explicit | stdout/stderr, JSON output, package file list, XML/SVG/HTML/source invariant checks |
82
+ | Stubbed provider MCP | Built-in fake provider, scoped MCP fixture, provider/MCP web API status, and contract evidence | `npm run test:e2e:init` or `npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js` | fake provider routing is visible through CLI/API, scoped MCP CLI/API status matches, hosted provider execution fails before direct provider API use, evidence is redacted and labeled contract evidence, and no hosted provider secret is required | CLI/API JSON output, workflow events, `.agent-workflow/evidence/*-report.md`, `reports/stubbed-provider-mcp-contract-evidence.json` |
34
83
 
35
84
  ## P1 High-Risk Regression Batteries
36
85
 
37
- | Battery | Scope | Command | Minimum Assertions | Evidence |
38
- | ------------------------------ | --------------------------------------------------------------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ |
39
- | Multi-squad runtime | Parallel squad delegation with queue and threshold policy | `npm run test:e2e:runtime` | independent sessions, non-blocking parent, queued sessions do not fall back to parent, completion order reconciles | JSON output, lifecycle events |
40
- | Acceptance evidence | CLI, API, browser, and deferred integration evidence | `node --test e2e/acceptance-evidence.test.js` | evidence maps to named acceptance criteria, deferred external validation requires owner and rationale | evidence artifacts |
41
- | Recovery and repair | Interrupted runs, stale locks, failed provider phases | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage | recovery detects issue, repair requires confirmation, repaired state is observable | JSON output, before/after state |
42
- | Docs/site content source | Site content generated from docs and manifest | `npm run site:build && npm run test:e2e -- --grep docs` | docs render as human-friendly catalog, no markdown-only dead ends, search works | Playwright report |
43
- | Security-sensitive operations | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security` | path traversal blocked, unsafe writes rejected, secret-like data redacted, no raw stack traces | command/API evidence |
44
- | Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace | `npm run test:e2e:runtime:ollama` | `model connect --provider ollama`, provider-backed developer phase, OpenAI-compatible request shape, provider provenance, no runtime subagent credentials in artifacts | stdout/stderr, JSON output, mock provider request, event log |
86
+ | Battery | Scope | Command | Minimum Assertions | Evidence |
87
+ | ------------------------------ | --------------------------------------------------------------------- | ----------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ |
88
+ | Multi-squad runtime | Parallel squad delegation with queue and threshold policy | `npm run test:e2e:runtime` | independent sessions, non-blocking parent, queued sessions do not fall back to parent, completion order reconciles | JSON output, lifecycle events |
89
+ | Acceptance evidence | CLI, API, browser, and deferred integration evidence | `node --test e2e/acceptance-evidence.test.js` | evidence maps to named acceptance criteria, deferred external validation requires owner and rationale | evidence artifacts |
90
+ | Recovery and repair | Interrupted runs, stale locks, failed provider phases | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage | recovery detects issue, repair requires confirmation, repaired state is observable | JSON output, before/after state |
91
+ | Docs/site content source | Site content generated from docs and manifest | `npm run site:build && npm run test:e2e -- --grep docs` | docs render as human-friendly catalog, no markdown-only dead ends, search works | Playwright report |
92
+ | Security-sensitive operations | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security` | path traversal blocked, unsafe writes rejected, secret-like data redacted, no raw stack traces | command/API evidence |
93
+ | Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace | `npm run test:e2e:runtime:ollama` | `model connect --provider ollama`, provider-backed developer phase, OpenAI-compatible request shape, provider provenance, no runtime subagent credentials in artifacts | stdout/stderr, JSON output, mock provider request, event log |
94
+ | Benchmark dashboard duration | Benchmark-derived velocity, calibration, and delivery dashboard JSON | `npm run build && node --test e2e/benchmark-dashboard-duration.test.js` | completed stories are counted separately from stories with actual duration, completed-without-duration is visible, calibration sample size excludes null actuals | CLI JSON output, dashboard JSON, calibration JSON |
95
+ | MCP scoped OAuth smoke | Runtime-scoped MCP integration status through CLI and web API | `npm run build && node --test e2e/mcp-scoped-oauth-smoke.test.js` | CLI and web API agree for `--runtime claude-cli`, diagnostics redact OAuth/API-key material, cross-workspace MCP config is not exposed | CLI/API JSON output, scoped config fixture, redaction checks |
45
96
 
46
97
  ## P2 Extended Confidence Batteries
47
98
 
48
- | Battery | Scope | Command | Minimum Assertions | Evidence |
49
- | -------------------------- | ----------------------------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | ---------------------------- |
50
- | Tracker and GitHub sync | Issue import/export and close readiness | opt-in CI job with network credentials | labels, comments, close gate, release readiness, no secret exposure | sanitized logs |
51
- | Sonar quality loop | Local or remote Sonar import and release gate mapping | configured Sonar workflow or local compose job | insights imported, release readiness reflects quality gate, unavailable token is explicit | artifact import report |
52
- | Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage | registry routing, explicit direct API policy, forbidden fallback, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
53
- | Package release dry run | npm package contents and release check | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete | package list, release report |
99
+ | Battery | Scope | Command | Minimum Assertions | Evidence |
100
+ | -------------------------- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------- |
101
+ | Tracker and GitHub sync | Issue import/export and close readiness | opt-in CI job with network credentials | labels, comments, close gate, release readiness, no secret exposure | sanitized logs |
102
+ | Sonar quality loop | Local or remote Sonar import and release gate mapping | configured Sonar workflow or local compose job | insights imported, release readiness reflects quality gate, unavailable token is explicit | artifact import report |
103
+ | Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage | registry routing, explicit direct API policy, forbidden fallback, local-provider public egress rejection, timeout handling, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
104
+ | Package release dry run | npm package contents and release check | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete | package list, release report |
54
105
 
55
106
  ## Required `/tmp` Fixture Patterns
56
107
 
@@ -81,9 +132,29 @@ the packaging/install path is wrong.
81
132
  3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
82
133
  resume, and release readiness.
83
134
  4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.
84
- 5. Add focused security and acceptance-evidence E2E only where unit tests cannot
135
+ 5. Add `e2e/stubbed-provider-mcp-contract.test.js` for PR-safe provider/MCP
136
+ contract evidence that cannot be confused with real-provider evidence.
137
+ 6. Add focused security and acceptance-evidence E2E only where unit tests cannot
85
138
  prove the user-visible contract.
86
139
 
140
+ ## Executable Child Story Recommendations
141
+
142
+ | Story | Scope | First executable check | Acceptance criteria seed |
143
+ | -------- | ---------------------------- | --------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
144
+ | GH-533-A | Source/package setup parity | Extend `e2e/init-onboarding.test.js` to run source and packaged modes from the same fixture table | Source and installed binaries initialize equivalent state, report equivalent JSON contracts, and identify candidate package version |
145
+ | GH-533-B | Runtime regen matrix | Add fixture rows for each supported runtime target and compare regenerated managed blocks | Regeneration preserves user content, writes only managed ranges, and never emits wrong-target instructions |
146
+ | GH-533-C | Workflow evidence lifecycle | Expand `e2e/workflow-lifecycle-cli.test.js` around AC-to-evidence and QA failback assertions | Release readiness stays blocked until evidence maps to acceptance criteria and blocked reviews return to the responsible role |
147
+ | GH-533-D | Runtime delegation contracts | Add queue and lifecycle assertions shared by manual and multi-squad runtime tests | Parent remains available, queued actions are explicit, child lifecycle is observable, and no fallback silently runs in parent |
148
+ | GH-533-E | Web console operator journey | Create a Page Object backed journey for task, provider, workflow, artifacts, and recovery panels | Browser UI proves visible state, persisted API effects, keyboard/responsive behavior, and failure recovery |
149
+ | GH-533-F | Chat provider E2E | Add stubbed API/SSE/DOM checks plus opt-in local provider smoke | Chat scope, provider provenance, streamed status, redacted errors, and retry/recovery behavior are asserted |
150
+ | GH-533-G | Docs discovery E2E | Split docs/site grep coverage into catalog, search, architecture, and mobile scenarios | Users can find docs without raw repository dead ends, and mobile pages have no clipping or overlap |
151
+ | GH-533-H | Security boundary E2E | Add hostile fixture coverage for paths, shell-like input, secrets, telemetry, and stack traces | Unsafe operations fail closed and all user-facing/API evidence is redacted |
152
+ | GH-540 | Packaged artifact E2E | Promote compiled renderer/package assertions into a package-candidate smoke | Packaged commands do not depend on source-only paths and produce inspectable artifacts or explicit unavailable messages |
153
+ | GH-541 | Benchmark dashboard E2E | Add velocity, calibration, and dashboard JSON assertions for completed stories without measured duration | Reports count completed work without inventing duration and calibration uses only measurable actuals |
154
+ | GH-542 | MCP OAuth scoped E2E | Add CLI/API status parity assertions for runtime-scoped MCP OAuth diagnostics | Scoped OAuth status is consistent across CLI/API and diagnostics stay redacted without cross-workspace leakage |
155
+ | GH-551 | Stubbed provider/MCP E2E | Add fake-provider workflow plus MCP dry-run CLI assertions in an isolated workspace | PR-safe contract evidence proves provider/MCP integration without secrets or network calls and never replaces real-provider smoke |
156
+ | GH-533-J | Release candidate E2E | Add a release dry-run evidence bundle around package contents, tag readiness, CI logs, and release matrix | Release candidate proves package contents, version/tag policy, quality gate, rollback evidence, and accepted-risk records |
157
+
87
158
  ## Opt-In Provider Runtime Batteries
88
159
 
89
160
  Provider-backed runtime batteries are not part of default CI because they may
@@ -94,6 +165,25 @@ workflow provenance, and no-secret behavior without requiring a real Ollama
94
165
  daemon. A separate real-model smoke can be run with `ORCHESTRA_OLLAMA_SMOKE=1`
95
166
  when validating a local model installation.
96
167
 
168
+ ## Stubbed Provider And MCP Contract Evidence
169
+
170
+ `e2e/stubbed-provider-mcp-contract.test.js` is the default PR/push-safe provider
171
+ and MCP contract suite. It is included in `npm run test:e2e:init`, which the
172
+ normal CI dogfood job runs without hosted provider secrets. The focused command
173
+ is:
174
+
175
+ ```bash
176
+ npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js
177
+ ```
178
+
179
+ The suite uses the built-in `fake` provider and workspace-scoped MCP config in a
180
+ temporary workspace. It asserts real CLI and web API outputs, records a
181
+ redacted report through `orchestra evidence add`, and labels that report
182
+ `contract evidence`. That evidence proves the local provider/MCP contract only.
183
+ It must not be used as acceptance evidence for real OpenAI, Anthropic, Gemini,
184
+ Ollama, or hosted MCP behavior. Real-provider acceptance remains covered only by
185
+ protected/manual smoke suites with explicit secrets and trusted-run policy.
186
+
97
187
  ## Definition Of Done
98
188
 
99
189
  An E2E battery is complete only when it has:
@@ -499,6 +499,10 @@ parent-agent fallback reason. `subagents` requires runtime-native support and
499
499
  fails fast if the runtime cannot satisfy it. `single-agent` forces the parent
500
500
  agent path and records that choice in phase provenance.
501
501
 
502
+ Gate mode is independent from execution mode: `--gates none` suppresses human
503
+ gate pauses, while `--phase-execution single-agent` is what prevents detached
504
+ runtime parent actions and removes subagent lifecycle requirements.
505
+
502
506
  When no task or role executor is configured and the default executor is
503
507
  `generic-runtime`, `auto` and strict `subagents` mode infer the active runtime
504
508
  from `OPEN_ORCHESTRA_ACTIVE_RUNTIME`, then from
@@ -155,9 +155,12 @@ adapter also reads optional `GEMINI_BASE_URL`; the base URL must be HTTPS and
155
155
  defaults to `https://generativelanguage.googleapis.com`.
156
156
 
157
157
  The Ollama adapter defaults to `http://localhost:11434/v1` and uses the
158
- OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` for a
159
- custom local or remote endpoint and `OLLAMA_API_KEY` when the endpoint requires
160
- one.
158
+ OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` only for
159
+ loopback or private-network endpoints; public internet hosts are rejected unless
160
+ a future trusted-provider policy explicitly implements that exception. Endpoint
161
+ URLs must use `http` or `https`, must not include credentials, and are recorded
162
+ in evidence only as redacted local-provider provenance. Set `OLLAMA_API_KEY`
163
+ when the local endpoint requires one.
161
164
 
162
165
  ## Runtime Execution
163
166
 
@@ -18,6 +18,7 @@ or local secret files.
18
18
  - `OPENAI_API_KEY_FILE`: optional OpenAI credential file path.
19
19
  - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CALLBACK`: local Claude native callback marker.
20
20
  - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CHILD_ID`: Claude native child id marker.
21
+ - `OPEN_ORCHESTRA_COMMAND_MANIFEST_OUT`: optional command manifest check output path.
21
22
  - `ORCHESTRA_GITLEAKS_BIN`: optional absolute gitleaks binary override.
22
23
  - `ORCHESTRA_SECRET_SCAN_FORCE_FALLBACK`: forces fallback secret scanning.
23
24
  - `ORCHESTRA_SKIP_UPDATE_CHECK`: disables package update checks.
@@ -64,10 +64,13 @@ Optional GitHub variables:
64
64
  quality gate fails.
65
65
  - `SONAR_RUNNER`: set to `self-hosted` to run the Sonar workflow on a local
66
66
  runner that can reach the shared SonarQube runtime directly. When this is set,
67
- the workflow uses `http://localhost:9001` by default and skips Cloudflare
68
- Access service-token checks.
67
+ the workflow resolves `SONAR_LOCAL_HOST_URL`, then `SONAR_HOST_URL`, then
68
+ `http://localhost:9001`. Cloudflare Access service-token checks still run
69
+ when the resolved URL uses `SONAR_HOST_URL` and Cloudflare credentials are
70
+ configured.
69
71
  - `SONAR_LOCAL_HOST_URL`: optional override for self-hosted runner mode when the
70
- runner reaches SonarQube through a different local-only URL.
72
+ runner reaches SonarQube through a different local-only URL. Prefer this over
73
+ `SONAR_HOST_URL` when the self-hosted runner should use a direct local path.
71
74
 
72
75
  The workflow skips analysis when `SONAR_TOKEN` is not configured. This keeps
73
76
  forks and offline development usable. For private repositories, keep
@@ -100,6 +103,10 @@ API access, issue API access, and security hotspot API access. It redacts the
100
103
  token, host URL, and Cloudflare Access service token values from diagnostic
101
104
  output. `hotspots` is a warning when unavailable because some Sonar tokens can
102
105
  analyze and read issues while hotspot review permissions are managed separately.
106
+ In local `sonarqube-local` mode, a branch-scoped quality gate request that
107
+ returns `404` falls back to the default branch when the project itself is
108
+ readable, because SonarQube Community Edition does not expose branch analysis
109
+ unless an optional add-on or commercial feature is installed.
103
110
 
104
111
  Common remediation:
105
112
 
@@ -266,12 +273,12 @@ cd ~/dev/sonarqube_jterrats_dev
266
273
  docker compose up -d
267
274
  ```
268
275
 
269
- When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube to
270
- `http://localhost:9001` unless `SONAR_LOCAL_HOST_URL` is set. This intentionally
271
- ignores `SONAR_HOST_URL`, so organization-level Cloudflare tunnel secrets do not
272
- pull local machine analysis back through Zero Trust. Cloudflare Access remains
273
- available for human remote browser usage and for GitHub-hosted runner access to
274
- private SonarQube only. The CI scan uses
276
+ When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube in this order:
277
+ `SONAR_LOCAL_HOST_URL`, `SONAR_HOST_URL`, then `http://localhost:9001`.
278
+ Use `SONAR_LOCAL_HOST_URL` to force a direct local path and avoid accidentally
279
+ pulling local machine analysis back through Zero Trust. When no local URL is
280
+ set and `SONAR_HOST_URL` points at a Cloudflare Access protected hostname, the
281
+ workflow enables the service-token proxy before preflight. The CI scan uses
275
282
  `continue-on-error` on the scanner step so Orchestra can still import and upload
276
283
  Sonar evidence when the quality gate fails; a final workflow step re-fails the
277
284
  job after evidence is captured.
@@ -299,6 +306,9 @@ Expected result:
299
306
  - Sonar authentication returns `{"valid":true}`.
300
307
  - `npm run sonar:preflight:local` passes `auth`, `project`, `qualityGate`, and
301
308
  `issues`; `hotspots` may warn when the token lacks hotspot read access.
309
+ - Pull request runs on local SonarQube Community Edition may report
310
+ `quality-gate-readable-default-branch` when branch-scoped quality gate status
311
+ is unavailable but default-branch quality gate access is valid.
302
312
 
303
313
  If the runner is online but jobs stay queued, verify the workflow labels match
304
314
  the runner labels exactly. If Sonar preflight fails, fix the token/project
@@ -0,0 +1,87 @@
1
+ # Verifier Contracts
2
+
3
+ Verifier contracts are task metadata that describe how a workflow run proves an
4
+ accepted outcome. They are used by agents, QA, and release gates to avoid
5
+ treating simulated handoffs or unmapped evidence as proof.
6
+
7
+ ## Fields
8
+
9
+ Each verifier entry is stored under `task.verifierContract.entries`:
10
+
11
+ - `id`: stable verifier id, unique within the task.
12
+ - `surface`: one of `cli`, `api`, `web`, `mobile`, `desktop`, `db`, `cloud`,
13
+ `workflow`, or `generated-artifact`.
14
+ - `setup`: environment or data setup required before verification.
15
+ - `action`: command, request, workflow action, or user action to execute.
16
+ - `expectedObservable`: observable result that must be proven.
17
+ - `assertionType`: `equals`, `contains`, `matches`, `exists`, or `custom`.
18
+ - `evidenceArtifact`: expected file, command output, trace, screenshot, log, or
19
+ report reference.
20
+ - `ownerRole`: role responsible for producing or reviewing evidence.
21
+ - `required`: defaults to `true`; optional verifiers are advisory.
22
+ - `acceptanceCriteria`: optional criteria references covered by the verifier.
23
+
24
+ ## CLI
25
+
26
+ Add a verifier while creating a task:
27
+
28
+ ```bash
29
+ orchestra task add --id STORY-001 --title "Generate manifest" --owner developer \
30
+ --verifier-id cli-manifest \
31
+ --verifier-surface cli \
32
+ --verifier-setup "package installed" \
33
+ --verifier-action "run manifest command" \
34
+ --verifier-expected "manifest generated" \
35
+ --verifier-evidence "manifest.json" \
36
+ --verifier-owner qa
37
+ ```
38
+
39
+ Add or update an entry later. Updates merge by verifier id:
40
+
41
+ ```bash
42
+ orchestra task update --id STORY-001 \
43
+ --verifier-id cli-manifest \
44
+ --verifier-surface cli \
45
+ --verifier-setup "package installed" \
46
+ --verifier-action "orchestra commands manifest --json" \
47
+ --verifier-expected "manifest generated" \
48
+ --verifier-evidence "manifest.json" \
49
+ --verifier-owner qa
50
+ ```
51
+
52
+ Inspect with:
53
+
54
+ ```bash
55
+ orchestra task show --id STORY-001 --json
56
+ orchestra context --task STORY-001 --json
57
+ ```
58
+
59
+ ## Evidence Mapping
60
+
61
+ Prefer explicit mapping:
62
+
63
+ ```bash
64
+ orchestra evidence add --task STORY-001 --role qa --type command \
65
+ --summary "manifest.json generated" \
66
+ --command "orchestra commands manifest --json" \
67
+ --exit-code 0 \
68
+ --surface cli \
69
+ --assertions "exit code 0; stdout contains manifest generated; stderr empty; artifact manifest.json written; final state manifest generated" \
70
+ --verifier-contract-id cli-manifest
71
+ ```
72
+
73
+ Legacy evidence can still match by task, surface, observable assertions, and
74
+ artifact reference. An explicit `--verifier-contract-id` mapping is less ambiguous.
75
+
76
+ ## Gate Behavior
77
+
78
+ Tasks without verifier contracts keep existing behavior.
79
+
80
+ For tasks with required verifier entries, `qa-release` and `release-readiness`
81
+ block when evidence is missing, failed, or lacks observable outcome validation.
82
+ The missing-item keys reported by these gates use stable names such as:
83
+
84
+ - `verifierContract.<id>.evidence`
85
+ - `verifierContract.<id>.observableOutcome`
86
+
87
+ Optional verifiers are rendered in context and handoffs but do not block gates.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jterrats/open-orchestra",
3
- "version": "1.1.2",
3
+ "version": "1.2.2",
4
4
  "type": "module",
5
5
  "workspaces": [
6
6
  "extensions/vscode-open-orchestra",
@@ -16,7 +16,7 @@
16
16
  "test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
17
17
  "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
18
18
  "test:e2e": "npm run build && npm run site:build && playwright test",
19
- "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
19
+ "test:e2e:init": "node --test --test-concurrency=1 e2e/stubbed-provider-mcp-contract.test.js e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
20
20
  "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js e2e/runtime-multi-squad.test.js",
21
21
  "test:e2e:security": "npm run build && node --test e2e/security-boundaries.test.js",
22
22
  "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
@@ -30,7 +30,11 @@
30
30
  "validate:workflow": "node scripts/validate-workflow.js",
31
31
  "release:matrix": "node scripts/release-test-matrix.js",
32
32
  "performance:bench": "npm run build && node scripts/performance-benchmark.js",
33
- "precommit": "npm run lint && npm run typecheck && npm run secret-scan && npm run security:audit && npm test && npm run validate:workflow",
33
+ "check:commands": "npm run build && node scripts/check-command-manifest.js",
34
+ "precheck": "npm run check:commands",
35
+ "precommit": "npm run precheck",
36
+ "prepush": "npm run precheck",
37
+ "ci:quality": "npm run lint && npm run typecheck && npm run secret-scan && npm run security:audit && npm test && npm run validate:workflow",
34
38
  "package:build": "npm run build && npm run site:build",
35
39
  "package:validate": "node scripts/validate-package-contents.js",
36
40
  "prepack": "npm run package:build && npm run package:validate",