npm - @jterrats/open-orchestra - Versions diffs - 1.1.2 → 1.2.2 - Mend

@jterrats/open-orchestra 1.1.2 → 1.2.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (168)

package/AGENTS.md +14 -1
package/CHANGELOG.md +48 -0
package/CLAUDE.md +14 -1
package/README.md +21 -6
package/dist/autonomous-phase-lifecycle.js +52 -2
package/dist/autonomous-phase-lifecycle.js.map +1 -1
package/dist/autonomous-run-state.d.ts +3 -1
package/dist/autonomous-run-state.js +28 -2
package/dist/autonomous-run-state.js.map +1 -1
package/dist/autonomous-run-store.js +9 -0
package/dist/autonomous-run-store.js.map +1 -1
package/dist/benchmark.js +54 -6
package/dist/benchmark.js.map +1 -1
package/dist/capability-commands.d.ts +2 -0
package/dist/capability-commands.js +82 -0
package/dist/capability-commands.js.map +1 -0
package/dist/capture-commands.d.ts +16 -0
package/dist/capture-commands.js +70 -0
package/dist/capture-commands.js.map +1 -0
package/dist/cli-capability-catalog.d.ts +10 -0
package/dist/cli-capability-catalog.js +110 -0
package/dist/cli-capability-catalog.js.map +1 -0
package/dist/cli-capability-data.d.ts +2 -0
package/dist/cli-capability-data.js +233 -0
package/dist/cli-capability-data.js.map +1 -0
package/dist/cli-capability-renderer.d.ts +2 -0
package/dist/cli-capability-renderer.js +43 -0
package/dist/cli-capability-renderer.js.map +1 -0
package/dist/cli-capability-types.d.ts +30 -0
package/dist/cli-capability-types.js +2 -0
package/dist/cli-capability-types.js.map +1 -0
package/dist/command-manifest.js +11 -5
package/dist/command-manifest.js.map +1 -1
package/dist/command-routes-integrations.js +2 -1
package/dist/command-routes-integrations.js.map +1 -1
package/dist/command-routes.js +8 -1
package/dist/command-routes.js.map +1 -1
package/dist/commands.d.ts +4 -2
package/dist/commands.js +7 -2
package/dist/commands.js.map +1 -1
package/dist/constants.js +2 -0
package/dist/constants.js.map +1 -1
package/dist/delivery-commands.js +1 -0
package/dist/delivery-commands.js.map +1 -1
package/dist/delivery-dashboard-charts.js +7 -3
package/dist/delivery-dashboard-charts.js.map +1 -1
package/dist/delivery-dashboard-types.d.ts +4 -0
package/dist/delivery-dashboard.js +6 -0
package/dist/delivery-dashboard.js.map +1 -1
package/dist/effort-classification.d.ts +1 -0
package/dist/effort-classification.js +15 -0
package/dist/effort-classification.js.map +1 -1
package/dist/fs-utils.js +6 -5
package/dist/fs-utils.js.map +1 -1
package/dist/mcp-integrations.d.ts +9 -2
package/dist/mcp-integrations.js +50 -13
package/dist/mcp-integrations.js.map +1 -1
package/dist/mcp-oauth-proxy.d.ts +8 -0
package/dist/mcp-oauth-proxy.js +25 -0
package/dist/mcp-oauth-proxy.js.map +1 -1
package/dist/mcp-runtime-config.d.ts +55 -0
package/dist/mcp-runtime-config.js +252 -0
package/dist/mcp-runtime-config.js.map +1 -0
package/dist/mcp-tool-adapter.js +2 -4
package/dist/mcp-tool-adapter.js.map +1 -1
package/dist/model-providers.d.ts +1 -1
package/dist/model-providers.js +1 -1
package/dist/model-providers.js.map +1 -1
package/dist/ollama-provider.d.ts +7 -0
package/dist/ollama-provider.js +41 -15
package/dist/ollama-provider.js.map +1 -1
package/dist/phase-playbooks.js +17 -0
package/dist/phase-playbooks.js.map +1 -1
package/dist/planning-commands.js +5 -0
package/dist/planning-commands.js.map +1 -1
package/dist/qa-evidence-validation.d.ts +1 -1
package/dist/qa-evidence-validation.js +3 -1
package/dist/qa-evidence-validation.js.map +1 -1
package/dist/quality-contracts.js +1 -1
package/dist/quality-contracts.js.map +1 -1
package/dist/release-inclusion.js +1 -1
package/dist/release-inclusion.js.map +1 -1
package/dist/runtime-bootstrap.js +13 -0
package/dist/runtime-bootstrap.js.map +1 -1
package/dist/runtime-child-prompt.js +25 -0
package/dist/runtime-child-prompt.js.map +1 -1
package/dist/runtime-commands.d.ts +3 -0
package/dist/runtime-commands.js +96 -0
package/dist/runtime-commands.js.map +1 -1
package/dist/runtime-events.d.ts +48 -0
package/dist/runtime-events.js +255 -0
package/dist/runtime-events.js.map +1 -0
package/dist/runtime-execution-renderer.js +8 -0
package/dist/runtime-execution-renderer.js.map +1 -1
package/dist/runtime-parent-actions.js +3 -0
package/dist/runtime-parent-actions.js.map +1 -1
package/dist/runtime-spawn-bridge.js +15 -5
package/dist/runtime-spawn-bridge.js.map +1 -1
package/dist/sonar-preflight.js +21 -1
package/dist/sonar-preflight.js.map +1 -1
package/dist/sprint-metrics.js +7 -1
package/dist/sprint-metrics.js.map +1 -1
package/dist/task-graph-commands.js +48 -0
package/dist/task-graph-commands.js.map +1 -1
package/dist/tool-commands.d.ts +1 -0
package/dist/tool-commands.js +129 -14
package/dist/tool-commands.js.map +1 -1
package/dist/types/metrics.d.ts +5 -1
package/dist/types/runtime.d.ts +3 -0
package/dist/types/tasks.d.ts +24 -1
package/dist/types/workflow-run.d.ts +16 -0
package/dist/types.d.ts +5 -3
package/dist/types.js.map +1 -1
package/dist/validation.js +6 -0
package/dist/validation.js.map +1 -1
package/dist/verifier-contracts.d.ts +29 -0
package/dist/verifier-contracts.js +184 -0
package/dist/verifier-contracts.js.map +1 -0
package/dist/web-api-read-routes.d.ts +1 -0
package/dist/web-api-read-routes.js +6 -1
package/dist/web-api-read-routes.js.map +1 -1
package/dist/web-api.js +58 -1
package/dist/web-api.js.map +1 -1
package/dist/web-console/assets/index-Cip-y4WE.css +1 -0
package/dist/web-console/assets/index-CuWjFxss.js +11 -0
package/dist/web-console/index.html +2 -2
package/dist/workflow-approval-service.js +36 -2
package/dist/workflow-approval-service.js.map +1 -1
package/dist/workflow-continuation-policy.d.ts +3 -0
package/dist/workflow-continuation-policy.js +20 -0
package/dist/workflow-continuation-policy.js.map +1 -0
package/dist/workflow-correction-context.d.ts +30 -0
package/dist/workflow-correction-context.js +117 -0
package/dist/workflow-correction-context.js.map +1 -0
package/dist/workflow-evidence-service.js +31 -22
package/dist/workflow-evidence-service.js.map +1 -1
package/dist/workflow-gates.js +6 -0
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-handoff-assessment.js +10 -2
package/dist/workflow-handoff-assessment.js.map +1 -1
package/dist/workflow-handoff-contract.d.ts +7 -0
package/dist/workflow-handoff-contract.js +18 -0
package/dist/workflow-handoff-contract.js.map +1 -1
package/dist/workflow-phase-planner.js +56 -22
package/dist/workflow-phase-planner.js.map +1 -1
package/dist/workflow-return-routing.d.ts +14 -0
package/dist/workflow-return-routing.js +61 -0
package/dist/workflow-return-routing.js.map +1 -0
package/dist/workflow-run-commands.js +173 -52
package/dist/workflow-run-commands.js.map +1 -1
package/dist/workflow-services.js +6 -3
package/dist/workflow-services.js.map +1 -1
package/dist/workflow-task-service.js +15 -7
package/dist/workflow-task-service.js.map +1 -1
package/docs/autonomous-workflow.md +10 -0
package/docs/ci-self-hosted-runners.md +82 -0
package/docs/command-contracts.md +21 -0
package/docs/e2e-test-batteries.md +115 -25
package/docs/runtime-adapters.md +4 -0
package/docs/runtime-llm-flow.md +6 -3
package/docs/security-env-vars.md +1 -0
package/docs/sonar-quality-gates.md +19 -9
package/docs/verifier-contracts.md +87 -0
package/package.json +7 -3
package/site/dist/assets/{index-Bi8l6tCE.js → index-B1Xsl_Kg.js} +1 -1
package/site/dist/index.html +1 -1
package/dist/web-console/assets/index--_RLc7Zp.js +0 -11
package/dist/web-console/assets/index-Cxo3REa4.css +0 -1

package/docs/e2e-test-batteries.md CHANGED

@@ -16,41 +16,92 @@ entry points a user or CI runner actually executes.
   flows.
 - External provider, GitHub, Sonar, or network-dependent paths must be opt-in
   and must report skipped or deferred evidence when offline.
+- Local provider smokes must default to mock or loopback/private endpoints,
+  reject public internet egress unless a trusted-provider policy explicitly
+  allows it, and record redacted provider provenance instead of credentials,
+  raw prompts, hostnames, or ports.
+- Stubbed provider/MCP batteries are contract evidence only. They may block PRs
+  when CLI/API/workflow contracts regress, but they must not satisfy
+  real-provider acceptance criteria for hosted auth, latency, rate limits,
+  provider-side failures, or production model behavior.
 - A release can ship only when P0 batteries pass or a release-manager accepted
   risk records the unavailable environment and compensating evidence.
+## Product Journey Coverage Matrix
+| Journey                         | Primary surface              | Environment model                                                      | Expected result                                                                                                                          | Evidence strategy                                                                                                             | Current battery                           | Child story recommendation            |
+| ------------------------------- | ---------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | ------------------------------------- |
+| First workspace setup           | CLI                          | Local `/tmp` workspace, source and packaged binary                     | User can initialize a project, preserve existing files, and see target runtime guidance                                                  | Assert exit codes, generated files, managed block boundaries, package version, and human/JSON output                          | Local and installed CLI onboarding        | GH-533-A source/package setup parity  |
+| Runtime-specific guidance regen | CLI + generated docs         | Simulated Codex, Claude, Cursor, VS Code, Windsurf, generic workspaces | `init --force` regenerates missing managed guidance without mixing target-specific instructions                                          | Filesystem diffs plus QA coverage JSON proving regenerated artifacts map back to acceptance criteria                          | Init refresh environments                 | GH-533-B runtime regen matrix         |
+| Workflow delivery lifecycle     | CLI workflow                 | Isolated local workspace                                               | Task, estimate, phase handoffs, gate pause/resume, QA failback, evidence, and release readiness work end to end                          | JSON event stream, handoff artifacts, review/evidence records, and release-readiness before/after assertions                  | Workflow lifecycle CLI                    | GH-533-C workflow evidence lifecycle  |
+| Runtime delegation              | CLI runtime actions          | Local runtime queue with detached sessions                             | Manual and multi-squad delegation queue safely, preserve parent availability, and reconcile lifecycle state                              | Spawn request artifacts, lifecycle commands, runtime session lists, queue state, and completion events                        | Runtime manual queue, multi-squad runtime | GH-533-D runtime delegation contracts |
+| Web console operator flow       | Browser app + API            | Local web server and persisted test workspace                          | Operator can inspect tasks, costs, providers, delegation, workflow progress, recovery, and artifacts                                     | Playwright visible assertions, API persistence checks, responsive/keyboard coverage, screenshots/traces on failure            | Browser console                           | GH-533-E web console operator journey |
+| Chat and provider flow          | Browser app + API + provider | Stubbed provider by default, opt-in local provider                     | Chat sends scoped messages, streams usable status, records provider provenance, and handles failures safely                              | API response assertions, SSE/event trace, DOM state, provider request fixture, redacted logs                                  | Browser console, provider opt-in          | GH-533-F chat provider E2E            |
+| Stubbed provider + MCP contract | CLI + web API + workflow     | Built-in fake provider and scoped MCP config in an isolated workspace  | PR-safe provider/MCP contract proves fake-provider routing, scoped MCP status, redacted evidence, and hosted provider fail-closed policy | CLI/API JSON comparisons, workflow executor provenance, redacted contract evidence report, hosted-provider negative assertion | Stubbed provider MCP contract             | GH-551 GH-528A PR-safe contract E2E   |
+| Public docs and site discovery  | Browser site                 | Local static/site build                                                | Users can find docs, navigate core concepts, inspect architecture, and read mobile-safe pages                                            | Playwright navigation, search, docs catalog, responsive text-fit, and no raw GitHub dead-end assertions                       | Public site, docs/site content source     | GH-533-G docs discovery E2E           |
+| Security-sensitive operations   | CLI + API + browser          | Local sandbox with hostile fixtures                                    | Unsafe file paths, shell patterns, secrets, stack traces, and telemetry leaks are blocked or redacted                                    | Hostile fixture assertions, redacted command/API evidence, browser-visible safe errors                                        | Security-sensitive operations             | GH-533-H security boundary E2E        |
+| Packaged renderer behavior      | CLI package path             | Built package artifacts                                                | Diagram/report commands use packaged paths and produce user-safe output without stale source-only assumptions                            | Compiled command output, package file list, XML/SVG/HTML/source invariant checks, and explicit missing messaging              | Renderer packaged paths                   | GH-540 packaged artifact E2E          |
+| Benchmark/dashboard reporting   | CLI metrics + dashboard JSON | Isolated workflow telemetry with completed and measurable stories      | Velocity, calibration, and dashboard distinguish completed stories from measurable actuals without fake duration                         | CLI JSON assertions, dashboard metric assertions, actual source checks, and calibration sample-size checks                    | Benchmark dashboard duration smoke        | GH-541 benchmark dashboard E2E        |
+| MCP scoped OAuth diagnostics    | CLI + web API                | Runtime-scoped MCP config in an isolated workspace                     | CLI and web API report matching runtime OAuth state, redact secrets, and hide cross-workspace MCP config                                 | CLI/API JSON comparisons, redaction assertions, scoped workspace config checks                                                | MCP scoped OAuth smoke                    | GH-542 MCP OAuth E2E                  |
+| Release candidate readiness     | CLI + CI artifacts           | Local dry run, CI/self-hosted runner                                   | Candidate package contents, version/tag policy, release matrix, and quality gates are release-ready                                      | `npm pack --dry-run`, release check JSON, CI run links/logs, and accepted-risk records for unavailable environments           | Package release dry run, source quality   | GH-533-J release candidate E2E        |
+## Regeneration Plan
+Regenerate E2E scenarios when product behavior changes one of these contracts:
+1. Public CLI command shape, command manifest, JSON output, generated files, or
+   package entry points change.
+2. Runtime profiles, phase playbooks, agent skills/rules, spawn request shape,
+   or managed guidance files change.
+3. Web console routes, visible task/provider/workflow states, API persistence,
+   SSE events, or recovery flows change.
+4. Provider integrations, MCP configuration, local/cloud execution policy,
+   secrets handling, or redaction boundaries change.
+5. Release, CI, package, tag, or self-hosted runner policy changes.
+For each regeneration event, QA must update the matrix row, add or revise the
+expected result, choose the real evidence surface, and record why lower-level
+tests are or are not enough. Stubbed tests can stay P0 only when the product
+contract is deterministic without external services; cloud, GitHub, Sonar,
+provider, mobile, or desktop-dependent evidence must be P1/P2 opt-in with a
+clear skip/deferred signal and owner.
 ## P0 Release-Blocking Batteries
-| Battery                   | Scope                                                                                                | Command                                                             | Minimum Assertions                                                                                                                                     | Evidence                                                           |
-| ------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------ |
-| Source quality            | Static checks, build, unit tests, workflow validation, secret scan, security audit                   | `npm run precommit`                                                 | exit code 0, no leaks, no audit blockers, workflow valid                                                                                               | command log                                                        |
-| Local CLI onboarding      | Current source CLI in `/tmp` workspaces                                                              | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init` | `--version`, `init`, `status`, `validate`, first-use task, handoff, evidence, release readiness                                                        | stdout/stderr, JSON output, filesystem assertions                  |
-| Installed CLI onboarding  | Installed or packaged CLI in `/tmp` workspaces                                                       | `npm run test:e2e:init` after installing the candidate package      | same assertions as local CLI onboarding, proving the packaged binary matches source behavior                                                           | stdout/stderr, JSON output, filesystem assertions, package version |
-| Browser console           | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e`                                                  | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior                                               | Playwright report, screenshots/traces on failure                   |
-| Public site               | Documentation/site navigation, docs catalog, architecture viewer, mobile fit                         | `npm run test:e2e`                                                  | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit                                                       | Playwright report                                                  |
-| Runtime manual queue      | Manual runtime delegation in a `/tmp` workspace                                                      | `npm run test:e2e:runtime`                                          | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content                       |
-| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces             | `npm run test:e2e:init`                                             | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON                       |
-| Workflow lifecycle CLI    | CLI workflow run, gate, resume, QA failback, release readiness                                       | `node --test e2e/workflow-lifecycle-cli.test.js`                    | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence                   | JSON output, events, handoffs                                      |
+| Battery                   | Scope                                                                                                | Command                                                                                                | Minimum Assertions                                                                                                                                                                                                                                                              | Evidence                                                                                                                            |
+| ------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| Source quality            | Static checks, build, unit tests, workflow validation, secret scan, security audit                   | `npm run ci:quality`                                                                                   | exit code 0, no leaks, no audit blockers, workflow valid                                                                                                                                                                                                                        | command log                                                                                                                         |
+| Local CLI onboarding      | Current source CLI in `/tmp` workspaces                                                              | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init`                                    | `--version`, `init`, `status`, `validate`, first-use task, handoff, evidence, release readiness                                                                                                                                                                                 | stdout/stderr, JSON output, filesystem assertions                                                                                   |
+| Installed CLI onboarding  | Installed or packaged CLI in `/tmp` workspaces                                                       | `npm run test:e2e:init` after installing the candidate package                                         | same assertions as local CLI onboarding, proving the packaged binary matches source behavior                                                                                                                                                                                    | stdout/stderr, JSON output, filesystem assertions, package version                                                                  |
+| Browser console           | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e`                                                                                     | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior                                                                                                                                                                        | Playwright report, screenshots/traces on failure                                                                                    |
+| Public site               | Documentation/site navigation, docs catalog, architecture viewer, mobile fit                         | `npm run test:e2e`                                                                                     | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit                                                                                                                                                                                | Playwright report                                                                                                                   |
+| Runtime manual queue      | Manual runtime delegation in a `/tmp` workspace                                                      | `npm run test:e2e:runtime`                                                                             | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session                                                                                                                          | stdout/stderr, JSON output, artifact content                                                                                        |
+| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces            | `npm run test:e2e:init`                                                                                | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON                                                                                        |
+| Workflow lifecycle CLI    | CLI workflow run, gate, resume, QA failback, release readiness                                       | `node --test e2e/workflow-lifecycle-cli.test.js`                                                       | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence                                                                                                                                            | JSON output, events, handoffs                                                                                                       |
+| Renderer packaged paths   | Diagram lint command, artifact previews, and report renderer package contracts                       | `npm run build && node --test test/renderer-compiled-path.test.js e2e/packaged-renderer-smoke.test.js` | compiled CLI path emits Mermaid install guidance without stack traces, candidate package includes compiled renderer modules, previews preserve draw.io/Mermaid/ERD/TSX invariants, report render command absence is explicit                                                    | stdout/stderr, JSON output, package file list, XML/SVG/HTML/source invariant checks                                                 |
+| Stubbed provider MCP      | Built-in fake provider, scoped MCP fixture, provider/MCP web API status, and contract evidence       | `npm run test:e2e:init` or `npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js`    | fake provider routing is visible through CLI/API, scoped MCP CLI/API status matches, hosted provider execution fails before direct provider API use, evidence is redacted and labeled contract evidence, and no hosted provider secret is required                              | CLI/API JSON output, workflow events, `.agent-workflow/evidence/*-report.md`, `reports/stubbed-provider-mcp-contract-evidence.json` |
 ## P1 High-Risk Regression Batteries
-| Battery                        | Scope                                                                 | Command                                                               | Minimum Assertions                                                                                                                                                     | Evidence                                                     |
-| ------------------------------ | --------------------------------------------------------------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ |
-| Multi-squad runtime            | Parallel squad delegation with queue and threshold policy             | `npm run test:e2e:runtime`                                            | independent sessions, non-blocking parent, queued sessions do not fall back to parent, completion order reconciles                                                     | JSON output, lifecycle events                                |
-| Acceptance evidence            | CLI, API, browser, and deferred integration evidence                  | `node --test e2e/acceptance-evidence.test.js`                         | evidence maps to named acceptance criteria, deferred external validation requires owner and rationale                                                                  | evidence artifacts                                           |
-| Recovery and repair            | Interrupted runs, stale locks, failed provider phases                 | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage | recovery detects issue, repair requires confirmation, repaired state is observable                                                                                     | JSON output, before/after state                              |
-| Docs/site content source       | Site content generated from docs and manifest                         | `npm run site:build && npm run test:e2e -- --grep docs`               | docs render as human-friendly catalog, no markdown-only dead ends, search works                                                                                        | Playwright report                                            |
-| Security-sensitive operations  | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security`                                           | path traversal blocked, unsafe writes rejected, secret-like data redacted, no raw stack traces                                                                         | command/API evidence                                         |
-| Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace   | `npm run test:e2e:runtime:ollama`                                     | `model connect --provider ollama`, provider-backed developer phase, OpenAI-compatible request shape, provider provenance, no runtime subagent credentials in artifacts | stdout/stderr, JSON output, mock provider request, event log |
+| Battery                        | Scope                                                                 | Command                                                                 | Minimum Assertions                                                                                                                                                     | Evidence                                                     |
+| ------------------------------ | --------------------------------------------------------------------- | ----------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ |
+| Multi-squad runtime            | Parallel squad delegation with queue and threshold policy             | `npm run test:e2e:runtime`                                              | independent sessions, non-blocking parent, queued sessions do not fall back to parent, completion order reconciles                                                     | JSON output, lifecycle events                                |
+| Acceptance evidence            | CLI, API, browser, and deferred integration evidence                  | `node --test e2e/acceptance-evidence.test.js`                           | evidence maps to named acceptance criteria, deferred external validation requires owner and rationale                                                                  | evidence artifacts                                           |
+| Recovery and repair            | Interrupted runs, stale locks, failed provider phases                 | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage   | recovery detects issue, repair requires confirmation, repaired state is observable                                                                                     | JSON output, before/after state                              |
+| Docs/site content source       | Site content generated from docs and manifest                         | `npm run site:build && npm run test:e2e -- --grep docs`                 | docs render as human-friendly catalog, no markdown-only dead ends, search works                                                                                        | Playwright report                                            |
+| Security-sensitive operations  | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security`                                             | path traversal blocked, unsafe writes rejected, secret-like data redacted, no raw stack traces                                                                         | command/API evidence                                         |
+| Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace   | `npm run test:e2e:runtime:ollama`                                       | `model connect --provider ollama`, provider-backed developer phase, OpenAI-compatible request shape, provider provenance, no runtime subagent credentials in artifacts | stdout/stderr, JSON output, mock provider request, event log |
+| Benchmark dashboard duration   | Benchmark-derived velocity, calibration, and delivery dashboard JSON  | `npm run build && node --test e2e/benchmark-dashboard-duration.test.js` | completed stories are counted separately from stories with actual duration, completed-without-duration is visible, calibration sample size excludes null actuals       | CLI JSON output, dashboard JSON, calibration JSON            |
+| MCP scoped OAuth smoke         | Runtime-scoped MCP integration status through CLI and web API         | `npm run build && node --test e2e/mcp-scoped-oauth-smoke.test.js`       | CLI and web API agree for `--runtime claude-cli`, diagnostics redact OAuth/API-key material, cross-workspace MCP config is not exposed                                 | CLI/API JSON output, scoped config fixture, redaction checks |
 ## P2 Extended Confidence Batteries
-| Battery                    | Scope                                                 | Command                                                       | Minimum Assertions                                                                        | Evidence                     |
-| -------------------------- | ----------------------------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | ---------------------------- |
-| Tracker and GitHub sync    | Issue import/export and close readiness               | opt-in CI job with network credentials                        | labels, comments, close gate, release readiness, no secret exposure                       | sanitized logs               |
-| Sonar quality loop         | Local or remote Sonar import and release gate mapping | configured Sonar workflow or local compose job                | insights imported, release readiness reflects quality gate, unavailable token is explicit | artifact import report       |
-| Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage | registry routing, explicit direct API policy, forbidden fallback, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
-| Package release dry run    | npm package contents and release check                | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete    | package list, release report |
+| Battery                    | Scope                                                                                               | Command                                                       | Minimum Assertions                                                                                                                                                                                            | Evidence                     |
+| -------------------------- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------- |
+| Tracker and GitHub sync    | Issue import/export and close readiness                                                             | opt-in CI job with network credentials                        | labels, comments, close gate, release readiness, no secret exposure                                                                                                                                           | sanitized logs               |
+| Sonar quality loop         | Local or remote Sonar import and release gate mapping                                               | configured Sonar workflow or local compose job                | insights imported, release readiness reflects quality gate, unavailable token is explicit                                                                                                                     | artifact import report       |
+| Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage        | registry routing, explicit direct API policy, forbidden fallback, local-provider public egress rejection, timeout handling, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
+| Package release dry run    | npm package contents and release check                                                              | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete                                                                                                                        | package list, release report |
 ## Required `/tmp` Fixture Patterns
@@ -81,9 +132,29 @@ the packaging/install path is wrong.
 3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
    resume, and release readiness.
 4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.
-5. Add focused security and acceptance-evidence E2E only where unit tests cannot
+5. Add `e2e/stubbed-provider-mcp-contract.test.js` for PR-safe provider/MCP
+   contract evidence that cannot be confused with real-provider evidence.
+6. Add focused security and acceptance-evidence E2E only where unit tests cannot
    prove the user-visible contract.
+## Executable Child Story Recommendations
+| Story    | Scope                        | First executable check                                                                                    | Acceptance criteria seed                                                                                                            |
+| -------- | ---------------------------- | --------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| GH-533-A | Source/package setup parity  | Extend `e2e/init-onboarding.test.js` to run source and packaged modes from the same fixture table         | Source and installed binaries initialize equivalent state, report equivalent JSON contracts, and identify candidate package version |
+| GH-533-B | Runtime regen matrix         | Add fixture rows for each supported runtime target and compare regenerated managed blocks                 | Regeneration preserves user content, writes only managed ranges, and never emits wrong-target instructions                          |
+| GH-533-C | Workflow evidence lifecycle  | Expand `e2e/workflow-lifecycle-cli.test.js` around AC-to-evidence and QA failback assertions              | Release readiness stays blocked until evidence maps to acceptance criteria and blocked reviews return to the responsible role       |
+| GH-533-D | Runtime delegation contracts | Add queue and lifecycle assertions shared by manual and multi-squad runtime tests                         | Parent remains available, queued actions are explicit, child lifecycle is observable, and no fallback silently runs in parent       |
+| GH-533-E | Web console operator journey | Create a Page Object backed journey for task, provider, workflow, artifacts, and recovery panels          | Browser UI proves visible state, persisted API effects, keyboard/responsive behavior, and failure recovery                          |
+| GH-533-F | Chat provider E2E            | Add stubbed API/SSE/DOM checks plus opt-in local provider smoke                                           | Chat scope, provider provenance, streamed status, redacted errors, and retry/recovery behavior are asserted                         |
+| GH-533-G | Docs discovery E2E           | Split docs/site grep coverage into catalog, search, architecture, and mobile scenarios                    | Users can find docs without raw repository dead ends, and mobile pages have no clipping or overlap                                  |
+| GH-533-H | Security boundary E2E        | Add hostile fixture coverage for paths, shell-like input, secrets, telemetry, and stack traces            | Unsafe operations fail closed and all user-facing/API evidence is redacted                                                          |
+| GH-540   | Packaged artifact E2E        | Promote compiled renderer/package assertions into a package-candidate smoke                               | Packaged commands do not depend on source-only paths and produce inspectable artifacts or explicit unavailable messages             |
+| GH-541   | Benchmark dashboard E2E      | Add velocity, calibration, and dashboard JSON assertions for completed stories without measured duration  | Reports count completed work without inventing duration and calibration uses only measurable actuals                                |
+| GH-542   | MCP OAuth scoped E2E         | Add CLI/API status parity assertions for runtime-scoped MCP OAuth diagnostics                             | Scoped OAuth status is consistent across CLI/API and diagnostics stay redacted without cross-workspace leakage                      |
+| GH-551   | Stubbed provider/MCP E2E     | Add fake-provider workflow plus MCP dry-run CLI assertions in an isolated workspace                       | PR-safe contract evidence proves provider/MCP integration without secrets or network calls and never replaces real-provider smoke   |
+| GH-533-J | Release candidate E2E        | Add a release dry-run evidence bundle around package contents, tag readiness, CI logs, and release matrix | Release candidate proves package contents, version/tag policy, quality gate, rollback evidence, and accepted-risk records           |
 ## Opt-In Provider Runtime Batteries
 Provider-backed runtime batteries are not part of default CI because they may
@@ -94,6 +165,25 @@ workflow provenance, and no-secret behavior without requiring a real Ollama
 daemon. A separate real-model smoke can be run with `ORCHESTRA_OLLAMA_SMOKE=1`
 when validating a local model installation.
+## Stubbed Provider And MCP Contract Evidence
+`e2e/stubbed-provider-mcp-contract.test.js` is the default PR/push-safe provider
+and MCP contract suite. It is included in `npm run test:e2e:init`, which the
+normal CI dogfood job runs without hosted provider secrets. The focused command
+is:
+```bash
+npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js
+```
+The suite uses the built-in `fake` provider and workspace-scoped MCP config in a
+temporary workspace. It asserts real CLI and web API outputs, records a
+redacted report through `orchestra evidence add`, and labels that report
+`contract evidence`. That evidence proves the local provider/MCP contract only.
+It must not be used as acceptance evidence for real OpenAI, Anthropic, Gemini,
+Ollama, or hosted MCP behavior. Real-provider acceptance remains covered only by
+protected/manual smoke suites with explicit secrets and trusted-run policy.
 ## Definition Of Done
 An E2E battery is complete only when it has:

package/docs/runtime-adapters.md CHANGED Viewed

@@ -499,6 +499,10 @@ parent-agent fallback reason. `subagents` requires runtime-native support and
 fails fast if the runtime cannot satisfy it. `single-agent` forces the parent
 agent path and records that choice in phase provenance.
+Gate mode is independent of execution mode: `--gates none` only suppresses
+human gate pauses, while `--phase-execution single-agent` is what prevents
+detached runtime parent actions and subagent lifecycle requirements.
 When no task or role executor is configured and the default executor is
 `generic-runtime`, `auto` and strict `subagents` mode infer the active runtime
 from `OPEN_ORCHESTRA_ACTIVE_RUNTIME`, then from

package/docs/runtime-llm-flow.md CHANGED Viewed

@@ -155,9 +155,12 @@ adapter also reads optional `GEMINI_BASE_URL`; the base URL must be HTTPS and
 defaults to `https://generativelanguage.googleapis.com`.
 The Ollama adapter defaults to `http://localhost:11434/v1` and uses the
-OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` for a
-custom local or remote endpoint and `OLLAMA_API_KEY` when the endpoint requires
-one.
+OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` only for
+loopback or private-network endpoints; public internet hosts are rejected unless
+a future trusted-provider policy explicitly implements that exception. Endpoint
+URLs must use `http` or `https`, must not include credentials, and are recorded
+in evidence only as redacted local-provider provenance. Set `OLLAMA_API_KEY`
+when the local endpoint requires one.
 ## Runtime Execution

package/docs/security-env-vars.md CHANGED Viewed

@@ -18,6 +18,7 @@ or local secret files.
 - `OPENAI_API_KEY_FILE`: optional OpenAI credential file path.
 - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CALLBACK`: local Claude native callback marker.
 - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CHILD_ID`: Claude native child id marker.
+- `OPEN_ORCHESTRA_COMMAND_MANIFEST_OUT`: optional command manifest check output path.
 - `ORCHESTRA_GITLEAKS_BIN`: optional absolute gitleaks binary override.
 - `ORCHESTRA_SECRET_SCAN_FORCE_FALLBACK`: forces fallback secret scanning.
 - `ORCHESTRA_SKIP_UPDATE_CHECK`: disables package update checks.

package/docs/sonar-quality-gates.md CHANGED Viewed

@@ -64,10 +64,13 @@ Optional GitHub variables:
   quality gate fails.
 - `SONAR_RUNNER`: set to `self-hosted` to run the Sonar workflow on a local
   runner that can reach the shared SonarQube runtime directly. When this is set,
-  the workflow uses `http://localhost:9001` by default and skips Cloudflare
-  Access service-token checks.
+  the workflow resolves the SonarQube URL from `SONAR_LOCAL_HOST_URL`, then
+  `SONAR_HOST_URL`, then `http://localhost:9001`. Cloudflare Access
+  service-token checks still run when the resolved URL comes from
+  `SONAR_HOST_URL` and Cloudflare credentials are configured.
 - `SONAR_LOCAL_HOST_URL`: optional override for self-hosted runner mode when the
-  runner reaches SonarQube through a different local-only URL.
+  runner reaches SonarQube through a different local-only URL. Prefer this over
+  `SONAR_HOST_URL` when the self-hosted runner should use a direct local path.
 The workflow skips analysis when `SONAR_TOKEN` is not configured. This keeps
 forks and offline development usable. For private repositories, keep
@@ -100,6 +103,10 @@ API access, issue API access, and security hotspot API access. It redacts the
 token, host URL, and Cloudflare Access service token values from diagnostic
 output. `hotspots` is a warning when unavailable because some Sonar tokens can
 analyze and read issues while hotspot review permissions are managed separately.
+In local `sonarqube-local` mode, a branch-scoped quality gate check that gets
+`404` falls back to the default branch when the project itself is readable,
+because SonarQube Community Edition does not expose branch analysis unless an
+optional add-on or commercial feature is installed.
 Common remediation:
@@ -266,12 +273,12 @@ cd ~/dev/sonarqube_jterrats_dev
 docker compose up -d
 ```
-When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube to
-`http://localhost:9001` unless `SONAR_LOCAL_HOST_URL` is set. This intentionally
-ignores `SONAR_HOST_URL`, so organization-level Cloudflare tunnel secrets do not
-pull local machine analysis back through Zero Trust. Cloudflare Access remains
-available for human remote browser usage and for GitHub-hosted runner access to
-private SonarQube only. The CI scan uses
+When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube in this order:
+`SONAR_LOCAL_HOST_URL`, `SONAR_HOST_URL`, then `http://localhost:9001`.
+Use `SONAR_LOCAL_HOST_URL` to force a direct local path and avoid accidentally
+pulling local machine analysis back through Zero Trust. When no local URL is
+set and `SONAR_HOST_URL` points at a Cloudflare Access protected hostname, the
+workflow enables the service-token proxy before preflight. The CI scan uses
 `continue-on-error` on the scanner step so Orchestra can still import and upload
 Sonar evidence when the quality gate fails; a final workflow step re-fails the
 job after evidence is captured.
@@ -299,6 +306,9 @@ Expected result:
 - Sonar authentication returns `{"valid":true}`.
 - `npm run sonar:preflight:local` passes `auth`, `project`, `qualityGate`, and
   `issues`; `hotspots` may warn when the token lacks hotspot read access.
+- Pull request runs on local SonarQube Community Edition may report
+  `quality-gate-readable-default-branch` when branch-scoped quality gate status
+  is unavailable but default-branch quality gate access is valid.
 If the runner is online but jobs stay queued, verify the workflow labels match
 the runner labels exactly. If Sonar preflight fails, fix the token/project

package/docs/verifier-contracts.md ADDED Viewed

@@ -0,0 +1,87 @@
+# Verifier Contracts
+Verifier contracts are task metadata that describe how a workflow run proves an
+accepted outcome. They are used by agents, QA, and release gates to avoid
+treating simulated handoffs or unmapped evidence as proof.
+## Fields
+Each verifier entry is stored under `task.verifierContract.entries`:
+- `id`: stable verifier id, unique within the task.
+- `surface`: one of `cli`, `api`, `web`, `mobile`, `desktop`, `db`, `cloud`,
+  `workflow`, or `generated-artifact`.
+- `setup`: environment or data setup required before verification.
+- `action`: command, request, workflow action, or user action to execute.
+- `expectedObservable`: observable result that must be proven.
+- `assertionType`: `equals`, `contains`, `matches`, `exists`, or `custom`.
+- `evidenceArtifact`: expected file, command output, trace, screenshot, log, or
+  report reference.
+- `ownerRole`: role responsible for producing or reviewing evidence.
+- `required`: defaults to `true`; optional verifiers are advisory.
+- `acceptanceCriteria`: optional criteria references covered by the verifier.
+## CLI
+Add a verifier while creating a task:
+```bash
+orchestra task add --id STORY-001 --title "Generate manifest" --owner developer \
+  --verifier-id cli-manifest \
+  --verifier-surface cli \
+  --verifier-setup "package installed" \
+  --verifier-action "run manifest command" \
+  --verifier-expected "manifest generated" \
+  --verifier-evidence "manifest.json" \
+  --verifier-owner qa
+```
+Add or update an entry later. Updates merge by verifier id:
+```bash
+orchestra task update --id STORY-001 \
+  --verifier-id cli-manifest \
+  --verifier-surface cli \
+  --verifier-setup "package installed" \
+  --verifier-action "orchestra commands manifest --json" \
+  --verifier-expected "manifest generated" \
+  --verifier-evidence "manifest.json" \
+  --verifier-owner qa
+```
+Inspect with:
+```bash
+orchestra task show --id STORY-001 --json
+orchestra context --task STORY-001 --json
+```
+## Evidence Mapping
+Prefer explicit mapping:
+```bash
+orchestra evidence add --task STORY-001 --role qa --type command \
+  --summary "manifest.json generated" \
+  --command "orchestra commands manifest --json" \
+  --exit-code 0 \
+  --surface cli \
+  --assertions "exit code 0; stdout contains manifest generated; stderr empty; artifact manifest.json written; final state manifest generated" \
+  --verifier-contract-id cli-manifest
+```
+Legacy evidence can still match by task, surface, observable assertions, and
+artifact reference, but an explicit `--verifier-contract-id` is less ambiguous.
+## Gate Behavior
+Tasks without verifier contracts keep existing behavior.
+For tasks with required verifier entries, `qa-release` and `release-readiness`
+block when evidence is missing, has failed, or lacks observable outcome validation.
+Missing keys use stable names such as:
+- `verifierContract.<id>.evidence`
+- `verifierContract.<id>.observableOutcome`
+Optional verifiers are rendered in context and handoffs but do not block gates.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jterrats/open-orchestra",
-  "version": "1.1.2",
+  "version": "1.2.2",
   "type": "module",
   "workspaces": [
     "extensions/vscode-open-orchestra",
@@ -16,7 +16,7 @@
     "test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
     "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
     "test:e2e": "npm run build && npm run site:build && playwright test",
-    "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
+    "test:e2e:init": "node --test --test-concurrency=1 e2e/stubbed-provider-mcp-contract.test.js e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
     "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js e2e/runtime-multi-squad.test.js",
     "test:e2e:security": "npm run build && node --test e2e/security-boundaries.test.js",
     "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
@@ -30,7 +30,11 @@
     "validate:workflow": "node scripts/validate-workflow.js",
     "release:matrix": "node scripts/release-test-matrix.js",
     "performance:bench": "npm run build && node scripts/performance-benchmark.js",
-    "precommit": "npm run lint && npm run typecheck && npm run secret-scan && npm run security:audit && npm test && npm run validate:workflow",
+    "check:commands": "npm run build && node scripts/check-command-manifest.js",
+    "precheck": "npm run check:commands",
+    "precommit": "npm run precheck",
+    "prepush": "npm run precheck",
+    "ci:quality": "npm run lint && npm run typecheck && npm run secret-scan && npm run security:audit && npm test && npm run validate:workflow",
     "package:build": "npm run build && npm run site:build",
     "package:validate": "node scripts/validate-package-contents.js",
     "prepack": "npm run package:build && npm run package:validate",