@jterrats/open-orchestra 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +13 -1
- package/CLAUDE.md +13 -1
- package/dist/autonomous-phase-lifecycle.js +52 -2
- package/dist/autonomous-phase-lifecycle.js.map +1 -1
- package/dist/autonomous-run-state.d.ts +3 -1
- package/dist/autonomous-run-state.js +28 -2
- package/dist/autonomous-run-state.js.map +1 -1
- package/dist/autonomous-run-store.js +9 -0
- package/dist/autonomous-run-store.js.map +1 -1
- package/dist/capture-commands.js +1 -1
- package/dist/capture-commands.js.map +1 -1
- package/dist/cli-capability-data.js +30 -0
- package/dist/cli-capability-data.js.map +1 -1
- package/dist/command-manifest.js +5 -4
- package/dist/command-manifest.js.map +1 -1
- package/dist/command-routes-integrations.js +2 -1
- package/dist/command-routes-integrations.js.map +1 -1
- package/dist/commands.d.ts +1 -1
- package/dist/commands.js +1 -1
- package/dist/commands.js.map +1 -1
- package/dist/delivery-commands.js +1 -0
- package/dist/delivery-commands.js.map +1 -1
- package/dist/fs-utils.js +6 -5
- package/dist/fs-utils.js.map +1 -1
- package/dist/mcp-runtime-config.js +20 -3
- package/dist/mcp-runtime-config.js.map +1 -1
- package/dist/model-providers.d.ts +1 -1
- package/dist/model-providers.js +1 -1
- package/dist/model-providers.js.map +1 -1
- package/dist/ollama-provider.d.ts +7 -0
- package/dist/ollama-provider.js +41 -15
- package/dist/ollama-provider.js.map +1 -1
- package/dist/phase-playbooks.js +14 -0
- package/dist/phase-playbooks.js.map +1 -1
- package/dist/planning-commands.js +5 -0
- package/dist/planning-commands.js.map +1 -1
- package/dist/qa-evidence-validation.d.ts +1 -1
- package/dist/qa-evidence-validation.js +3 -1
- package/dist/qa-evidence-validation.js.map +1 -1
- package/dist/quality-contracts.js +1 -1
- package/dist/quality-contracts.js.map +1 -1
- package/dist/release-inclusion.js +1 -1
- package/dist/release-inclusion.js.map +1 -1
- package/dist/runtime-bootstrap.js +12 -0
- package/dist/runtime-bootstrap.js.map +1 -1
- package/dist/sonar-preflight.js +21 -1
- package/dist/sonar-preflight.js.map +1 -1
- package/dist/task-graph-commands.js +48 -0
- package/dist/task-graph-commands.js.map +1 -1
- package/dist/tool-commands.d.ts +1 -0
- package/dist/tool-commands.js +95 -2
- package/dist/tool-commands.js.map +1 -1
- package/dist/types/tasks.d.ts +24 -1
- package/dist/types/workflow-run.d.ts +16 -0
- package/dist/types.d.ts +4 -3
- package/dist/types.js.map +1 -1
- package/dist/validation.js +6 -0
- package/dist/validation.js.map +1 -1
- package/dist/verifier-contracts.d.ts +29 -0
- package/dist/verifier-contracts.js +184 -0
- package/dist/verifier-contracts.js.map +1 -0
- package/dist/workflow-approval-service.js +36 -2
- package/dist/workflow-approval-service.js.map +1 -1
- package/dist/workflow-continuation-policy.d.ts +3 -0
- package/dist/workflow-continuation-policy.js +20 -0
- package/dist/workflow-continuation-policy.js.map +1 -0
- package/dist/workflow-correction-context.d.ts +30 -0
- package/dist/workflow-correction-context.js +117 -0
- package/dist/workflow-correction-context.js.map +1 -0
- package/dist/workflow-evidence-service.js +31 -22
- package/dist/workflow-evidence-service.js.map +1 -1
- package/dist/workflow-gates.js +6 -0
- package/dist/workflow-gates.js.map +1 -1
- package/dist/workflow-handoff-assessment.js +10 -2
- package/dist/workflow-handoff-assessment.js.map +1 -1
- package/dist/workflow-handoff-contract.d.ts +7 -0
- package/dist/workflow-handoff-contract.js +18 -0
- package/dist/workflow-handoff-contract.js.map +1 -1
- package/dist/workflow-phase-planner.js +56 -22
- package/dist/workflow-phase-planner.js.map +1 -1
- package/dist/workflow-return-routing.d.ts +14 -0
- package/dist/workflow-return-routing.js +61 -0
- package/dist/workflow-return-routing.js.map +1 -0
- package/dist/workflow-run-commands.js +173 -52
- package/dist/workflow-run-commands.js.map +1 -1
- package/dist/workflow-services.js +1 -0
- package/dist/workflow-services.js.map +1 -1
- package/dist/workflow-task-service.js +15 -7
- package/dist/workflow-task-service.js.map +1 -1
- package/docs/autonomous-workflow.md +4 -0
- package/docs/ci-self-hosted-runners.md +27 -21
- package/docs/e2e-test-batteries.md +74 -42
- package/docs/runtime-adapters.md +4 -0
- package/docs/runtime-llm-flow.md +6 -3
- package/docs/sonar-quality-gates.md +19 -9
- package/docs/verifier-contracts.md +87 -0
- package/package.json +2 -2
|
@@ -16,25 +16,34 @@ entry points a user or CI runner actually executes.
|
|
|
16
16
|
flows.
|
|
17
17
|
- External provider, GitHub, Sonar, or network-dependent paths must be opt-in
|
|
18
18
|
and must report skipped or deferred evidence when offline.
|
|
19
|
+
- Local provider smokes must default to mock or loopback/private endpoints,
|
|
20
|
+
reject public internet egress unless a trusted-provider policy explicitly
|
|
21
|
+
allows it, and record redacted provider provenance instead of credentials,
|
|
22
|
+
raw prompts, hostnames, or ports.
|
|
23
|
+
- Stubbed provider/MCP batteries are contract evidence only. They may block PRs
|
|
24
|
+
when CLI/API/workflow contracts regress, but they must not satisfy
|
|
25
|
+
real-provider acceptance criteria for hosted auth, latency, rate limits,
|
|
26
|
+
provider-side failures, or production model behavior.
|
|
19
27
|
- A release can ship only when P0 batteries pass or a release-manager accepted
|
|
20
28
|
risk records the unavailable environment and compensating evidence.
|
|
21
29
|
|
|
22
30
|
## Product Journey Coverage Matrix
|
|
23
31
|
|
|
24
|
-
| Journey | Primary surface | Environment model | Expected result
|
|
25
|
-
| ------------------------------- | ---------------------------- | ---------------------------------------------------------------------- |
|
|
26
|
-
| First workspace setup | CLI | Local `/tmp` workspace, source and packaged binary | User can initialize a project, preserve existing files, and see target runtime guidance
|
|
27
|
-
| Runtime-specific guidance regen | CLI + generated docs | Simulated Codex, Claude, Cursor, VS Code, Windsurf, generic workspaces | `init --force` regenerates missing managed guidance without mixing target-specific instructions
|
|
28
|
-
| Workflow delivery lifecycle | CLI workflow | Isolated local workspace | Task, estimate, phase handoffs, gate pause/resume, QA failback, evidence, and release readiness work end to end
|
|
29
|
-
| Runtime delegation | CLI runtime actions | Local runtime queue with detached sessions | Manual and multi-squad delegation queue safely, preserve parent availability, and reconcile lifecycle state
|
|
30
|
-
| Web console operator flow | Browser app + API | Local web server and persisted test workspace | Operator can inspect tasks, costs, providers, delegation, workflow progress, recovery, and artifacts
|
|
31
|
-
| Chat and provider flow | Browser app + API + provider | Stubbed provider by default, opt-in local provider | Chat sends scoped messages, streams usable status, records provider provenance, and handles failures safely
|
|
32
|
-
|
|
|
33
|
-
|
|
|
34
|
-
|
|
|
35
|
-
|
|
|
36
|
-
|
|
|
37
|
-
|
|
|
32
|
+
| Journey | Primary surface | Environment model | Expected result | Evidence strategy | Current battery | Child story recommendation |
|
|
33
|
+
| ------------------------------- | ---------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | ------------------------------------- |
|
|
34
|
+
| First workspace setup | CLI | Local `/tmp` workspace, source and packaged binary | User can initialize a project, preserve existing files, and see target runtime guidance | Assert exit codes, generated files, managed block boundaries, package version, and human/JSON output | Local and installed CLI onboarding | GH-533-A source/package setup parity |
|
|
35
|
+
| Runtime-specific guidance regen | CLI + generated docs | Simulated Codex, Claude, Cursor, VS Code, Windsurf, generic workspaces | `init --force` regenerates missing managed guidance without mixing target-specific instructions | Filesystem diffs plus QA coverage JSON proving regenerated artifacts map back to acceptance criteria | Init refresh environments | GH-533-B runtime regen matrix |
|
|
36
|
+
| Workflow delivery lifecycle | CLI workflow | Isolated local workspace | Task, estimate, phase handoffs, gate pause/resume, QA failback, evidence, and release readiness work end to end | JSON event stream, handoff artifacts, review/evidence records, and release-readiness before/after assertions | Workflow lifecycle CLI | GH-533-C workflow evidence lifecycle |
|
|
37
|
+
| Runtime delegation | CLI runtime actions | Local runtime queue with detached sessions | Manual and multi-squad delegation queue safely, preserve parent availability, and reconcile lifecycle state | Spawn request artifacts, lifecycle commands, runtime session lists, queue state, and completion events | Runtime manual queue, multi-squad runtime | GH-533-D runtime delegation contracts |
|
|
38
|
+
| Web console operator flow | Browser app + API | Local web server and persisted test workspace | Operator can inspect tasks, costs, providers, delegation, workflow progress, recovery, and artifacts | Playwright visible assertions, API persistence checks, responsive/keyboard coverage, screenshots/traces on failure | Browser console | GH-533-E web console operator journey |
|
|
39
|
+
| Chat and provider flow | Browser app + API + provider | Stubbed provider by default, opt-in local provider | Chat sends scoped messages, streams usable status, records provider provenance, and handles failures safely | API response assertions, SSE/event trace, DOM state, provider request fixture, redacted logs | Browser console, provider opt-in | GH-533-F chat provider E2E |
|
|
40
|
+
| Stubbed provider + MCP contract | CLI + web API + workflow | Built-in fake provider and scoped MCP config in an isolated workspace | PR-safe provider/MCP contract proves fake-provider routing, scoped MCP status, redacted evidence, and hosted provider fail-closed policy | CLI/API JSON comparisons, workflow executor provenance, redacted contract evidence report, hosted-provider negative assertion | Stubbed provider MCP contract | GH-551 GH-528A PR-safe contract E2E |
|
|
41
|
+
| Public docs and site discovery | Browser site | Local static/site build | Users can find docs, navigate core concepts, inspect architecture, and read mobile-safe pages | Playwright navigation, search, docs catalog, responsive text-fit, and no raw GitHub dead-end assertions | Public site, docs/site content source | GH-533-G docs discovery E2E |
|
|
42
|
+
| Security-sensitive operations | CLI + API + browser | Local sandbox with hostile fixtures | Unsafe file paths, shell patterns, secrets, stack traces, and telemetry leaks are blocked or redacted | Hostile fixture assertions, redacted command/API evidence, browser-visible safe errors | Security-sensitive operations | GH-533-H security boundary E2E |
|
|
43
|
+
| Packaged renderer behavior | CLI package path | Built package artifacts | Diagram/report commands use packaged paths and produce user-safe output without stale source-only assumptions | Compiled command output, package file list, XML/SVG/HTML/source invariant checks, and explicit missing messaging | Renderer packaged paths | GH-540 packaged artifact E2E |
|
|
44
|
+
| Benchmark/dashboard reporting | CLI metrics + dashboard JSON | Isolated workflow telemetry with completed and measurable stories | Velocity, calibration, and dashboard distinguish completed stories from measurable actuals without fake duration | CLI JSON assertions, dashboard metric assertions, actual source checks, and calibration sample-size checks | Benchmark dashboard duration smoke | GH-541 benchmark dashboard E2E |
|
|
45
|
+
| MCP scoped OAuth diagnostics | CLI + web API | Runtime-scoped MCP config in an isolated workspace | CLI and web API report matching runtime OAuth state, redact secrets, and hide cross-workspace MCP config | CLI/API JSON comparisons, redaction assertions, scoped workspace config checks | MCP scoped OAuth smoke | GH-542 MCP OAuth E2E |
|
|
46
|
+
| Release candidate readiness | CLI + CI artifacts | Local dry run, CI/self-hosted runner | Candidate package contents, version/tag policy, release matrix, and quality gates are release-ready | `npm pack --dry-run`, release check JSON, CI run links/logs, and accepted-risk records for unavailable environments | Package release dry run, source quality | GH-533-J release candidate E2E |
|
|
38
47
|
|
|
39
48
|
## Regeneration Plan
|
|
40
49
|
|
|
@@ -59,39 +68,40 @@ clear skip/deferred signal and owner.
|
|
|
59
68
|
|
|
60
69
|
## P0 Release-Blocking Batteries
|
|
61
70
|
|
|
62
|
-
| Battery | Scope | Command
|
|
63
|
-
| ------------------------- | ---------------------------------------------------------------------------------------------------- |
|
|
64
|
-
| Source quality | Static checks, build, unit tests, workflow validation, secret scan, security audit | `npm run ci:quality`
|
|
65
|
-
| Local CLI onboarding | Current source CLI in `/tmp` workspaces | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init`
|
|
66
|
-
| Installed CLI onboarding | Installed or packaged CLI in `/tmp` workspaces | `npm run test:e2e:init` after installing the candidate package
|
|
67
|
-
| Browser console | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e`
|
|
68
|
-
| Public site | Documentation/site navigation, docs catalog, architecture viewer, mobile fit | `npm run test:e2e`
|
|
69
|
-
| Runtime manual queue | Manual runtime delegation in a `/tmp` workspace | `npm run test:e2e:runtime`
|
|
70
|
-
| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces | `npm run test:e2e:init`
|
|
71
|
-
| Workflow lifecycle CLI | CLI workflow run, gate, resume, QA failback, release readiness | `node --test e2e/workflow-lifecycle-cli.test.js`
|
|
72
|
-
| Renderer packaged paths | Diagram lint command, artifact previews, and report renderer package contracts | `npm run build && node --test test/renderer-compiled-path.test.js e2e/packaged-renderer-smoke.test.js` | compiled CLI path emits Mermaid install guidance without stack traces, candidate package includes compiled renderer modules, previews preserve draw.io/Mermaid/ERD/TSX invariants, report render command absence is explicit
|
|
71
|
+
| Battery | Scope | Command | Minimum Assertions | Evidence |
|
|
72
|
+
| ------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
|
|
73
|
+
| Source quality | Static checks, build, unit tests, workflow validation, secret scan, security audit | `npm run ci:quality` | exit code 0, no leaks, no audit blockers, workflow valid | command log |
|
|
74
|
+
| Local CLI onboarding | Current source CLI in `/tmp` workspaces | `ORCHESTRA_NODE_SCRIPT=$PWD/bin/orchestra.js npm run test:e2e:init` | `--version`, `init`, `status`, `validate`, first-use task, handoff, evidence, release readiness | stdout/stderr, JSON output, filesystem assertions |
|
|
75
|
+
| Installed CLI onboarding | Installed or packaged CLI in `/tmp` workspaces | `npm run test:e2e:init` after installing the candidate package | same assertions as local CLI onboarding, proving the packaged binary matches source behavior | stdout/stderr, JSON output, filesystem assertions, package version |
|
|
76
|
+
| Browser console | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e` | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior | Playwright report, screenshots/traces on failure |
|
|
77
|
+
| Public site | Documentation/site navigation, docs catalog, architecture viewer, mobile fit | `npm run test:e2e` | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit | Playwright report |
|
|
78
|
+
| Runtime manual queue | Manual runtime delegation in a `/tmp` workspace | `npm run test:e2e:runtime` | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content |
|
|
79
|
+
| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces | `npm run test:e2e:init` | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON |
|
|
80
|
+
| Workflow lifecycle CLI | CLI workflow run, gate, resume, QA failback, release readiness | `node --test e2e/workflow-lifecycle-cli.test.js` | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence | JSON output, events, handoffs |
|
|
81
|
+
| Renderer packaged paths | Diagram lint command, artifact previews, and report renderer package contracts | `npm run build && node --test test/renderer-compiled-path.test.js e2e/packaged-renderer-smoke.test.js` | compiled CLI path emits Mermaid install guidance without stack traces, candidate package includes compiled renderer modules, previews preserve draw.io/Mermaid/ERD/TSX invariants, report render command absence is explicit | stdout/stderr, JSON output, package file list, XML/SVG/HTML/source invariant checks |
|
|
82
|
+
| Stubbed provider MCP | Built-in fake provider, scoped MCP fixture, provider/MCP web API status, and contract evidence | `npm run test:e2e:init` or `npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js` | fake provider routing is visible through CLI/API, scoped MCP CLI/API status matches, hosted provider execution fails before direct provider API use, evidence is redacted and labeled contract evidence, and no hosted provider secret is required | CLI/API JSON output, workflow events, `.agent-workflow/evidence/*-report.md`, `reports/stubbed-provider-mcp-contract-evidence.json` |
|
|
73
83
|
|
|
74
84
|
## P1 High-Risk Regression Batteries
|
|
75
85
|
|
|
76
|
-
| Battery | Scope | Command
|
|
77
|
-
| ------------------------------ | --------------------------------------------------------------------- |
|
|
78
|
-
| Multi-squad runtime | Parallel squad delegation with queue and threshold policy | `npm run test:e2e:runtime`
|
|
79
|
-
| Acceptance evidence | CLI, API, browser, and deferred integration evidence | `node --test e2e/acceptance-evidence.test.js`
|
|
80
|
-
| Recovery and repair | Interrupted runs, stale locks, failed provider phases | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage
|
|
81
|
-
| Docs/site content source | Site content generated from docs and manifest | `npm run site:build && npm run test:e2e -- --grep docs`
|
|
82
|
-
| Security-sensitive operations | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security`
|
|
83
|
-
| Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace | `npm run test:e2e:runtime:ollama`
|
|
86
|
+
| Battery | Scope | Command | Minimum Assertions | Evidence |
|
|
87
|
+
| ------------------------------ | --------------------------------------------------------------------- | ----------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ |
|
|
88
|
+
| Multi-squad runtime | Parallel squad delegation with queue and threshold policy | `npm run test:e2e:runtime` | independent sessions, non-blocking parent, queued sessions do not fall back to parent, completion order reconciles | JSON output, lifecycle events |
|
|
89
|
+
| Acceptance evidence | CLI, API, browser, and deferred integration evidence | `node --test e2e/acceptance-evidence.test.js` | evidence maps to named acceptance criteria, deferred external validation requires owner and rationale | evidence artifacts |
|
|
90
|
+
| Recovery and repair | Interrupted runs, stale locks, failed provider phases | `node --test e2e/recovery-cli.test.js` plus browser recovery coverage | recovery detects issue, repair requires confirmation, repaired state is observable | JSON output, before/after state |
|
|
91
|
+
| Docs/site content source | Site content generated from docs and manifest | `npm run site:build && npm run test:e2e -- --grep docs` | docs render as human-friendly catalog, no markdown-only dead ends, search works | Playwright report |
|
|
92
|
+
| Security-sensitive operations | File paths, shell execution, web writes, secrets, telemetry redaction | `npm run test:e2e:security` | path traversal blocked, unsafe writes rejected, secret-like data redacted, no raw stack traces | command/API evidence |
|
|
93
|
+
| Ollama provider-backed runtime | Local OpenAI-compatible Ollama provider route in a `/tmp` workspace | `npm run test:e2e:runtime:ollama` | `model connect --provider ollama`, provider-backed developer phase, OpenAI-compatible request shape, provider provenance, no runtime subagent credentials in artifacts | stdout/stderr, JSON output, mock provider request, event log |
|
|
84
94
|
| Benchmark dashboard duration | Benchmark-derived velocity, calibration, and delivery dashboard JSON | `npm run build && node --test e2e/benchmark-dashboard-duration.test.js` | completed stories are counted separately from stories with actual duration, completed-without-duration is visible, calibration sample size excludes null actuals | CLI JSON output, dashboard JSON, calibration JSON |
|
|
85
95
|
| MCP scoped OAuth smoke | Runtime-scoped MCP integration status through CLI and web API | `npm run build && node --test e2e/mcp-scoped-oauth-smoke.test.js` | CLI and web API agree for `--runtime claude-cli`, diagnostics redact OAuth/API-key material, cross-workspace MCP config is not exposed | CLI/API JSON output, scoped config fixture, redaction checks |
|
|
86
96
|
|
|
87
97
|
## P2 Extended Confidence Batteries
|
|
88
98
|
|
|
89
|
-
| Battery | Scope | Command | Minimum Assertions
|
|
90
|
-
| -------------------------- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- |
|
|
91
|
-
| Tracker and GitHub sync | Issue import/export and close readiness | opt-in CI job with network credentials | labels, comments, close gate, release readiness, no secret exposure
|
|
92
|
-
| Sonar quality loop | Local or remote Sonar import and release gate mapping | configured Sonar workflow or local compose job | insights imported, release readiness reflects quality gate, unavailable token is explicit
|
|
93
|
-
| Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage | registry routing, explicit direct API policy, forbidden fallback, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
|
|
94
|
-
| Package release dry run | npm package contents and release check | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete
|
|
99
|
+
| Battery | Scope | Command | Minimum Assertions | Evidence |
|
|
100
|
+
| -------------------------- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------- |
|
|
101
|
+
| Tracker and GitHub sync | Issue import/export and close readiness | opt-in CI job with network credentials | labels, comments, close gate, release readiness, no secret exposure | sanitized logs |
|
|
102
|
+
| Sonar quality loop | Local or remote Sonar import and release gate mapping | configured Sonar workflow or local compose job | insights imported, release readiness reflects quality gate, unavailable token is explicit | artifact import report |
|
|
103
|
+
| Provider-backed delegation | OpenAI, Anthropic, Gemini, Ollama, fake/local provider-backed routes plus runtime-native separation | opt-in provider E2E plus focused wrapper/unit coverage | registry routing, explicit direct API policy, forbidden fallback, local-provider public egress rejection, timeout handling, budget and scheduler blocks, redacted evidence, no silent runtime-native fallback | redacted provider provenance |
|
|
104
|
+
| Package release dry run | npm package contents and release check | `npm pack --dry-run --json && orchestra release check --json` | generated/private state excluded, version/tag policy valid, release readiness complete | package list, release report |
|
|
95
105
|
|
|
96
106
|
## Required `/tmp` Fixture Patterns
|
|
97
107
|
|
|
@@ -122,7 +132,9 @@ the packaging/install path is wrong.
|
|
|
122
132
|
3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
|
|
123
133
|
resume, and release readiness.
|
|
124
134
|
4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.
|
|
125
|
-
5. Add focused security and acceptance-evidence E2E only where unit tests cannot
|
|
135
|
+
5. Add `e2e/stubbed-provider-mcp-contract.test.js` for PR-safe provider/MCP
|
|
136
|
+
contract evidence that cannot be confused with real-provider evidence.
|
|
137
|
+
6. Add focused security and acceptance-evidence E2E only where unit tests cannot
|
|
126
138
|
prove the user-visible contract.
|
|
127
139
|
|
|
128
140
|
## Executable Child Story Recommendations
|
|
@@ -138,8 +150,9 @@ the packaging/install path is wrong.
|
|
|
138
150
|
| GH-533-G | Docs discovery E2E | Split docs/site grep coverage into catalog, search, architecture, and mobile scenarios | Users can find docs without raw repository dead ends, and mobile pages have no clipping or overlap |
|
|
139
151
|
| GH-533-H | Security boundary E2E | Add hostile fixture coverage for paths, shell-like input, secrets, telemetry, and stack traces | Unsafe operations fail closed and all user-facing/API evidence is redacted |
|
|
140
152
|
| GH-540 | Packaged artifact E2E | Promote compiled renderer/package assertions into a package-candidate smoke | Packaged commands do not depend on source-only paths and produce inspectable artifacts or explicit unavailable messages |
|
|
141
|
-
| GH-541 | Benchmark dashboard E2E | Add velocity, calibration, and dashboard JSON assertions for completed stories without measured duration
|
|
142
|
-
| GH-542 | MCP OAuth scoped E2E | Add CLI/API status parity assertions for runtime-scoped MCP OAuth diagnostics
|
|
153
|
+
| GH-541 | Benchmark dashboard E2E | Add velocity, calibration, and dashboard JSON assertions for completed stories without measured duration | Reports count completed work without inventing duration and calibration uses only measurable actuals |
|
|
154
|
+
| GH-542 | MCP OAuth scoped E2E | Add CLI/API status parity assertions for runtime-scoped MCP OAuth diagnostics | Scoped OAuth status is consistent across CLI/API and diagnostics stay redacted without cross-workspace leakage |
|
|
155
|
+
| GH-551 | Stubbed provider/MCP E2E | Add fake-provider workflow plus MCP dry-run CLI assertions in an isolated workspace | PR-safe contract evidence proves provider/MCP integration without secrets or network calls and never replaces real-provider smoke |
|
|
143
156
|
| GH-533-J | Release candidate E2E | Add a release dry-run evidence bundle around package contents, tag readiness, CI logs, and release matrix | Release candidate proves package contents, version/tag policy, quality gate, rollback evidence, and accepted-risk records |
|
|
144
157
|
|
|
145
158
|
## Opt-In Provider Runtime Batteries
|
|
@@ -152,6 +165,25 @@ workflow provenance, and no-secret behavior without requiring a real Ollama
|
|
|
152
165
|
daemon. A separate real-model smoke can be run with `ORCHESTRA_OLLAMA_SMOKE=1`
|
|
153
166
|
when validating a local model installation.
|
|
154
167
|
|
|
168
|
+
## Stubbed Provider And MCP Contract Evidence
|
|
169
|
+
|
|
170
|
+
`e2e/stubbed-provider-mcp-contract.test.js` is the default PR/push-safe provider
|
|
171
|
+
and MCP contract suite. It is included in `npm run test:e2e:init`, which the
|
|
172
|
+
normal CI dogfood job runs without hosted provider secrets. The focused command
|
|
173
|
+
is:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
npm run build && node --test e2e/stubbed-provider-mcp-contract.test.js
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
The suite uses the built-in `fake` provider and workspace-scoped MCP config in a
|
|
180
|
+
temporary workspace. It asserts real CLI and web API outputs, records a
|
|
181
|
+
redacted report through `orchestra evidence add`, and labels that report
|
|
182
|
+
`contract evidence`. That evidence proves the local provider/MCP contract only.
|
|
183
|
+
It must not be used as acceptance evidence for real OpenAI, Anthropic, Gemini,
|
|
184
|
+
Ollama, or hosted MCP behavior. Real-provider acceptance remains covered only by
|
|
185
|
+
protected/manual smoke suites with explicit secrets and a trusted-run policy.
|
|
186
|
+
|
|
155
187
|
## Definition Of Done
|
|
156
188
|
|
|
157
189
|
An E2E battery is complete only when it has:
|
package/docs/runtime-adapters.md
CHANGED
|
@@ -499,6 +499,10 @@ parent-agent fallback reason. `subagents` requires runtime-native support and
|
|
|
499
499
|
fails fast if the runtime cannot satisfy it. `single-agent` forces the parent
|
|
500
500
|
agent path and records that choice in phase provenance.
|
|
501
501
|
|
|
502
|
+
Gate mode is independent from execution mode: `--gates none` suppresses human
|
|
503
|
+
gate pauses, while `--phase-execution single-agent` is what prevents detached
|
|
504
|
+
runtime parent actions and avoids subagent lifecycle requirements.
|
|
505
|
+
|
|
502
506
|
When no task or role executor is configured and the default executor is
|
|
503
507
|
`generic-runtime`, `auto` and strict `subagents` mode infer the active runtime
|
|
504
508
|
from `OPEN_ORCHESTRA_ACTIVE_RUNTIME`, then from
|
package/docs/runtime-llm-flow.md
CHANGED
|
@@ -155,9 +155,12 @@ adapter also reads optional `GEMINI_BASE_URL`; the base URL must be HTTPS and
|
|
|
155
155
|
defaults to `https://generativelanguage.googleapis.com`.
|
|
156
156
|
|
|
157
157
|
The Ollama adapter defaults to `http://localhost:11434/v1` and uses the
|
|
158
|
-
OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` for
|
|
159
|
-
|
|
160
|
-
|
|
158
|
+
OpenAI-compatible `/chat/completions` endpoint. Set `OLLAMA_BASE_URL` only for
|
|
159
|
+
loopback or private-network endpoints; public internet hosts are rejected unless
|
|
160
|
+
a future trusted-provider policy explicitly implements that exception. Endpoint
|
|
161
|
+
URLs must use `http` or `https`, must not include credentials, and are recorded
|
|
162
|
+
in evidence only as redacted local-provider provenance. Set `OLLAMA_API_KEY`
|
|
163
|
+
when the local endpoint requires one.
|
|
161
164
|
|
|
162
165
|
## Runtime Execution
|
|
163
166
|
|
|
@@ -64,10 +64,13 @@ Optional GitHub variables:
|
|
|
64
64
|
quality gate fails.
|
|
65
65
|
- `SONAR_RUNNER`: set to `self-hosted` to run the Sonar workflow on a local
|
|
66
66
|
runner that can reach the shared SonarQube runtime directly. When this is set,
|
|
67
|
-
the workflow
|
|
68
|
-
Access service-token checks
|
|
67
|
+
the workflow resolves `SONAR_LOCAL_HOST_URL`, then `SONAR_HOST_URL`, then
|
|
68
|
+
`http://localhost:9001`. Cloudflare Access service-token checks still run
|
|
69
|
+
when the resolved URL uses `SONAR_HOST_URL` and Cloudflare credentials are
|
|
70
|
+
configured.
|
|
69
71
|
- `SONAR_LOCAL_HOST_URL`: optional override for self-hosted runner mode when the
|
|
70
|
-
runner reaches SonarQube through a different local-only URL.
|
|
72
|
+
runner reaches SonarQube through a different local-only URL. Prefer this over
|
|
73
|
+
`SONAR_HOST_URL` when the self-hosted runner should use a direct local path.
|
|
71
74
|
|
|
72
75
|
The workflow skips analysis when `SONAR_TOKEN` is not configured. This keeps
|
|
73
76
|
forks and offline development usable. For private repositories, keep
|
|
@@ -100,6 +103,10 @@ API access, issue API access, and security hotspot API access. It redacts the
|
|
|
100
103
|
token, host URL, and Cloudflare Access service token values from diagnostic
|
|
101
104
|
output. `hotspots` is a warning when unavailable because some Sonar tokens can
|
|
102
105
|
analyze and read issues while hotspot review permissions are managed separately.
|
|
106
|
+
In local `sonarqube-local` mode, a branch-scoped quality gate `404` falls back
|
|
107
|
+
to the default branch when the project itself is readable, because SonarQube
|
|
108
|
+
Community Edition does not expose branch analysis unless an optional add-on or
|
|
109
|
+
commercial feature is installed.
|
|
103
110
|
|
|
104
111
|
Common remediation:
|
|
105
112
|
|
|
@@ -266,12 +273,12 @@ cd ~/dev/sonarqube_jterrats_dev
|
|
|
266
273
|
docker compose up -d
|
|
267
274
|
```
|
|
268
275
|
|
|
269
|
-
When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube
|
|
270
|
-
`http://localhost:9001`
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
276
|
+
When `SONAR_RUNNER=self-hosted`, the workflow resolves SonarQube in this order:
|
|
277
|
+
`SONAR_LOCAL_HOST_URL`, `SONAR_HOST_URL`, then `http://localhost:9001`.
|
|
278
|
+
Use `SONAR_LOCAL_HOST_URL` to force a direct local path and avoid accidentally
|
|
279
|
+
routing local-machine analysis back through Cloudflare Zero Trust. When no local URL is
|
|
280
|
+
set and `SONAR_HOST_URL` points at a Cloudflare Access-protected hostname, the
|
|
281
|
+
workflow enables the service-token proxy before preflight. The CI scan uses
|
|
275
282
|
`continue-on-error` on the scanner step so Orchestra can still import and upload
|
|
276
283
|
Sonar evidence when the quality gate fails; a final workflow step re-fails the
|
|
277
284
|
job after evidence is captured.
|
|
@@ -299,6 +306,9 @@ Expected result:
|
|
|
299
306
|
- Sonar authentication returns `{"valid":true}`.
|
|
300
307
|
- `npm run sonar:preflight:local` passes `auth`, `project`, `qualityGate`, and
|
|
301
308
|
`issues`; `hotspots` may warn when the token lacks hotspot read access.
|
|
309
|
+
- Pull request runs on local SonarQube Community Edition may report
|
|
310
|
+
`quality-gate-readable-default-branch` when branch-scoped quality gate status
|
|
311
|
+
is unavailable but default-branch quality gate access is valid.
|
|
302
312
|
|
|
303
313
|
If the runner is online but jobs stay queued, verify the workflow labels match
|
|
304
314
|
the runner labels exactly. If Sonar preflight fails, fix the token/project
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Verifier Contracts
|
|
2
|
+
|
|
3
|
+
Verifier contracts are task metadata that describe how a workflow run proves an
|
|
4
|
+
accepted outcome. They are used by agents, QA, and release gates to avoid
|
|
5
|
+
treating simulated handoffs or unmapped evidence as proof.
|
|
6
|
+
|
|
7
|
+
## Fields
|
|
8
|
+
|
|
9
|
+
Each verifier entry is stored under `task.verifierContract.entries`:
|
|
10
|
+
|
|
11
|
+
- `id`: stable verifier id, unique within the task.
|
|
12
|
+
- `surface`: one of `cli`, `api`, `web`, `mobile`, `desktop`, `db`, `cloud`,
|
|
13
|
+
`workflow`, or `generated-artifact`.
|
|
14
|
+
- `setup`: environment or data setup required before verification.
|
|
15
|
+
- `action`: command, request, workflow action, or user action to execute.
|
|
16
|
+
- `expectedObservable`: observable result that must be proven.
|
|
17
|
+
- `assertionType`: `equals`, `contains`, `matches`, `exists`, or `custom`.
|
|
18
|
+
- `evidenceArtifact`: expected file, command output, trace, screenshot, log, or
|
|
19
|
+
report reference.
|
|
20
|
+
- `ownerRole`: role responsible for producing or reviewing evidence.
|
|
21
|
+
- `required`: defaults to `true`; optional verifiers are advisory.
|
|
22
|
+
- `acceptanceCriteria`: optional criteria references covered by the verifier.
|
|
23
|
+
|
|
24
|
+
## CLI
|
|
25
|
+
|
|
26
|
+
Add a verifier while creating a task:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
orchestra task add --id STORY-001 --title "Generate manifest" --owner developer \
|
|
30
|
+
--verifier-id cli-manifest \
|
|
31
|
+
--verifier-surface cli \
|
|
32
|
+
--verifier-setup "package installed" \
|
|
33
|
+
--verifier-action "run manifest command" \
|
|
34
|
+
--verifier-expected "manifest generated" \
|
|
35
|
+
--verifier-evidence "manifest.json" \
|
|
36
|
+
--verifier-owner qa
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Add or update an entry later. Updates merge by verifier id:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
orchestra task update --id STORY-001 \
|
|
43
|
+
--verifier-id cli-manifest \
|
|
44
|
+
--verifier-surface cli \
|
|
45
|
+
--verifier-setup "package installed" \
|
|
46
|
+
--verifier-action "orchestra commands manifest --json" \
|
|
47
|
+
--verifier-expected "manifest generated" \
|
|
48
|
+
--verifier-evidence "manifest.json" \
|
|
49
|
+
--verifier-owner qa
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Inspect with:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
orchestra task show --id STORY-001 --json
|
|
56
|
+
orchestra context --task STORY-001 --json
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Evidence Mapping
|
|
60
|
+
|
|
61
|
+
Prefer explicit mapping:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
orchestra evidence add --task STORY-001 --role qa --type command \
|
|
65
|
+
--summary "manifest.json generated" \
|
|
66
|
+
--command "orchestra commands manifest --json" \
|
|
67
|
+
--exit-code 0 \
|
|
68
|
+
--surface cli \
|
|
69
|
+
--assertions "exit code 0; stdout contains manifest generated; stderr empty; artifact manifest.json written; final state manifest generated" \
|
|
70
|
+
--verifier-contract-id cli-manifest
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Legacy evidence can still match by task, surface, observable assertions, and
|
|
74
|
+
artifact reference. Explicit `--verifier-contract-id` is less ambiguous.
|
|
75
|
+
|
|
76
|
+
## Gate Behavior
|
|
77
|
+
|
|
78
|
+
Tasks without verifier contracts keep existing behavior.
|
|
79
|
+
|
|
80
|
+
For tasks with required verifier entries, `qa-release` and `release-readiness`
|
|
81
|
+
block when evidence is missing, failed, or lacks observable outcome validation.
|
|
82
|
+
Missing keys use stable names such as:
|
|
83
|
+
|
|
84
|
+
- `verifierContract.<id>.evidence`
|
|
85
|
+
- `verifierContract.<id>.observableOutcome`
|
|
86
|
+
|
|
87
|
+
Optional verifiers are rendered in context and handoffs but do not block gates.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jterrats/open-orchestra",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"workspaces": [
|
|
6
6
|
"extensions/vscode-open-orchestra",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
|
|
17
17
|
"test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
|
|
18
18
|
"test:e2e": "npm run build && npm run site:build && playwright test",
|
|
19
|
-
"test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
|
|
19
|
+
"test:e2e:init": "node --test --test-concurrency=1 e2e/stubbed-provider-mcp-contract.test.js e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
|
|
20
20
|
"test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js e2e/runtime-multi-squad.test.js",
|
|
21
21
|
"test:e2e:security": "npm run build && node --test e2e/security-boundaries.test.js",
|
|
22
22
|
"test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
|