@jterrats/open-orchestra 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/automation-evidence.d.ts +1 -1
  2. package/dist/automation-evidence.js +133 -11
  3. package/dist/automation-evidence.js.map +1 -1
  4. package/dist/command-manifest.js +3 -3
  5. package/dist/command-manifest.js.map +1 -1
  6. package/dist/phase-playbooks.js +2 -0
  7. package/dist/phase-playbooks.js.map +1 -1
  8. package/dist/qa-coverage-evidence.d.ts +3 -0
  9. package/dist/qa-coverage-evidence.js +92 -0
  10. package/dist/qa-coverage-evidence.js.map +1 -0
  11. package/dist/qa-coverage-guidance.d.ts +6 -0
  12. package/dist/qa-coverage-guidance.js +141 -0
  13. package/dist/qa-coverage-guidance.js.map +1 -0
  14. package/dist/qa-coverage-rules.d.ts +7 -0
  15. package/dist/qa-coverage-rules.js +127 -0
  16. package/dist/qa-coverage-rules.js.map +1 -0
  17. package/dist/qa-coverage-types.d.ts +47 -0
  18. package/dist/qa-coverage-types.js +2 -0
  19. package/dist/qa-coverage-types.js.map +1 -0
  20. package/dist/qa-coverage.d.ts +2 -20
  21. package/dist/qa-coverage.js +42 -132
  22. package/dist/qa-coverage.js.map +1 -1
  23. package/dist/recoverable-failure-lessons.d.ts +2 -0
  24. package/dist/recoverable-failure-lessons.js +55 -0
  25. package/dist/recoverable-failure-lessons.js.map +1 -0
  26. package/dist/release-readiness.js +3 -1
  27. package/dist/release-readiness.js.map +1 -1
  28. package/dist/roles/qa-ux-roles.js +5 -0
  29. package/dist/roles/qa-ux-roles.js.map +1 -1
  30. package/dist/runtime-adapters.js +1 -1
  31. package/dist/runtime-adapters.js.map +1 -1
  32. package/dist/runtime-completion-validation.d.ts +16 -0
  33. package/dist/runtime-completion-validation.js +206 -0
  34. package/dist/runtime-completion-validation.js.map +1 -0
  35. package/dist/runtime-lifecycle-watch.js +43 -37
  36. package/dist/runtime-lifecycle-watch.js.map +1 -1
  37. package/dist/runtime-parent-action-dispatch.d.ts +2 -1
  38. package/dist/runtime-parent-action-dispatch.js +94 -12
  39. package/dist/runtime-parent-action-dispatch.js.map +1 -1
  40. package/dist/runtime-spawn-bridge.js +6 -0
  41. package/dist/runtime-spawn-bridge.js.map +1 -1
  42. package/dist/skills-catalog.js +2 -0
  43. package/dist/skills-catalog.js.map +1 -1
  44. package/dist/task-graph-commands.js +3 -0
  45. package/dist/task-graph-commands.js.map +1 -1
  46. package/dist/types/runtime.d.ts +23 -0
  47. package/dist/types/tasks.d.ts +3 -0
  48. package/dist/types.d.ts +1 -1
  49. package/dist/types.js.map +1 -1
  50. package/dist/workflow-evidence-service.js +2 -0
  51. package/dist/workflow-evidence-service.js.map +1 -1
  52. package/dist/workflow-gates.js +6 -0
  53. package/dist/workflow-gates.js.map +1 -1
  54. package/dist/workflow-run-commands.js +87 -3
  55. package/dist/workflow-run-commands.js.map +1 -1
  56. package/dist/workflow-task-service.js +3 -0
  57. package/dist/workflow-task-service.js.map +1 -1
  58. package/docs/claude-adapter-qa-matrix.md +31 -19
  59. package/docs/e2e-test-batteries.md +3 -3
  60. package/docs/runtime-adapters.md +28 -18
  61. package/docs/traceability-flow.md +14 -4
  62. package/package.json +2 -2
@@ -29,7 +29,7 @@ entry points a user or CI runner actually executes.
29
29
  | Browser console | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e` | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior | Playwright report, screenshots/traces on failure |
30
30
  | Public site | Documentation/site navigation, docs catalog, architecture viewer, mobile fit | `npm run test:e2e` | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit | Playwright report |
31
31
  | Runtime manual queue | Manual runtime delegation in a `/tmp` workspace | `npm run test:e2e:runtime` | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content |
32
- | Init refresh environments | Simulated Codex, Claude, Cursor, generic workspaces | `node --test e2e/init-refresh-environments.test.js` | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks are updated only inside managed ranges | filesystem diff assertions |
32
+ | Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces | `npm run test:e2e:init` | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON |
33
33
  | Workflow lifecycle CLI | CLI workflow run, gate, resume, QA failback, release readiness | `node --test e2e/workflow-lifecycle-cli.test.js` | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence | JSON output, events, handoffs |
34
34
 
35
35
  ## P1 High-Risk Regression Batteries
@@ -76,8 +76,8 @@ the packaging/install path is wrong.
76
76
 
77
77
  1. Keep `e2e/runtime-manual-queue.test.js` release-blocking as runtime
78
78
  delegation evolves.
79
- 2. Add `e2e/init-refresh-environments.test.js` for Codex, Claude, Cursor, and
80
- generic project simulations.
79
+ 2. Keep `e2e/init-onboarding.test.js` covering Codex, Claude, Cursor,
80
+ VS Code/GitHub Copilot, Windsurf, and generic project simulations.
81
81
  3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
82
82
  resume, and release readiness.
83
83
  4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.
@@ -204,7 +204,7 @@ have two supported paths:
204
204
  `runtime parent-actions --task <id> --dispatch --until-idle --runtime <runtime-id>`.
205
205
  The dispatcher repeatedly inspects pending parent actions, dispatches only
206
206
  safe actions for the active runtime, records spawned and active lifecycle
207
- events with stable runtime child ids or deterministic fallback labels, applies
207
+ events with stable runtime child ids or verified callback correlation ids, applies
208
208
  `runtime watch` completions when expected handoff artifacts appear, resumes
209
209
  paused workflow runs, and continues across later phases until idle or timeout.
210
210
 
@@ -219,11 +219,12 @@ access. This keeps the boundary explicit: Orchestra emits auditable actions and
219
219
  lifecycle commands; the active parent runtime executes native tools such as
220
220
  Codex `spawn_agent`, and the dispatcher only consumes actions that are safe for
221
221
  the runtime declared on the command line. For Claude, the tested dispatch
222
- contract accepts `claude-agent-request` with `tool=claude-code-agent`, records
223
- `spawned` and `active` lifecycle states with a deterministic
224
- `claude-code-agent:<session>` label when no native child id is available, and
225
- remains idempotent across repeated dispatch attempts. Orchestra does not call
226
- Claude Code, Anthropic APIs, or another provider API.
222
+ contract accepts `claude-agent-request` with `tool=claude-code-agent`, but it
223
+ records `spawned` and `active` only when the active parent runtime is Claude and
224
+ the native callback capability is explicitly verified. Unsupported Codex, CI,
225
+ non-Claude, or callback-unavailable contexts return fallback guidance and do not
226
+ claim native execution. Orchestra does not call Claude Code, Anthropic APIs, or
227
+ another provider API.
227
228
 
228
229
  Runtime lifecycle watching is adapter-driven. Each inspected session reports a
229
230
  `watcher` object with adapter id, detection mode, support level, fallback
@@ -242,10 +243,12 @@ not proof that Orchestra can invoke Claude Code or Anthropic APIs by itself.
242
243
  The tested local behavior covers:
243
244
 
244
245
  - Dispatch support: eligible `claude-agent-request` actions for `claude-cli`
245
- with `tool=claude-code-agent` can be consumed by
246
+ with `tool=claude-code-agent` can be inspected by
246
247
  `runtime parent-actions --dispatch --runtime claude-cli`. The dispatch path
247
- records `spawned` and `active` lifecycle state with a stable child identifier
248
- or deterministic `claude-code-agent:<session>` fallback label.
248
+ records `spawned` and `active` lifecycle state only when the bridge verifies a
249
+ Claude parent runtime and callback capability. In local contract tests this is
250
+ simulated with explicit environment markers; in unsupported environments the
251
+ action is skipped with manual fallback guidance.
249
252
  - Alias policy: `claude-code-agent` is the only auto-dispatchable Claude tool
250
253
  name in the tested contract. `Task` is a legacy/manual alias and is skipped
251
254
  as `tool-mismatch`; accepting it in auto-dispatch requires new tests and
@@ -254,19 +257,26 @@ The tested local behavior covers:
254
257
  terminal, mismatched, or unavailable actions return structured eligibility
255
258
  metadata, fallback guidance, prompt artifact, expected result artifact, and
256
259
  manual lifecycle commands. Fallback never runs the phase in the parent agent
257
- silently and never switches to direct provider APIs.
260
+ silently, never records native Claude lifecycle events, and never switches to
261
+ direct provider APIs.
258
262
  - Guardrails: dispatch is bounded by runtime guardrails, runtime filters,
259
263
  session status, safety state, action kind, tool name, and stale-session
260
264
  checks. It preserves `directProviderApiAllowed=false` for runtime-native
261
265
  delegation artifacts.
262
- - Completion reconciliation: current tested support relies on explicit
263
- lifecycle events and bounded expected-artifact inspection. GH-434 tracks
264
- stricter validation of task id, phase, role, runtime, session id, and safe
265
- expected artifact path before a Claude session is marked complete.
266
- - Gate preservation: auto-dispatch must not approve or skip human gates. GH-435
267
- tracks the dedicated regression suite for safe workflow resume across
268
- `gates=none`, `gates=phase`, `gates=all`, multi-phase dispatch, and manual
269
- fallback recovery.
266
+ - Completion reconciliation: `runtime watch` validates the expected completion
267
+ metadata before marking a Claude session complete. The validation checks task
268
+ id, phase, role, runtime, session id, and the safe expected handoff path, and
269
+ it also requires the final handoff artifact to repeat those metadata fields.
270
+ It skips mismatches, missing artifact metadata, unsafe paths, and duplicate
271
+ completions with explicit reasons instead of treating any handoff file as
272
+ completion proof. Native immediate `completionResult` payloads use the same
273
+ validation path when present.
274
+ - Gate preservation: auto-dispatch must not approve or skip human gates.
275
+ `workflow run --resume` holds unapproved gates until
276
+ `workflow gate-approve` records explicit approval, and runtime lifecycle
277
+ auto-resume records no gate approval events. The regression suite covers safe
278
+ non-gated resume, unapproved gate hold behavior, opt-out, queued/pending
279
+ messaging, and multi-pass parent action dispatch.
270
280
 
271
281
  Manual recovery for a skipped or unavailable Claude action:
272
282
 
@@ -54,10 +54,20 @@ orchestra review --task STORY-1 --role qa --result approve --findings "..." --re
54
54
  ```
55
55
 
56
56
  Developer-to-QA handoff should include touched files, commands, known gaps, and
57
- recommended Playwright, CLI, shell, or API coverage. `qa coverage` maps each
58
- acceptance criterion to `covered`, `planned`, `skipped`, or `gap` using task
59
- paths, project scripts, and existing evidence; release readiness surfaces
60
- unresolved QA automation gaps before promotion.
57
+ recommended Playwright, CLI, shell, API, integration, workflow, mobile, desktop,
58
+ data, or generated-artifact coverage. `qa coverage` maps each acceptance
59
+ criterion to `covered`, `weak`, `missing`, `deferred`, or `blocked`
60
+ using task paths, project scripts, and existing evidence; release readiness and
61
+ the `qa-release` gate surface unresolved QA automation gaps before promotion.
62
+
63
+ Generated artifacts are a first-class QA surface. When rules, skills, runtime
64
+ guidance, Markdown files, MDC files, or managed bootstrap blocks change,
65
+ evidence must assert generated paths, managed metadata, target-specific content,
66
+ refresh/drift behavior, user-content preservation, and absence of wrong-target
67
+ content. CLI evidence must assert exit code, stdout, stderr, generated
68
+ files/events, and final state. Integration evidence must include receiver-side
69
+ sandbox/mock/contract/webhook/event/log validation or an explicit deferred owner
70
+ and rationale.
61
71
 
62
72
  Evidence summaries should name the acceptance criterion they cover or say
63
73
  "covers all acceptance criteria" when a single artifact proves the full story.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jterrats/open-orchestra",
3
- "version": "1.0.10",
3
+ "version": "1.0.11",
4
4
  "type": "module",
5
5
  "workspaces": [
6
6
  "extensions/vscode-open-orchestra",
@@ -16,7 +16,7 @@
16
16
  "test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
17
17
  "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
18
18
  "test:e2e": "npm run build && npm run site:build && playwright test",
19
- "test:e2e:init": "node --test e2e/init-onboarding.test.js",
19
+ "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
20
20
  "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js",
21
21
  "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
22
22
  "lint": "eslint . && prettier --check \"{bin,e2e,scripts,test,src}/**/*.js\" \"{site,web-console}/src/**/*.{css,js,jsx}\" \"{site,web-console}/*.{html,js,json}\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.{js,json}\"",