npm - @jterrats/open-orchestra - Versions diffs - 1.0.10 → 1.0.11 - Mend

@jterrats/open-orchestra 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

package/dist/automation-evidence.d.ts +1 -1
package/dist/automation-evidence.js +133 -11
package/dist/automation-evidence.js.map +1 -1
package/dist/command-manifest.js +3 -3
package/dist/command-manifest.js.map +1 -1
package/dist/phase-playbooks.js +2 -0
package/dist/phase-playbooks.js.map +1 -1
package/dist/qa-coverage-evidence.d.ts +3 -0
package/dist/qa-coverage-evidence.js +92 -0
package/dist/qa-coverage-evidence.js.map +1 -0
package/dist/qa-coverage-guidance.d.ts +6 -0
package/dist/qa-coverage-guidance.js +141 -0
package/dist/qa-coverage-guidance.js.map +1 -0
package/dist/qa-coverage-rules.d.ts +7 -0
package/dist/qa-coverage-rules.js +127 -0
package/dist/qa-coverage-rules.js.map +1 -0
package/dist/qa-coverage-types.d.ts +47 -0
package/dist/qa-coverage-types.js +2 -0
package/dist/qa-coverage-types.js.map +1 -0
package/dist/qa-coverage.d.ts +2 -20
package/dist/qa-coverage.js +42 -132
package/dist/qa-coverage.js.map +1 -1
package/dist/recoverable-failure-lessons.d.ts +2 -0
package/dist/recoverable-failure-lessons.js +55 -0
package/dist/recoverable-failure-lessons.js.map +1 -0
package/dist/release-readiness.js +3 -1
package/dist/release-readiness.js.map +1 -1
package/dist/roles/qa-ux-roles.js +5 -0
package/dist/roles/qa-ux-roles.js.map +1 -1
package/dist/runtime-adapters.js +1 -1
package/dist/runtime-adapters.js.map +1 -1
package/dist/runtime-completion-validation.d.ts +16 -0
package/dist/runtime-completion-validation.js +206 -0
package/dist/runtime-completion-validation.js.map +1 -0
package/dist/runtime-lifecycle-watch.js +43 -37
package/dist/runtime-lifecycle-watch.js.map +1 -1
package/dist/runtime-parent-action-dispatch.d.ts +2 -1
package/dist/runtime-parent-action-dispatch.js +94 -12
package/dist/runtime-parent-action-dispatch.js.map +1 -1
package/dist/runtime-spawn-bridge.js +6 -0
package/dist/runtime-spawn-bridge.js.map +1 -1
package/dist/skills-catalog.js +2 -0
package/dist/skills-catalog.js.map +1 -1
package/dist/task-graph-commands.js +3 -0
package/dist/task-graph-commands.js.map +1 -1
package/dist/types/runtime.d.ts +23 -0
package/dist/types/tasks.d.ts +3 -0
package/dist/types.d.ts +1 -1
package/dist/types.js.map +1 -1
package/dist/workflow-evidence-service.js +2 -0
package/dist/workflow-evidence-service.js.map +1 -1
package/dist/workflow-gates.js +6 -0
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-run-commands.js +87 -3
package/dist/workflow-run-commands.js.map +1 -1
package/dist/workflow-task-service.js +3 -0
package/dist/workflow-task-service.js.map +1 -1
package/docs/claude-adapter-qa-matrix.md +31 -19
package/docs/e2e-test-batteries.md +3 -3
package/docs/runtime-adapters.md +28 -18
package/docs/traceability-flow.md +14 -4
package/package.json +2 -2

package/docs/e2e-test-batteries.md CHANGED Viewed

@@ -29,7 +29,7 @@ entry points a user or CI runner actually executes.
 | Browser console           | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e`                                                  | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior                                               | Playwright report, screenshots/traces on failure                   |
 | Public site               | Documentation/site navigation, docs catalog, architecture viewer, mobile fit                         | `npm run test:e2e`                                                  | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit                                                       | Playwright report                                                  |
 | Runtime manual queue      | Manual runtime delegation in a `/tmp` workspace                                                      | `npm run test:e2e:runtime`                                          | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content                       |
-| Init refresh environments | Simulated Codex, Claude, Cursor, generic workspaces                                                  | `node --test e2e/init-refresh-environments.test.js`                 | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks are updated only inside managed ranges          | filesystem diff assertions                                         |
+| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces             | `npm run test:e2e:init`                                             | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON                       |
 | Workflow lifecycle CLI    | CLI workflow run, gate, resume, QA failback, release readiness                                       | `node --test e2e/workflow-lifecycle-cli.test.js`                    | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence                   | JSON output, events, handoffs                                      |
 ## P1 High-Risk Regression Batteries
@@ -76,8 +76,8 @@ the packaging/install path is wrong.
 1. Keep `e2e/runtime-manual-queue.test.js` release-blocking as runtime
    delegation evolves.
-2. Add `e2e/init-refresh-environments.test.js` for Codex, Claude, Cursor, and
-   generic project simulations.
+2. Keep `e2e/init-onboarding.test.js` covering Codex, Claude, Cursor, VS Code,
+   GitHub Copilot, Windsurf, and generic project simulations.
 3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
    resume, and release readiness.
 4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.

package/docs/runtime-adapters.md CHANGED Viewed

@@ -204,7 +204,7 @@ have two supported paths:
   `runtime parent-actions --task <id> --dispatch --until-idle --runtime <runtime-id>`.
   The dispatcher repeatedly inspects pending parent actions, dispatches only
   safe actions for the active runtime, records spawned and active lifecycle
-  events with stable runtime child ids or deterministic fallback labels, applies
+  events with stable runtime child ids or verified callback correlation ids, applies
   `runtime watch` completions when expected handoff artifacts appear, resumes
   paused workflow runs, and continues across later phases until idle or timeout.
@@ -219,11 +219,12 @@ access. This keeps the boundary explicit: Orchestra emits auditable actions and
 lifecycle commands; the active parent runtime executes native tools such as
 Codex `spawn_agent`, and the dispatcher only consumes actions that are safe for
 the runtime declared on the command line. For Claude, the tested dispatch
-contract accepts `claude-agent-request` with `tool=claude-code-agent`, records
-`spawned` and `active` lifecycle states with a deterministic
-`claude-code-agent:<session>` label when no native child id is available, and
-remains idempotent across repeated dispatch attempts. Orchestra does not call
-Claude Code, Anthropic APIs, or another provider API.
+contract accepts `claude-agent-request` with `tool=claude-code-agent`, but it
+records `spawned` and `active` only when the active parent runtime is Claude and
+the native callback capability is explicitly verified. Unsupported contexts (Codex,
+CI, other non-Claude runtimes, or callback-unavailable environments) return fallback guidance and do not
+claim native execution. Orchestra does not call Claude Code, Anthropic APIs, or
+another provider API.
 Runtime lifecycle watching is adapter-driven. Each inspected session reports a
 `watcher` object with adapter id, detection mode, support level, fallback
@@ -242,10 +243,12 @@ not proof that Orchestra can invoke Claude Code or Anthropic APIs by itself.
 The tested local behavior covers:
 - Dispatch support: eligible `claude-agent-request` actions for `claude-cli`
-  with `tool=claude-code-agent` can be consumed by
+  with `tool=claude-code-agent` can be inspected by
   `runtime parent-actions --dispatch --runtime claude-cli`. The dispatch path
-  records `spawned` and `active` lifecycle state with a stable child identifier
-  or deterministic `claude-code-agent:<session>` fallback label.
+  records `spawned` and `active` lifecycle state only when the bridge verifies a
+  Claude parent runtime and callback capability. In local contract tests this is
+  simulated with explicit environment markers; in unsupported environments the
+  action is skipped with manual fallback guidance.
 - Alias policy: `claude-code-agent` is the only auto-dispatchable Claude tool
   name in the tested contract. `Task` is a legacy/manual alias and is skipped
   as `tool-mismatch`; accepting it in auto-dispatch requires new tests and
@@ -254,19 +257,26 @@ The tested local behavior covers:
   terminal, mismatched, or unavailable actions return structured eligibility
   metadata, fallback guidance, prompt artifact, expected result artifact, and
   manual lifecycle commands. Fallback never runs the phase in the parent agent
-  silently and never switches to direct provider APIs.
+  silently, never records native Claude lifecycle events, and never switches to
+  direct provider APIs.
 - Guardrails: dispatch is bounded by runtime guardrails, runtime filters,
   session status, safety state, action kind, tool name, and stale-session
   checks. It preserves `directProviderApiAllowed=false` for runtime-native
   delegation artifacts.
-- Completion reconciliation: current tested support relies on explicit
-  lifecycle events and bounded expected-artifact inspection. GH-434 tracks
-  stricter validation of task id, phase, role, runtime, session id, and safe
-  expected artifact path before a Claude session is marked complete.
-- Gate preservation: auto-dispatch must not approve or skip human gates. GH-435
-  tracks the dedicated regression suite for safe workflow resume across
-  `gates=none`, `gates=phase`, `gates=all`, multi-phase dispatch, and manual
-  fallback recovery.
+- Completion reconciliation: `runtime watch` validates the expected completion
+  metadata before marking a Claude session complete. The validation checks task
+  id, phase, role, runtime, session id, and the safe expected handoff path, and
+  it also requires the final handoff artifact to repeat those metadata fields.
+  It skips mismatches, missing artifact metadata, unsafe paths, and duplicate
+  completions with explicit reasons instead of treating any handoff file as
+  completion proof. Native immediate `completionResult` payloads use the same
+  validation path when present.
+- Gate preservation: auto-dispatch must not approve or skip human gates.
+  `workflow run --resume` now holds unapproved gates until
+  `workflow gate-approve` records explicit approval, and runtime lifecycle
+  auto-resume records no gate approval events. The regression suite covers safe
+  non-gated resume, unapproved gate hold behavior, opt-out, queued/pending
+  messaging, and multi-pass parent action dispatch.
 Manual recovery for a skipped or unavailable Claude action:

package/docs/traceability-flow.md CHANGED Viewed

@@ -54,10 +54,20 @@ orchestra review --task STORY-1 --role qa --result approve --findings "..." --re
 ```
 Developer-to-QA handoff should include touched files, commands, known gaps, and
-recommended Playwright, CLI, shell, or API coverage. `qa coverage` maps each
-acceptance criterion to `covered`, `planned`, `skipped`, or `gap` using task
-paths, project scripts, and existing evidence; release readiness surfaces
-unresolved QA automation gaps before promotion.
+recommended Playwright, CLI, shell, API, integration, workflow, mobile, desktop,
+data, or generated-artifact coverage. `qa coverage` maps each acceptance
+criterion to `covered`, `weak`, `missing`, `deferred`, or `blocked`
+using task paths, project scripts, and existing evidence; release readiness and
+the `qa-release` gate surface unresolved QA automation gaps before promotion.
+Generated artifacts are a first-class QA surface. When rules, skills, runtime
+guidance, Markdown files, MDC files, or managed bootstrap blocks change,
+evidence must assert generated paths, managed metadata, target-specific content,
+refresh/drift behavior, user-content preservation, and absence of wrong-target
+content. CLI evidence must assert exit code, stdout, stderr, generated
+files/events, and final state. Integration evidence must include receiver-side
+sandbox/mock/contract/webhook/event/log validation or an explicit deferral with a
+named owner and rationale.
 Evidence summaries should name the acceptance criterion they cover or say
 "covers all acceptance criteria" when a single artifact proves the full story.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jterrats/open-orchestra",
-  "version": "1.0.10",
+  "version": "1.0.11",
   "type": "module",
   "workspaces": [
     "extensions/vscode-open-orchestra",
@@ -16,7 +16,7 @@
     "test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
     "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
     "test:e2e": "npm run build && npm run site:build && playwright test",
-    "test:e2e:init": "node --test e2e/init-onboarding.test.js",
+    "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
     "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js",
     "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
     "lint": "eslint . && prettier --check \"{bin,e2e,scripts,test,src}/**/*.js\" \"{site,web-console}/src/**/*.{css,js,jsx}\" \"{site,web-console}/*.{html,js,json}\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.{js,json}\"",