npm - @jterrats/open-orchestra - Versions diffs - 1.0.10 → 1.0.12 - Mend

@jterrats/open-orchestra 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/CHANGELOG.md +33 -0
package/dist/automation-evidence.d.ts +1 -1
package/dist/automation-evidence.js +133 -11
package/dist/automation-evidence.js.map +1 -1
package/dist/command-manifest.js +4 -4
package/dist/command-manifest.js.map +1 -1
package/dist/phase-playbooks.js +2 -0
package/dist/phase-playbooks.js.map +1 -1
package/dist/qa-coverage-evidence.d.ts +3 -0
package/dist/qa-coverage-evidence.js +92 -0
package/dist/qa-coverage-evidence.js.map +1 -0
package/dist/qa-coverage-guidance.d.ts +6 -0
package/dist/qa-coverage-guidance.js +141 -0
package/dist/qa-coverage-guidance.js.map +1 -0
package/dist/qa-coverage-rules.d.ts +7 -0
package/dist/qa-coverage-rules.js +127 -0
package/dist/qa-coverage-rules.js.map +1 -0
package/dist/qa-coverage-types.d.ts +47 -0
package/dist/qa-coverage-types.js +2 -0
package/dist/qa-coverage-types.js.map +1 -0
package/dist/qa-coverage.d.ts +2 -20
package/dist/qa-coverage.js +42 -132
package/dist/qa-coverage.js.map +1 -1
package/dist/recoverable-failure-lessons.d.ts +2 -0
package/dist/recoverable-failure-lessons.js +55 -0
package/dist/recoverable-failure-lessons.js.map +1 -0
package/dist/release-readiness.js +3 -1
package/dist/release-readiness.js.map +1 -1
package/dist/roles/qa-ux-roles.js +5 -0
package/dist/roles/qa-ux-roles.js.map +1 -1
package/dist/runtime-adapters.js +1 -1
package/dist/runtime-adapters.js.map +1 -1
package/dist/runtime-completion-validation.d.ts +16 -0
package/dist/runtime-completion-validation.js +206 -0
package/dist/runtime-completion-validation.js.map +1 -0
package/dist/runtime-execution.js +3 -0
package/dist/runtime-execution.js.map +1 -1
package/dist/runtime-lifecycle-watch.js +43 -37
package/dist/runtime-lifecycle-watch.js.map +1 -1
package/dist/runtime-parent-action-dispatch.d.ts +2 -1
package/dist/runtime-parent-action-dispatch.js +94 -12
package/dist/runtime-parent-action-dispatch.js.map +1 -1
package/dist/runtime-spawn-bridge.js +6 -0
package/dist/runtime-spawn-bridge.js.map +1 -1
package/dist/skills-catalog.js +2 -0
package/dist/skills-catalog.js.map +1 -1
package/dist/task-graph-commands.js +21 -14
package/dist/task-graph-commands.js.map +1 -1
package/dist/types/runtime.d.ts +23 -0
package/dist/types/tasks.d.ts +3 -0
package/dist/types.d.ts +1 -1
package/dist/types.js.map +1 -1
package/dist/workflow-evidence-service.js +2 -0
package/dist/workflow-evidence-service.js.map +1 -1
package/dist/workflow-gates.js +6 -0
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-run-commands.js +104 -12
package/dist/workflow-run-commands.js.map +1 -1
package/dist/workflow-task-service.js +30 -1
package/dist/workflow-task-service.js.map +1 -1
package/docs/audio-video-transcription-skill.md +441 -45
package/docs/autonomous-workflow.md +38 -0
package/docs/backlog/web-code-editor-lsp-spike.md +289 -0
package/docs/claude-adapter-qa-matrix.md +31 -19
package/docs/context-vault.md +80 -0
package/docs/e2e-test-batteries.md +3 -3
package/docs/runtime-adapters.md +28 -18
package/docs/site-manifest.json +1 -0
package/docs/traceability-flow.md +14 -4
package/package.json +2 -2

package/docs/backlog/web-code-editor-lsp-spike.md ADDED Viewed

@@ -0,0 +1,289 @@
+# Spike: Orchestra-Aware Web Code Editor With LSP
+Backlog Item ID: EPIC-ORCHESTRA-AWARE-CODE-EDITOR
+GitHub Issue: GH-447
+Lead role: Architect
+Supporting roles: UX/UI, Developer, Security, QA
+Status: proposed
+## Goal
+Define the MVP architecture for an embedded web code editor that lets users
+inspect workspace-local files, use standard language intelligence, and perform
+governed edits without losing Orchestra task traceability, policy gates, QA
+evidence, or workspace isolation.
+This spike is architecture and planning only. It does not implement source
+changes.
+## Product Position
+The editor should not compete with VS Code as a full IDE. Its differentiator is
+workflow-aware editing: active task, backlog item, acceptance criteria, role
+contracts, context-pack suggestions, visible policy gates, task-scoped diffs,
+file timeline, rollback guidance, and quality warnings such as god-file risk,
+missing tests, stale generated docs, and release gate blockers.
+## Editor Choice
+Use CodeMirror 6 for the MVP, with Monaco kept as a later option if parity with
+VS Code's native language features becomes more valuable than bundle control
+and UI composition.
+CodeMirror 6 benefits: smaller, modular, easier to compose inside the existing
+React/Vite console, more controllable for mobile and constrained panels, and
+well suited to incremental viewer, diagnostics, diff preview, and governed-save
+slices. Costs: LSP support is less turnkey than Monaco and users expecting VS
+Code behavior may notice missing commands.
+Monaco benefits: closer to VS Code expectations with mature TypeScript and
+JavaScript language behavior, diagnostics, hover, completion, symbols, and
+go-to-definition. Costs: heavier runtime, worker/CSP complexity, harder mobile
+composition, and a stronger pull toward an IDE-like footprint.
+Adopt CodeMirror 6 for the first implementation slices. Revisit Monaco after
+the read-only viewer, diff integration, and governed write contract are proven.
+The LSP bridge contract should be editor-neutral so Monaco can be introduced
+later without changing policy, workspace, or evidence boundaries.
+## Architecture Boundary
+```mermaid
+flowchart LR
+  ui["Web console editor shell"]
+  context["Orchestra context sidebar"]
+  api["Editor API contract"]
+  policy["Policy gate service"]
+  workspace["Workspace file service"]
+  git["Git diff and timeline service"]
+  lsp["LSP bridge supervisor"]
+  server["Language server process"]
+  evidence["Evidence and review records"]
+  ui --> api
+  context --> api
+  api --> policy
+  api --> workspace
+  api --> git
+  api --> lsp
+  lsp --> server
+  policy --> evidence
+  workspace --> evidence
+  git --> evidence
+```
+The web console must remain a client of stable JSON contracts. The browser does
+not read arbitrary files directly, spawn processes, or decide write policy.
+## LSP Bridge Contract
+The LSP bridge is a local or SaaS-side service boundary that mediates language
+server lifecycle, document sync, diagnostics, and language requests.
+Required responsibilities:
+- accept only workspace-relative paths that resolve inside the active workspace;
+- require task id and editor session id for every opened document;
+- start language servers through array-based process APIs, never shell
+  interpolation;
+- keep one supervised language-server pool per workspace, language, and tenant
+  boundary;
+- enforce startup timeout, idle shutdown, max memory, max files, max document
+  size, and bounded request concurrency;
+- redact or suppress diagnostics that expose internal absolute paths or secret
+  values;
+- degrade to syntax-only editing when a language server is unavailable;
+- emit lifecycle and diagnostic summaries that can be attached as evidence.
+Initial target: TypeScript and JavaScript through the project-local TypeScript
+server or a pinned LSP-compatible wrapper. Extension targets are Python
+through pyright, Java through jdtls, .NET through OmniSharp or Roslyn LSP, Apex
+through a Salesforce language server if licensing and setup allow it, and custom
+domain servers through future extension manifests.
+LSP messages must never be trusted as policy input. They can inform diagnostics
+and navigation, but write permission remains owned by the policy gate service.
+## Workspace Isolation
+All file operations must be scoped to an active workspace root resolved by the
+server.
+Local mode:
+- bind local services to `127.0.0.1`;
+- reject absolute paths, path traversal, symlinks that escape the workspace,
+  and generated or secret-sensitive paths unless explicitly allowlisted;
+- use existing task and evidence state under `.agent-workflow/` as the durable
+  traceability source;
+- keep language servers local to the workspace and terminate them when the
+  workspace session closes.
+SaaS mode:
+- isolate tenants by account, workspace, runtime sandbox, storage prefix, and
+  language-server process boundary;
+- enforce data residency before source text or diagnostics cross regions;
+- avoid shared language-server processes across tenants;
+- store only policy-approved evidence summaries unless the user explicitly
+  attaches file snippets or diffs;
+- require audit records for open, preview, save request, policy decision, write,
+  rollback, and evidence attachment events.
+## Governed Edit Contract
+MVP starts read-only. Edit mode is a deliberate transition with visible task and
+policy state.
+Open file request fields: `taskId`, `workspaceId`, `relativePath`, `mode`
+(`readOnly` or `editIntent`), and optional `selection`, `contextPackId`, and
+`role`.
+Save request fields: `taskId`, `workspaceId`, `relativePath`, `baseRevision`,
+`proposedContentHash`, `patch`, `userIntent`, and `policyAcknowledgements`.
+Save response fields: `decision` (`allowed`, `blocked`, or `needsReview`),
+`diffSummary`, `policyFindings`, `evidenceId`, `rollbackHint`, and
+`nextActions`.
+Policy checks before write:
+- task exists, has a backlog item, and is active or explicitly selected;
+- path is inside owned write scope for the task;
+- file is not locked by another role or workflow;
+- file is not a release bump file unless the task permits release ownership;
+- generated files require source-of-truth confirmation;
+- security-sensitive paths require Security review;
+- save is based on the current file revision or requires conflict resolution;
+- diff preview is acknowledged before write.
+Every write must create or update a task-scoped diff record and evidence hook.
+The UI should offer rollback guidance based on Git state, but it should not
+perform destructive Git operations without a separate approval flow.
+## UX Flow
+Primary user: a human operator reviewing or making a governed task-scoped edit
+from the local or SaaS web console.
+MVP flow: user selects an active task; the console shows acceptance criteria,
+owned paths, gates, related files, and evidence gaps; the user opens a related
+file in read-only mode; the editor shows syntax highlighting, diagnostics when
+available, and a task context sidebar; the user requests edit mode; the console
+shows write policy status, locks, and required reviewers; the user edits,
+previews a diff, submits a save request, receives an allowed, blocked, or
+needs-review decision, and gets an evidence link plus next steps.
+Responsive behavior:
+- mobile defaults to task context, file list, and read-only code view with
+  collapsible diagnostics;
+- tablet uses stacked editor and context panels;
+- desktop uses three regions: file/navigation rail, editor/diff, and Orchestra
+  context sidebar;
+- no critical action should depend on horizontal scrolling;
+- keyboard users can open files, search, inspect diagnostics, preview diff,
+  request edit mode, and submit or cancel saves.
+Required states:
+- loading file and loading diagnostics;
+- empty related files;
+- language server unavailable with syntax-only fallback;
+- policy blocked with clear next action;
+- conflict detected with reload or compare options;
+- save succeeded with evidence link;
+- save failed without exposing stack traces or internal paths.
+## Security Constraints
+Security review is mandatory before implementation because this feature touches
+file paths, process execution, workspace source code, secrets, network calls,
+and future multi-tenant boundaries.
+Non-negotiables:
+- no shell interpolation for language server startup or Git operations;
+- validate and normalize every path server-side;
+- never expose stack traces, host paths, environment variables, or raw process
+  errors to the browser;
+- scan proposed diffs for configured secret patterns before write;
+- treat workspace files and LSP responses as untrusted input;
+- sanitize markdown, diagnostics, and hover content before rendering;
+- apply content security policy before enabling Monaco workers or remote
+  extension assets;
+- fail closed when policy, path validation, revision checks, secret scan, or
+  audit write fails;
+- keep local ports bound to `127.0.0.1` by default;
+- require explicit tenant and data residency controls before SaaS rollout.
+## QA Evidence Strategy
+The spike output is validated by review evidence. Implementation stories need
+observable acceptance evidence.
+Recommended automated coverage:
+- unit tests for path normalization, task write-scope policy, save contract,
+  revision conflict handling, and LSP lifecycle state transitions;
+- contract tests for editor API request and response shapes;
+- integration tests with fake language-server processes for diagnostics,
+  timeout, crash, unavailable server, and idle shutdown;
+- Playwright tests for the read-only viewer, responsive layout, keyboard flow,
+  policy-blocked save, conflict recovery, and the evidence link shown after a
+  successful save;
+- security tests for traversal, symlink escape, secret diff rejection, unsafe
+  diagnostic rendering, and blocked process arguments.
+Evidence required per implementation story:
+- exact commands and pass/fail results;
+- AC-to-evidence matrix;
+- screenshots or traces for desktop and mobile editor flows;
+- sample diff/evidence artifact;
+- security review result for file/process/network changes;
+- documented deferred language support when a language server is unavailable.
+## Implementation Slices
+1. Read-only code viewer and file open contract: CodeMirror 6 surface, syntax
+   highlighting, loading/empty/error states, path validation, task association,
+   and a Playwright smoke test.
+2. Task context sidebar: active task, backlog item, acceptance criteria, roles,
+   owned paths, context-pack references, evidence gaps, and gate warnings.
+3. Git diff and task timeline integration: revision metadata, diff preview,
+   task-scoped file timeline, conflict detection, and evidence hook.
+4. TypeScript/JavaScript LSP bridge MVP: supervised lifecycle, diagnostics,
+   hover, completion, go-to-definition, fallback states, and fake LSP tests.
+5. Governed edit mode: edit intent, patch submission, policy checks, diff
+   acknowledgement, save response, blocked UX, and needs-review UX.
+6. Security hardening: traversal and symlink protection, secret scanning,
+   process allowlist, diagnostic sanitization, CSP review, and local binding
+   checks.
+7. SaaS isolation design: tenant sandboxing, process isolation, storage and data
+   residency policy, audit logs, quota controls, and abuse controls.
+8. Language extension framework: server capability manifest, health status,
+   per-language setup guidance, and extension points.
+## Open Risks
+- LSP servers execute project-aware code paths and can be expensive or unsafe if
+  not tightly supervised.
+- Monaco may be required later if CodeMirror extension quality does not meet
+  user expectations for TypeScript-heavy projects.
+- SaaS editing has materially higher tenant isolation, data residency, and audit
+  requirements than local mode.
+- Secret detection can produce false negatives and should not be the only
+  control before writing.
+- Generated-file editing can violate source-of-truth contracts unless policy
+  gates are strict.
+## Recommended Next Stories
+- GH-447-A: Build read-only CodeMirror file viewer with task-scoped open
+  contract.
+- GH-447-B: Add Orchestra task context sidebar for editor sessions.
+- GH-447-C: Add Git diff preview and task-scoped file timeline.
+- GH-447-D: Implement TypeScript/JavaScript LSP bridge MVP with fake LSP tests.
+- GH-447-E: Implement governed edit mode with policy-gated saves.
+- GH-447-F: Add editor security hardening and negative test matrix.
+- GH-447-G: Define SaaS tenant isolation and data residency ADR.

package/docs/claude-adapter-qa-matrix.md CHANGED Viewed

@@ -10,9 +10,10 @@ claim real Claude Code native execution or Anthropic/provider API execution.
 | --- | --- | --- | --- |
 | #432 / `GH-432-CLAUDE-ADAPTER-CONTRACT` | Claude action eligibility, skip reasons, alias policy, non-regression docs | QA handoff, release handoff, `npm run build`, `node --test test/runtime-adapters.test.js` with 51 passing tests, `git diff --check` | Pass |
 | #433 / `GH-433-CLAUDE-DISPATCH-BRIDGE` | Dispatch bridge boundary, spawned/active lifecycle recording, idempotency, fallback guidance | QA handoff, release handoff, `npm run build`, `node --test test/runtime-adapters.test.js` with 54 passing tests, `git diff --check` | Pass |
-| #434 / `GH-434-CLAUDE-COMPLETION-RECONCILIATION` | Strict completion validation by task, phase, role, runtime, session, and expected artifact | Issue exists and remains open; no local QA handoff found for this slice | Pending |
-| #435 / `GH-435-CLAUDE-GATE-PRESERVATION` | Safe workflow resume and human gate preservation regression coverage | Issue exists and remains open; no local QA handoff found for this slice | Deferred |
-| #436 / `GH-436-CLAUDE-DOCS-QA-EVIDENCE` | Documentation, QA matrix, release evidence, support-level framing | This document, `docs/runtime-adapters.md`, GH-436 QA handoff | Pending |
+| #434 / `GH-434-CLAUDE-COMPLETION-RECONCILIATION` | Strict completion validation by task, phase, role, runtime, session, and expected artifact | `runtime watch` validation tests in `test/runtime-adapters.test.js`; `npm run build`; `node --test test/runtime-adapters.test.js` with 58 passing tests | Pass |
+| #435 / `GH-435-CLAUDE-GATE-PRESERVATION` | Safe workflow resume and human gate preservation regression coverage | `npm run build`; `node --test test/autonomous-workflow-cli.test.js`; `node --test test/runtime-adapters.test.js` with 59 passing tests | Pass |
+| #436 / `GH-436-CLAUDE-DOCS-QA-EVIDENCE` | Documentation, QA matrix, release evidence, support-level framing | This document, `docs/runtime-adapters.md`, GH-436 QA/release handoffs, and follow-up #434/#435 evidence updates | Pass |
+| #439 / `GH-439-CLAUDE-NATIVE-CALLBACK-BRIDGE` | Native callback bridge contract, fallback truthfulness, lifecycle validation | Local contract tests in `test/runtime-adapters.test.js`; `docs/runtime-adapters.md`; this matrix | In progress |
 ## Acceptance Criteria Matrix
@@ -28,21 +29,26 @@ claim real Claude Code native execution or Anthropic/provider API execution.
 | #433 | Repeated dispatch is idempotent and never creates duplicate lifecycle events for the same session. | CLI unit | GH-433 QA handoff | QA reports repeated dispatch keeps one spawned and one active event. | Pass |
 | #433 | Unavailable or unsupported native tool paths return explicit fallback guidance and manual lifecycle commands. | CLI unit/code review | GH-433 QA handoff | QA reports skipped result includes prompt artifact, expected result artifact, and manual spawned command. | Pass |
 | #433 | Tests cover successful dispatch, unavailable tool fallback, repeated dispatch idempotency, runtime mismatch, and guardrail rejection. | Automated tests | GH-433 QA handoff; `node --test test/runtime-adapters.test.js` with 54 passing tests | Required scenarios mapped to deterministic tests. | Pass |
-| #434 | Completion validation checks task id, phase, role, runtime, session id, and expected result artifact path. | Planned unit/watch tests | GitHub issue #434 | No local implementation or QA evidence reviewed in this slice. | Pending |
-| #434 | Wrong-task, wrong-role, wrong-runtime, wrong-session, missing, duplicate, and unsafe-path artifacts are rejected or skipped with explicit reasons. | Planned unit/watch tests | GitHub issue #434 | No local implementation or QA evidence reviewed in this slice. | Pending |
-| #434 | `runtime watch` records completed exactly once for a valid spawned or active Claude session. | Planned watch tests | GitHub issue #434 | No local implementation or QA evidence reviewed in this slice. | Pending |
-| #434 | Native immediate completion results follow the same validation rules when supported. | Planned contract tests | GitHub issue #434 | Native immediate completion is not claimed as supported by current evidence. | Pending |
-| #434 | Tests cover artifact validation, duplicate completion prevention, timeout/stale behavior, and safe path handling. | Planned automated tests | GitHub issue #434 | No local implementation or QA evidence reviewed in this slice. | Pending |
-| #435 | Verified completion resumes the paused run to the next safe phase when no human gate is pending. | Planned workflow tests | GitHub issue #435 | No local implementation or QA evidence reviewed in this slice. | Deferred |
-| #435 | `po-to-architect`, `qa-to-release`, and configured human gates remain paused until explicit approval. | Planned workflow tests/manual review | GitHub issue #435 | Dedicated regression evidence is still required before release claim. | Deferred |
-| #435 | Auto-dispatch never records gate approval or skips a gate. | Planned workflow tests | GitHub issue #435 | Dedicated regression evidence is still required before release claim. | Deferred |
-| #435 | Tests cover `gates=none`, `gates=phase`, `gates=all`, multi-phase dispatch until idle, manual fallback recovery, and GH-421 spawn-state messaging. | Planned CLI/workflow tests | GitHub issue #435 | No local implementation or QA evidence reviewed in this slice. | Deferred |
-| #435 | Existing Codex, Cursor, generic, VS Code, Windsurf, and OpenCode behavior is unchanged or covered by regression tests. | Planned regression tests | GitHub issue #435 | Broad cross-runtime regression evidence is still required. | Deferred |
+| #434 | Completion validation checks task id, phase, role, runtime, session id, and expected result artifact path. | Watch/contract tests | `test/runtime-adapters.test.js`; `src/runtime-completion-validation.ts` | `runtime watch` validates completion against structured spawn/session metadata before recording completed. | Pass |
+| #434 | Wrong-task, wrong-role, wrong-runtime, wrong-session, missing, duplicate, and unsafe-path artifacts are rejected or skipped with explicit reasons. | Watch/negative tests | `runtime watch rejects Claude completion metadata mismatches`; existing unsafe/missing/duplicate watch coverage | Mismatches are skipped with explicit reasons; unsafe paths do not create completed lifecycle events. | Pass |
+| #434 | `runtime watch` records completed exactly once for a valid spawned or active Claude session. | Watch test | `runtime watch completes a valid Claude session once` | Two watch passes produce one completed lifecycle event for the Claude session. | Pass |
+| #434 | Native immediate completion results follow the same validation rules when supported. | Watch/contract test | `runtime watch validates Claude native immediate completion results`; `src/runtime-completion-validation.ts` | Immediate `completionResult` payloads resolve through the same expected-artifact and artifact-metadata validator. | Pass |
+| #434 | Tests cover artifact validation, duplicate completion prevention, timeout/stale behavior, and safe path handling. | Automated tests | `node --test test/runtime-adapters.test.js` with 58 passing tests | Focused runtime adapter suite covers artifact metadata validation, duplicate ignored reasons, immediate completion payloads, and existing timeout/stale/unsafe-path behavior. | Pass |
+| #435 | Verified completion resumes the paused run to the next safe phase when no human gate is pending. | Runtime lifecycle tests | `runtime watch completes spawned sessions once and auto-resumes workflow`; `runtime lifecycle completion can opt out of auto-resume` | Runtime completion resumes safe non-gated work and keeps opt-out behavior. | Pass |
+| #435 | `po-to-architect`, `qa-to-release`, and configured human gates remain paused until explicit approval. | Workflow CLI tests | `workflow resume holds human gates until explicit approval` | Resume now holds unapproved gates and advances only after `workflow gate-approve`. | Pass |
+| #435 | Auto-dispatch never records gate approval or skips a gate. | Runtime lifecycle tests | `runtime lifecycle auto-resume does not approve human gates` | Runtime lifecycle auto-resume leaves paused work unapproved and emits no `AUTONOMOUS_GATE_APPROVED` event. | Pass |
+| #435 | Tests cover `gates=none`, `gates=phase`, `gates=all`, multi-phase dispatch until idle, manual fallback recovery, and GH-421 spawn-state messaging. | CLI/runtime regression tests | `test/autonomous-workflow-cli.test.js`; `test/runtime-adapters.test.js` | Existing runtime suite covers multi-pass dispatch, queued/pending messaging, manual recovery guidance, and gate modes; #435 adds stricter unapproved-gate hold coverage. | Pass |
+| #435 | Existing Codex, Cursor, generic, VS Code, Windsurf, and OpenCode behavior is unchanged or covered by regression tests. | Runtime regression tests | `test/runtime-adapters.test.js` | Runtime adapter catalog and cross-runtime parent action tests still pass. | Pass |
 | #436 | Runtime adapter docs document Claude dispatch support, alias policy, fallback behavior, manual recovery commands, guardrails, and gate preservation. | Documentation review | `docs/runtime-adapters.md` | Updated in this slice. | Pass |
 | #436 | QA matrix maps each GH-422 child story acceptance criterion to unit, workflow, CLI, or manual evidence. | Documentation | This file | Matrix records Pass/Pending/Deferred by criterion and evidence type. | Pass |
-| #436 | Release evidence includes exact commands, pass/fail results, unsupported CI/manual verification notes, and unresolved risks. | QA handoff/evidence | GH-436 handoff under `.agent-workflow/handoffs/` | Handoff records commands and recommended validations. | Pending |
+| #436 | Release evidence includes exact commands, pass/fail results, unsupported CI/manual verification notes, and unresolved risks. | QA handoff/evidence | GH-436 handoff under `.agent-workflow/handoffs/`; #434/#435 QA evidence and release-readiness checks | Handoffs and evidence record exact commands, pass/fail results, known unsupported Claude callback/provider claims, and residual real-transport risk. | Pass |
 | #436 | Documentation does not claim native Claude execution beyond tested behavior. | Documentation review | `docs/runtime-adapters.md`; this file | Docs frame support as parent-runtime contract plus manual/runtime-owned launch. | Pass |
-| #436 | Product/release review records go/no-go based on evidence and known limitations. | Review artifact | Pending release review for GH-436 | Needs release/product review after documentation QA. | Pending |
+| #436 | Product/release review records go/no-go based on evidence and known limitations. | Review artifact | `.agent-workflow/reviews/GH-436-CLAUDE-DOCS-QA-EVIDENCE-release_manager-review.md`; #435 release-readiness gate | Release review records go with known limitations; #435 follow-up gate preservation now passes. | Pass |
+| #439 | Claude parent-runtime adapter can hand off only when a supported Claude parent runtime and native callback capability are explicitly verified. | Unit/contract | `runtime parent-actions dispatches eligible Claude requests with stable lifecycle`; `runtime parent-actions returns truthful Claude native fallback outside Claude parent runtime` | Local tests simulate the verified callback contract and verify unsupported parent runtimes skip without lifecycle writes. | In progress |
+| #439 | Adapter captures a native child identifier or verified callback result and records spawned and active without manual lifecycle commands. | Unit/contract | `test/runtime-adapters.test.js` Claude native dispatch test | Simulated verified bridge records one spawned event and one active heartbeat with the supplied native child id. | In progress |
+| #439 | Completion reconciliation validates task id, phase, role, runtime, session id, and expected artifact before completion. | Watch/contract tests | Existing #434 watch tests in `test/runtime-adapters.test.js` | Completion still flows through the expected handoff metadata and safe path validators. | Pass |
+| #439 | Unsupported environments return fallback guidance without claiming native execution. | Unit/contract/docs | `runtime parent-actions returns truthful Claude native fallback outside Claude parent runtime`; `docs/runtime-adapters.md` | Codex/non-Claude context returns skipped fallback guidance and records no spawned lifecycle event. | In progress |
+| #439 | Documentation separates tested local contract, real Claude runtime proof, and unsupported CI/non-Claude contexts. | Documentation | `docs/runtime-adapters.md`; this matrix | Docs label local simulation and unsupported contexts; real Claude runtime proof remains manual QA. | In progress |
 ## Unsupported Or Deferred Claims
@@ -50,7 +56,13 @@ claim real Claude Code native execution or Anthropic/provider API execution.
   Agent/Subagent tools from CI or from a non-Claude parent runtime.
 - No evidence proves direct Anthropic or provider API execution for runtime
   delegation; runtime-native artifacts keep `directProviderApiAllowed=false`.
-- #434 completion reconciliation hardening remains pending.
-- #435 workflow resume and human gate preservation regression coverage remains
-  deferred until its implementation and QA pass.
+- Native immediate Claude completion is covered when represented as a
+  `completionResult` payload; real callback transport remains future adapter
+  work.
+- #435 workflow resume and human gate preservation regression coverage now
+  passes locally; real Claude Code callback transport remains outside this
+  local contract matrix.
+- #439 adds a truthful native callback bridge contract: non-Claude and
+  callback-unavailable environments skip without lifecycle writes. Real Claude
+  Code Agent/Subagent proof still requires manual QA from a Claude parent
+  runtime that exposes the native callback capability.

package/docs/context-vault.md ADDED Viewed

@@ -0,0 +1,80 @@
+# Context Vault
+Context vault is the planned workspace catalog for source materials that inform
+agent work but should not be pasted directly into every prompt. It covers
+documents, statements of work, PDFs, diagrams, images, audio/video recordings,
+transcripts, and client reference artifacts.
+The vault is related to context indexing and transcription, but it has a
+different job: it records provenance, sensitivity, retention, ownership, and
+safe consumption rules for project inputs.
+## Goals
+- Register source artifacts with stable metadata and checksums.
+- Classify sensitivity before an artifact is used by an agent.
+- Convert large or binary inputs into bounded summaries, excerpts, transcripts,
+  or context packs.
+- Preserve provenance so BA, PO, Architect, QA, and Release can cite which
+  artifact informed a requirement, risk, decision, or validation result.
+- Keep raw sensitive inputs out of runtime prompts unless policy explicitly
+  allows a bounded excerpt.
+## Artifact Metadata
+Every vault artifact should record:
+- artifact id and workspace or tenant id;
+- original source, file name, media type, size, checksum, and registered time;
+- owner role or user;
+- sensitivity classification and retention policy;
+- ingestion status, conversion status, redaction status, and error state;
+- derived outputs such as Markdown conversion, transcript, summary, embeddings,
+  or context-pack references.
+## Consumption Model
+Agents should not load raw vault artifacts by default. They should request a
+bounded context pack or artifact summary scoped to the active task, role, phase,
+and token budget.
+The pack should include:
+- artifact references and provenance links;
+- selected excerpts with inclusion reasons;
+- redaction decisions and omitted-sensitive-data notes;
+- budget summary and truncation indicators;
+- stale or failed-ingestion warnings.
+## Security And Privacy
+Vault ingestion must be tenant-aware and fail closed when classification or
+redaction is uncertain. Secrets, credentials, health data, payment data, and
+other regulated content require explicit policy handling before they can be
+summarized or exposed to a runtime.
+External conversion or transcription providers are opt-in. Local conversion and
+local transcription should be preferred when a workspace or tenant requires
+offline handling.
+## API Shape
+The implementation should define APIs before storage details:
+- `vault artifact add` or API equivalent for registering files and metadata;
+- `vault artifact list` with pagination and filters for status, type, and
+  sensitivity;
+- `vault artifact show` for metadata and derived-output status;
+- `vault ingest` for conversion, transcription, and redaction jobs;
+- `context pack build` integration that can cite vault artifacts without
+  reading raw files broadly.
+The web console can later expose this as a catalog view, but it should consume
+the same domain services as the CLI/API.
+## Related Work
+- GitHub issue `#449`: context vault epic.
+- GitHub issue `#367`: audio and video transcription evidence skill.
+- GitHub issues `#423` through `#427`: context index, search, bounded context
+  packs, redaction, and runtime prompt integration.

package/docs/e2e-test-batteries.md CHANGED Viewed

@@ -29,7 +29,7 @@ entry points a user or CI runner actually executes.
 | Browser console           | Web console task, cost, provider, delegation, recovery, evidence, workflow, accessibility, artifacts | `npm run test:e2e`                                                  | visible state, API persistence, evidence attachment, lifecycle transitions, responsive/keyboard behavior                                               | Playwright report, screenshots/traces on failure                   |
 | Public site               | Documentation/site navigation, docs catalog, architecture viewer, mobile fit                         | `npm run test:e2e`                                                  | navigation order, local docs catalog search, no raw GitHub redirect for docs, mobile content fit                                                       | Playwright report                                                  |
 | Runtime manual queue      | Manual runtime delegation in a `/tmp` workspace                                                      | `npm run test:e2e:runtime`                                          | two active sessions, third manual `spawn-request` materializes `queued`, artifact includes lifecycle commands, `runtime sessions` lists queued session | stdout/stderr, JSON output, artifact content                       |
-| Init refresh environments | Simulated Codex, Claude, Cursor, generic workspaces                                                  | `node --test e2e/init-refresh-environments.test.js`                 | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks are updated only inside managed ranges          | filesystem diff assertions                                         |
+| Init refresh environments | Simulated generic, Codex, Claude, Cursor, VS Code/GitHub Copilot, and Windsurf workspaces             | `npm run test:e2e:init`                                             | missing runtime guidance files regenerate on `init --force`, user content is preserved, managed blocks/frontmatter are updated only inside managed ranges, target-specific metadata excludes wrong-target content, generated-artifact evidence maps back to acceptance criteria | filesystem diff assertions, QA coverage JSON                       |
 | Workflow lifecycle CLI    | CLI workflow run, gate, resume, QA failback, release readiness                                       | `node --test e2e/workflow-lifecycle-cli.test.js`                    | task phases create handoffs, blocked QA routes back, routine gate resumes immediately, release readiness maps acceptance to evidence                   | JSON output, events, handoffs                                      |
 ## P1 High-Risk Regression Batteries
@@ -76,8 +76,8 @@ the packaging/install path is wrong.
 1. Keep `e2e/runtime-manual-queue.test.js` release-blocking as runtime
    delegation evolves.
-2. Add `e2e/init-refresh-environments.test.js` for Codex, Claude, Cursor, and
-   generic project simulations.
+2. Keep `e2e/init-onboarding.test.js` covering Codex, Claude, Cursor,
+   VS Code/GitHub Copilot, Windsurf, and generic project simulations.
 3. Add `e2e/workflow-lifecycle-cli.test.js` for workflow run, gate, failback,
    resume, and release readiness.
 4. Add `e2e/runtime-multi-squad.test.js` for async background squad behavior.

package/docs/runtime-adapters.md CHANGED Viewed

@@ -204,7 +204,7 @@ have two supported paths:
   `runtime parent-actions --task <id> --dispatch --until-idle --runtime <runtime-id>`.
   The dispatcher repeatedly inspects pending parent actions, dispatches only
   safe actions for the active runtime, records spawned and active lifecycle
-  events with stable runtime child ids or deterministic fallback labels, applies
+  events with stable runtime child ids or verified callback correlation ids, applies
   `runtime watch` completions when expected handoff artifacts appear, resumes
   paused workflow runs, and continues across later phases until idle or timeout.
@@ -219,11 +219,12 @@ access. This keeps the boundary explicit: Orchestra emits auditable actions and
 lifecycle commands; the active parent runtime executes native tools such as
 Codex `spawn_agent`, and the dispatcher only consumes actions that are safe for
 the runtime declared on the command line. For Claude, the tested dispatch
-contract accepts `claude-agent-request` with `tool=claude-code-agent`, records
-`spawned` and `active` lifecycle states with a deterministic
-`claude-code-agent:<session>` label when no native child id is available, and
-remains idempotent across repeated dispatch attempts. Orchestra does not call
-Claude Code, Anthropic APIs, or another provider API.
+contract accepts `claude-agent-request` with `tool=claude-code-agent`, but it
+records `spawned` and `active` only when the active parent runtime is Claude and
+the native callback capability is explicitly verified. Unsupported Codex, CI,
+non-Claude, or callback-unavailable contexts return fallback guidance and do not
+claim native execution. Orchestra does not call Claude Code, Anthropic APIs, or
+another provider API.
 Runtime lifecycle watching is adapter-driven. Each inspected session reports a
 `watcher` object with adapter id, detection mode, support level, fallback
@@ -242,10 +243,12 @@ not proof that Orchestra can invoke Claude Code or Anthropic APIs by itself.
 The tested local behavior covers:
 - Dispatch support: eligible `claude-agent-request` actions for `claude-cli`
-  with `tool=claude-code-agent` can be consumed by
+  with `tool=claude-code-agent` can be inspected by
   `runtime parent-actions --dispatch --runtime claude-cli`. The dispatch path
-  records `spawned` and `active` lifecycle state with a stable child identifier
-  or deterministic `claude-code-agent:<session>` fallback label.
+  records `spawned` and `active` lifecycle state only when the bridge verifies a
+  Claude parent runtime and callback capability. In local contract tests this is
+  simulated with explicit environment markers; in unsupported environments the
+  action is skipped with manual fallback guidance.
 - Alias policy: `claude-code-agent` is the only auto-dispatchable Claude tool
   name in the tested contract. `Task` is a legacy/manual alias and is skipped
   as `tool-mismatch`; accepting it in auto-dispatch requires new tests and
@@ -254,19 +257,26 @@ The tested local behavior covers:
   terminal, mismatched, or unavailable actions return structured eligibility
   metadata, fallback guidance, prompt artifact, expected result artifact, and
   manual lifecycle commands. Fallback never runs the phase in the parent agent
-  silently and never switches to direct provider APIs.
+  silently, never records native Claude lifecycle events, and never switches to
+  direct provider APIs.
 - Guardrails: dispatch is bounded by runtime guardrails, runtime filters,
   session status, safety state, action kind, tool name, and stale-session
   checks. It preserves `directProviderApiAllowed=false` for runtime-native
   delegation artifacts.
-- Completion reconciliation: current tested support relies on explicit
-  lifecycle events and bounded expected-artifact inspection. GH-434 tracks
-  stricter validation of task id, phase, role, runtime, session id, and safe
-  expected artifact path before a Claude session is marked complete.
-- Gate preservation: auto-dispatch must not approve or skip human gates. GH-435
-  tracks the dedicated regression suite for safe workflow resume across
-  `gates=none`, `gates=phase`, `gates=all`, multi-phase dispatch, and manual
-  fallback recovery.
+- Completion reconciliation: `runtime watch` validates the expected completion
+  metadata before marking a Claude session complete. The validation checks task
+  id, phase, role, runtime, session id, and the safe expected handoff path, and
+  it also requires the final handoff artifact to repeat those metadata fields.
+  It skips mismatches, missing artifact metadata, unsafe paths, and duplicate
+  completions with explicit reasons instead of treating any handoff file as
+  completion proof. Native immediate `completionResult` payloads use the same
+  validation path when present.
+- Gate preservation: auto-dispatch must not approve or skip human gates.
+  `workflow run --resume` now holds unapproved gates until
+  `workflow gate-approve` records explicit approval, and runtime lifecycle
+  auto-resume records no gate approval events. The regression suite covers safe
+  non-gated resume, unapproved gate hold behavior, opt-out, queued/pending
+  messaging, and multi-pass parent action dispatch.
 Manual recovery for a skipped or unavailable Claude action:

package/docs/site-manifest.json CHANGED Viewed

@@ -117,6 +117,7 @@
       { "title": "Sonar quality gates", "source": "docs/sonar-quality-gates.md", "heading": "Sonar Quality Gates" },
       { "title": "Sonar architecture model", "source": "docs/sonar-architecture-model.md", "heading": "Sonar Architecture Model" },
       { "title": "Runtime adapters", "source": "docs/runtime-adapters.md", "heading": "Runtime Adapters" },
+      { "title": "Context vault", "source": "docs/context-vault.md", "heading": "Context Vault" },
       { "title": "Site content workflow", "source": "docs/site-content-workflow.md", "heading": "Public Site Content Workflow" }
     ]
   },

package/docs/traceability-flow.md CHANGED Viewed

@@ -54,10 +54,20 @@ orchestra review --task STORY-1 --role qa --result approve --findings "..." --re
 ```
 Developer-to-QA handoff should include touched files, commands, known gaps, and
-recommended Playwright, CLI, shell, or API coverage. `qa coverage` maps each
-acceptance criterion to `covered`, `planned`, `skipped`, or `gap` using task
-paths, project scripts, and existing evidence; release readiness surfaces
-unresolved QA automation gaps before promotion.
+recommended Playwright, CLI, shell, API, integration, workflow, mobile, desktop,
+data, or generated-artifact coverage. `qa coverage` maps each acceptance
+criterion to `covered`, `weak`, `missing`, `deferred`, or `blocked`
+using task paths, project scripts, and existing evidence; release readiness and
+the `qa-release` gate surface unresolved QA automation gaps before promotion.
+Generated artifacts are a first-class QA surface. When rules, skills, runtime
+guidance, Markdown files, MDC files, or managed bootstrap blocks change,
+evidence must assert generated paths, managed metadata, target-specific content,
+refresh/drift behavior, user-content preservation, and absence of wrong-target
+content. CLI evidence must assert exit code, stdout, stderr, generated
+files/events, and final state. Integration evidence must include receiver-side
+sandbox/mock/contract/webhook/event/log validation, or an explicit deferral
+with a named owner and rationale.
 Evidence summaries should name the acceptance criterion they cover or say
 "covers all acceptance criteria" when a single artifact proves the full story.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jterrats/open-orchestra",
-  "version": "1.0.10",
+  "version": "1.0.12",
   "type": "module",
   "workspaces": [
     "extensions/vscode-open-orchestra",
@@ -16,7 +16,7 @@
     "test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
     "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
     "test:e2e": "npm run build && npm run site:build && playwright test",
-    "test:e2e:init": "node --test e2e/init-onboarding.test.js",
+    "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
     "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js",
     "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
     "lint": "eslint . && prettier --check \"{bin,e2e,scripts,test,src}/**/*.js\" \"{site,web-console}/src/**/*.{css,js,jsx}\" \"{site,web-console}/*.{html,js,json}\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.{js,json}\"",