npm - @jterrats/open-orchestra - Versions diffs - 0.1.0 → 0.2.1 - Mend

@jterrats/open-orchestra 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187)

package/CHANGELOG.md +53 -0
package/README.md +17 -2
package/dist/assets/web-console.js +743 -0
package/dist/cli.js +157 -4
package/dist/cli.js.map +1 -1
package/dist/collaboration-flows.d.ts +5 -0
package/dist/collaboration-flows.js +256 -0
package/dist/collaboration-flows.js.map +1 -0
package/dist/command-manifest.d.ts +11 -0
package/dist/command-manifest.js +52 -0
package/dist/command-manifest.js.map +1 -0
package/dist/commands.d.ts +31 -0
package/dist/commands.js +644 -2
package/dist/commands.js.map +1 -1
package/dist/constants.d.ts +4 -0
package/dist/constants.js +22 -0
package/dist/constants.js.map +1 -1
package/dist/defaults.d.ts +7 -11
package/dist/defaults.js +7 -625
package/dist/defaults.js.map +1 -1
package/dist/delegation-decision.d.ts +14 -0
package/dist/delegation-decision.js +391 -0
package/dist/delegation-decision.js.map +1 -0
package/dist/detect-commands.d.ts +3 -0
package/dist/detect-commands.js +28 -0
package/dist/detect-commands.js.map +1 -0
package/dist/diagram-validation.d.ts +36 -0
package/dist/diagram-validation.js +118 -0
package/dist/diagram-validation.js.map +1 -0
package/dist/fs-utils.d.ts +2 -0
package/dist/fs-utils.js +75 -6
package/dist/fs-utils.js.map +1 -1
package/dist/health-checks.d.ts +28 -0
package/dist/health-checks.js +219 -0
package/dist/health-checks.js.map +1 -0
package/dist/health-commands.d.ts +2 -0
package/dist/health-commands.js +18 -0
package/dist/health-commands.js.map +1 -0
package/dist/instruction-apply.d.ts +34 -0
package/dist/instruction-apply.js +150 -0
package/dist/instruction-apply.js.map +1 -0
package/dist/instruction-blocks.d.ts +22 -0
package/dist/instruction-blocks.js +120 -0
package/dist/instruction-blocks.js.map +1 -0
package/dist/instruction-imports.d.ts +12 -0
package/dist/instruction-imports.js +45 -0
package/dist/instruction-imports.js.map +1 -0
package/dist/instruction-stale.d.ts +9 -0
package/dist/instruction-stale.js +106 -0
package/dist/instruction-stale.js.map +1 -0
package/dist/instruction-types.d.ts +66 -0
package/dist/instruction-types.js +2 -0
package/dist/instruction-types.js.map +1 -0
package/dist/instruction-updates.d.ts +4 -0
package/dist/instruction-updates.js +5 -0
package/dist/instruction-updates.js.map +1 -0
package/dist/knowledge-base.d.ts +10 -0
package/dist/knowledge-base.js +117 -0
package/dist/knowledge-base.js.map +1 -0
package/dist/mcp-oauth-proxy.d.ts +39 -0
package/dist/mcp-oauth-proxy.js +80 -0
package/dist/mcp-oauth-proxy.js.map +1 -0
package/dist/pr-review.d.ts +20 -0
package/dist/pr-review.js +142 -0
package/dist/pr-review.js.map +1 -0
package/dist/project-detection.d.ts +22 -0
package/dist/project-detection.js +174 -0
package/dist/project-detection.js.map +1 -0
package/dist/prompt-registry.d.ts +56 -0
package/dist/prompt-registry.js +163 -0
package/dist/prompt-registry.js.map +1 -0
package/dist/release-candidate.d.ts +41 -0
package/dist/release-candidate.js +196 -0
package/dist/release-candidate.js.map +1 -0
package/dist/release-commands.d.ts +4 -0
package/dist/release-commands.js +50 -0
package/dist/release-commands.js.map +1 -0
package/dist/roles/ai-support-roles.d.ts +11 -0
package/dist/roles/ai-support-roles.js +67 -0
package/dist/roles/ai-support-roles.js.map +1 -0
package/dist/roles/core-roles.d.ts +11 -0
package/dist/roles/core-roles.js +144 -0
package/dist/roles/core-roles.js.map +1 -0
package/dist/roles/engineering-roles.d.ts +11 -0
package/dist/roles/engineering-roles.js +176 -0
package/dist/roles/engineering-roles.js.map +1 -0
package/dist/roles/governance-roles.d.ts +11 -0
package/dist/roles/governance-roles.js +117 -0
package/dist/roles/governance-roles.js.map +1 -0
package/dist/roles/index.d.ts +11 -0
package/dist/roles/index.js +17 -0
package/dist/roles/index.js.map +1 -0
package/dist/roles/platform-ops-roles.d.ts +11 -0
package/dist/roles/platform-ops-roles.js +158 -0
package/dist/roles/platform-ops-roles.js.map +1 -0
package/dist/roles/qa-ux-roles.d.ts +11 -0
package/dist/roles/qa-ux-roles.js +193 -0
package/dist/roles/qa-ux-roles.js.map +1 -0
package/dist/roles/release-ops-roles.d.ts +11 -0
package/dist/roles/release-ops-roles.js +109 -0
package/dist/roles/release-ops-roles.js.map +1 -0
package/dist/runtime-adapters.d.ts +6 -0
package/dist/runtime-adapters.js +88 -0
package/dist/runtime-adapters.js.map +1 -0
package/dist/runtime-bootstrap.d.ts +12 -0
package/dist/runtime-bootstrap.js +85 -0
package/dist/runtime-bootstrap.js.map +1 -0
package/dist/skills.d.ts +36 -0
package/dist/skills.js +665 -0
package/dist/skills.js.map +1 -0
package/dist/subagent-protocol.d.ts +41 -0
package/dist/subagent-protocol.js +179 -0
package/dist/subagent-protocol.js.map +1 -0
package/dist/telemetry-consent.d.ts +24 -0
package/dist/telemetry-consent.js +95 -0
package/dist/telemetry-consent.js.map +1 -0
package/dist/telemetry-export.d.ts +14 -0
package/dist/telemetry-export.js +126 -0
package/dist/telemetry-export.js.map +1 -0
package/dist/telemetry-records.d.ts +3 -0
package/dist/telemetry-records.js +96 -0
package/dist/telemetry-records.js.map +1 -0
package/dist/telemetry-redaction.d.ts +9 -0
package/dist/telemetry-redaction.js +55 -0
package/dist/telemetry-redaction.js.map +1 -0
package/dist/telemetry-types.d.ts +52 -0
package/dist/telemetry-types.js +2 -0
package/dist/telemetry-types.js.map +1 -0
package/dist/telemetry.d.ts +4 -0
package/dist/telemetry.js +4 -0
package/dist/telemetry.js.map +1 -0
package/dist/types.d.ts +176 -1
package/dist/validation.d.ts +3 -1
package/dist/validation.js +28 -5
package/dist/validation.js.map +1 -1
package/dist/web-api.js +167 -3
package/dist/web-api.js.map +1 -1
package/dist/web-console.js +6 -160
package/dist/web-console.js.map +1 -1
package/dist/workflow-gates.js +4 -2
package/dist/workflow-gates.js.map +1 -1
package/dist/workflow-services.js +125 -67
package/dist/workflow-services.js.map +1 -1
package/dist/workflow-templates.d.ts +10 -0
package/dist/workflow-templates.js +141 -0
package/dist/workflow-templates.js.map +1 -0
package/dist/workspace-classification.d.ts +5 -0
package/dist/workspace-classification.js +127 -0
package/dist/workspace-classification.js.map +1 -0
package/dist/workspace-validator.js +11 -1
package/dist/workspace-validator.js.map +1 -1
package/dist/workspace.d.ts +8 -4
package/dist/workspace.js +111 -4
package/dist/workspace.js.map +1 -1
package/docs/dev-team-specialist-role-profiles.md +171 -0
package/docs/mcp-oauth-proxy-evaluation.md +44 -0
package/docs/multi-agent-orchestrator-backlog.md +413 -1
package/docs/open-orchestra-dogfooding-findings.md +66 -0
package/docs/orchestra-mvp.md +46 -1
package/docs/runtime-adapters.md +86 -0
package/docs/runtime-llm-flow.md +124 -0
package/docs/setup-agents-dogfooding-findings.md +101 -0
package/docs/skill-loading-strategy.md +114 -0
package/docs/source-of-truth-and-agent-learning.md +83 -0
package/package.json +9 -5
package/rules/agent-roles.mdc +30 -0
package/rules/ai-assisted-development.mdc +22 -0
package/skills/agent-learning/SKILL.md +24 -0
package/skills/agent-learning/manifest.json +40 -0
package/skills/backlog-sync/SKILL.md +24 -0
package/skills/backlog-sync/manifest.json +41 -0
package/skills/diagram-export/SKILL.md +35 -0
package/skills/diagram-export/manifest.json +40 -0
package/skills/model-evaluation/SKILL.md +25 -0
package/skills/model-evaluation/manifest.json +41 -0
package/skills/playwright-evidence/SKILL.md +28 -0
package/skills/playwright-evidence/manifest.json +46 -0
package/skills/pr-review/SKILL.md +23 -0
package/skills/pr-review/manifest.json +43 -0
package/skills/prompt-registry/SKILL.md +24 -0
package/skills/prompt-registry/manifest.json +45 -0
package/skills/release-readiness/SKILL.md +25 -0
package/skills/release-readiness/manifest.json +45 -0
package/skills/source-of-truth/SKILL.md +24 -0
package/skills/source-of-truth/manifest.json +47 -0
package/skills/static-analysis/SKILL.md +26 -0
package/skills/static-analysis/manifest.json +46 -0

package/docs/dev-team-specialist-role-profiles.md ADDED Viewed

@@ -0,0 +1,171 @@
+# Dev Team Specialist Role Profiles
+These profiles extend the default delivery team catalog with optional specialist personas. They should be activated by task impact and risk, not by default for every task.
+## Source Signals
+- DORA / Google Cloud DevOps capabilities emphasize maintainability, continuous integration, test automation, continuous delivery, monitoring and observability, shifting left on security, small batches, value stream visibility, and empowered tool choices: https://cloud.google.com/architecture/devops
+- Google SRE guidance centers reliability decisions on user-relevant SLIs/SLOs, error budgets, availability, latency, throughput, correctness, and operational control loops: https://sre.google/sre-book/service-level-objectives/
+- Platform engineering guidance frames internal developer platforms as product-minded self-service systems with secure, governed golden paths that reduce cognitive load: https://learn.microsoft.com/en-us/platform-engineering/what-is-platform-engineering and https://www.cncf.io/blog/2023/11/20/announcing-the-platform-engineering-maturity-model/
+- Playwright best practices prioritize user-visible behavior, isolated tests, resilient locators, web-first assertions, traceable debugging, parallelism, and test reliability: https://playwright.dev/docs/best-practices and https://playwright.dev/docs/locators
+- WCAG 2.2 is the current W3C recommendation for accessible web content across desktop, mobile, and other devices; success criteria are testable statements: https://www.w3.org/TR/wcag/
+- OWASP secure coding guidance keeps secure implementation technology-agnostic and lifecycle-integrated, including input validation, output encoding, authentication, access control, cryptography, logging, data protection, communication security, and file handling: https://owasp.org/www-project-secure-coding-practices-quick-reference-guide/stable-en/02-checklist/
+- Apple App Review guidelines highlight mobile release completeness, on-device stability, accurate metadata, privacy/safety expectations, and review readiness: https://developer.apple.com/app-store/review/guidelines/
+- OpenAI eval guidance recommends eval-driven development, task-specific evals, production-shaped datasets, automated scoring where possible, human calibration, pairwise comparisons, rubrics, and continuous evaluation: https://platform.openai.com/docs/guides/evaluation-best-practices
+## Tech Lead / Engineering Lead
+State-of-the-art profile:
+- Treats implementation as a coordinated delivery system: ownership boundaries, code review focus, sequencing, integration risk, and maintainability.
+- Uses small batches, clear ownership, and visible work-in-progress to reduce handoff and merge risk.
+- Bridges architecture intent and code-level decisions without replacing the Architect role.
+Activate when:
+- Multiple developers or agents touch related modules.
+- Ownership boundaries are unclear.
+- A change has integration, maintainability, or sequencing risk.
+Expected evidence:
+- Implementation plan.
+- Ownership map.
+- Review checklist.
+- Integration risk notes.
+## SDET / Test Automation Engineer
+State-of-the-art profile:
+- Designs automation around user-visible behavior and acceptance criteria.
+- Prefers resilient Playwright locators, isolated tests, web-first assertions, fixtures, page objects, traces, screenshots, and CI reliability evidence.
+- Turns QA plans into repeatable regression coverage without replacing QA's release judgment.
+Activate when:
+- Browser E2E coverage is needed.
+- Existing E2E tests are flaky or selector-heavy.
+- A critical workflow needs repeatable regression evidence.
+Expected evidence:
+- Automation plan.
+- Locator strategy.
+- Playwright screenshots/traces.
+- CI stability notes.
+## Platform Engineer
+State-of-the-art profile:
+- Builds internal developer platform capabilities as reusable products for developers.
+- Provides secure, governed self-service and golden paths for common delivery workflows.
+- Reduces cognitive load while preserving compliance, cost visibility, and operational control.
+Activate when:
+- A workflow should become reusable across repositories.
+- CI/CD, environment setup, scaffolding, or templates are changing.
+- Developer experience and governance requirements must be met by the same solution.
+Expected evidence:
+- Golden path documentation.
+- Template validation.
+- Self-service smoke test.
+- Adoption and feedback notes.
+## Frontend Specialist
+State-of-the-art profile:
+- Owns component architecture, browser behavior, accessibility implementation, responsive layouts, client performance, and UI state coverage.
+- Uses WCAG-oriented accessibility checks and Playwright-visible user flows as implementation evidence.
+- Works with UX/UI and QA to ensure the interface is usable, not merely rendered.
+Activate when:
+- A user-facing web flow changes.
+- Responsive, accessibility, or client performance risk exists.
+- Component architecture or frontend state management changes.
+Expected evidence:
+- Responsive screenshots.
+- Accessibility notes.
+- Component or browser smoke test.
+- State coverage notes.
+## Backend Specialist
+State-of-the-art profile:
+- Owns service boundaries, API contracts, domain logic, persistence integration, concurrency, failure modes, and server-side tests.
+- Applies secure coding and operational thinking to inputs, outputs, auth, access control, data protection, logging, and communication security.
+- Works with SRE, Security, DBA, and Architect when backend behavior affects reliability, privacy, or data integrity.
+Activate when:
+- API contracts, domain services, or persistence behavior change.
+- Concurrency, transactions, idempotency, retries, or failure behavior matter.
+- Server-side security or reliability risk exists.
+Expected evidence:
+- API contract notes.
+- Unit/integration test results.
+- Failure mode review.
+- Observability notes.
+## Mobile Specialist
+State-of-the-art profile:
+- Owns mobile UX, device compatibility, offline behavior, native/hybrid runtime constraints, privacy metadata, performance, and app store readiness.
+- Validates on-device behavior rather than assuming desktop responsive checks are sufficient.
+- Coordinates with Release Manager and QA for store submission, beta testing, metadata, and device matrix evidence.
+Activate when:
+- Native, hybrid, or mobile-specific behavior changes.
+- Offline, permissions, device capabilities, push, camera, location, or store metadata are involved.
+- A mobile release or review process is affected.
+Expected evidence:
+- Device matrix.
+- Mobile screenshots or recordings.
+- Offline and permissions test notes.
+- Store readiness checklist.
+## AI Evaluation / Prompt Quality Engineer
+State-of-the-art profile:
+- Treats prompt/model behavior as testable product behavior, not subjective chat quality.
+- Defines eval objectives, datasets, rubrics, automated graders, human calibration, model comparisons, and continuous evaluation checks.
+- Owns regression evidence for prompt, model routing, provider fallback, and generated-output quality changes.
+Activate when:
+- System prompts, role prompts, model routing, fallback behavior, or provider selection changes.
+- LLM output quality is release-critical.
+- Human review should be calibrated against automated evals.
+Expected evidence:
+- Eval dataset or cases.
+- Rubric results.
+- Model comparison summary.
+- Failure analysis and accepted residual risk.
+## Support / Customer Operations
+State-of-the-art profile:
+- Owns customer-facing readiness: known issues, troubleshooting paths, support runbooks, incident intake, communication drafts, and feedback loops.
+- Ensures releases are operable by people who did not build them.
+- Connects post-release signals back into backlog refinement and incident learning.
+Activate when:
+- A customer-visible release changes workflows, support paths, or known limitations.
+- Workarounds, communications, or feedback intake are needed.
+- Support needs a runbook before go-live.
+Expected evidence:
+- Support runbook.
+- Known issues note.
+- Customer communication draft.
+- Feedback intake plan.
+## Prompt Registry Integration
+The setup-agents prompt registry pattern is now part of Open Orchestra as a stack-agnostic `.generated-prompts/` scaffold. Specialist roles should use it as follows:
+- Tech Lead reads `code.md`, `services.md`, and `docs.md` before coordinating implementation plans.
+- SDET reads `tests.md` before adding Playwright or regression coverage.
+- Platform Engineer reads `cicd.md` and `docs.md` before changing reusable developer workflows.
+- Frontend Specialist reads `ui.md` and `tests.md` before user-facing web changes.
+- Backend Specialist reads `services.md`, `code.md`, and `tests.md` before service or API changes.
+- Mobile Specialist reads `ui.md`, `tests.md`, and `docs.md` before mobile-specific flows or release readiness work.
+- AI Evaluation / Prompt Quality Engineer reads and updates `evals.md` for prompt, model routing, provider fallback, rubric, and eval changes.
+- Support / Customer Operations reads `docs.md` before support runbook, known issue, or customer communication work.

package/docs/mcp-oauth-proxy-evaluation.md ADDED Viewed

@@ -0,0 +1,44 @@
+# MCP OAuth Proxy Evaluation
+## Goal
+Evaluate an opt-in proxy pattern for HTTP MCP servers when a tool only supports stdio transport or cannot inject OAuth tokens safely.
+## Transport Options
+| Option | Fit | Trade-off |
+| --- | --- | --- |
+| stdio proxy | Best compatibility for stdio-only clients. | Proxy owns token injection, refresh, logging controls, and revocation handling. |
+| direct HTTP | Best when the tool supports HTTP MCP and auth natively. | Not portable to stdio-only tools. |
+| tool-native OAuth | Preferred security ownership when available. | Behavior differs by IDE/CLI and may not support every MCP server. |
+## Prototype Rules
+- The proxy is disabled by default and must be explicitly enabled.
+- Open Orchestra must not alter MCP config without user approval (`--enable --approve --approver <name>` in the evaluation command).
+- HTTP MCP server URLs must use `https://`.
+- Tokens can be stored only in OS secret storage (`keychain`, `libsecret`, `windows-credential`) or under approved local secure-file paths such as `~/.config/open-orchestra/secrets/`.
+- Logs must never include access tokens, refresh tokens, auth codes, token endpoint responses, or bearer headers.
+- Revocation is supported by deleting local token material and re-running authorization.
+## Refresh Behavior
+- Refresh is evaluated without real credentials by injecting a refresh function into `refreshMcpTokenIfNeeded`.
+- Tokens refresh only inside the configured refresh window.
+- The minimum refresh window is 30 seconds to avoid last-second token expiry during tool calls.
+## Security Review
+- Token storage: use OS secret storage first; secure files only by explicit local path approval.
+- Refresh: refresh before expiry and keep failures non-destructive so existing valid tokens are not overwritten by failed responses.
+- Logs: redact token-shaped values and avoid logging raw provider responses.
+- Revocation: document local token deletion and upstream OAuth app revocation.
+- Configuration: keep MCP changes opt-in and reviewable; dry-run/evaluate before writing any config.
+## CLI
+```bash
+orchestra mcp oauth-proxy evaluate --server-url https://mcp.example.com --enable --approve --approver <name> --json
+```
+The command returns risks and recommendations only; it does not write MCP configuration.

package/docs/multi-agent-orchestrator-backlog.md CHANGED Viewed

@@ -52,7 +52,7 @@ Acceptance criteria:
 As an administrator, I want to define roles and responsibilities so that each agent knows its ownership, authority, and handoff expectations.
 Acceptance criteria:
-- Supports roles such as Product Owner, Architect, Developer, QA, Security, DevOps, SRE, DBA, UX/UI Designer, Release Manager, Compliance/Privacy, Technical Writer, and Game Designer.
+- Supports roles such as Product Owner, Architect, Developer, Tech Lead, Frontend Specialist, Backend Specialist, Mobile Specialist, QA, SDET, Security, DevOps, Platform Engineer, SRE, DBA, UX/UI Designer, Release Manager, Compliance/Privacy, Technical Writer, AI Evaluation Engineer, Support/Customer Operations, and Game Designer.
 - Stores role purpose, responsibilities, required inputs, outputs, and blockers.
 - Allows project-specific role overrides.
@@ -72,6 +72,71 @@ Acceptance criteria:
 - Records role, rationale, severity, and affected artifact.
 - Prevents release when required role approval is missing.
+### Story ROLE-004: Activate Tech Lead Profile
+As a parent agent, I want to activate a Tech Lead when implementation coordination risk exists so that code ownership, sequencing, and integration quality are explicit.
+Acceptance criteria:
+- Activates Tech Lead for multi-developer, cross-module, or sequencing-sensitive work.
+- Records ownership boundaries, implementation plan, review focus, and integration risks.
+- Blocks integration when ownership or review evidence is missing for high-risk work.
+### Story ROLE-005: Activate SDET Profile
+As QA, I want an SDET profile for automation-heavy work so that Playwright coverage is reliable, maintainable, and evidence-based.
+Acceptance criteria:
+- Activates SDET for browser automation, flaky tests, critical regression paths, or CI test reliability risk.
+- Requires automation scope, locator strategy, fixtures/page objects, and Playwright evidence.
+- Flags brittle selectors, non-isolated tests, and missing trace/screenshot evidence.
+### Story ROLE-006: Activate Platform Engineer Profile
+As an administrator, I want a Platform Engineer profile so that reusable developer workflows become secure, governed golden paths.
+Acceptance criteria:
+- Activates Platform Engineer for reusable templates, CI/CD foundations, environment setup, or cross-repo standards.
+- Requires platform capability, developer experience, guardrails, and adoption plan.
+- Records template validation and self-service smoke evidence.
+### Story ROLE-007: Activate Frontend Specialist Profile
+As a parent agent, I want a Frontend Specialist profile so that user-facing web changes cover component architecture, responsive behavior, accessibility, and browser evidence.
+Acceptance criteria:
+- Activates Frontend Specialist for web UI, responsive, accessibility, component, or client-performance risk.
+- Requires user flow, UI states, accessibility notes, and browser evidence.
+- Coordinates with UX/UI, QA, and SDET for Playwright-visible behavior.
+### Story ROLE-008: Activate Backend Specialist Profile
+As a parent agent, I want a Backend Specialist profile so that services, APIs, data flow, concurrency, and failure modes are reviewed by the right capability.
+Acceptance criteria:
+- Activates Backend Specialist for API contracts, domain services, persistence, transactions, or server reliability risk.
+- Requires API contract, data flow, failure modes, and service test evidence.
+- Coordinates with Security, SRE, DBA, and Architect when impact areas overlap.
+### Story ROLE-009: Activate Mobile Specialist Profile
+As a parent agent, I want a Mobile Specialist profile so that mobile runtime, device, offline, permissions, and store-readiness constraints are not treated as generic web work.
+Acceptance criteria:
+- Activates Mobile Specialist for native, hybrid, offline, permissions, device capability, or app-store release impact.
+- Requires device matrix, mobile flow, offline behavior, and release constraints.
+- Records mobile screenshots, recordings, or device test evidence.
+### Story ROLE-010: Activate AI Evaluation Profile
+As a parent agent, I want an AI Evaluation / Prompt Quality profile so that prompt, model, and provider-routing changes are evaluated with explicit rubrics and regression evidence.
+Acceptance criteria:
+- Activates AI Evaluation for system prompt, role prompt, model routing, provider fallback, or LLM output quality changes.
+- Requires eval objective, dataset or cases, rubric, model behavior summary, and regression risk.
+- Supports model comparison and human-calibrated review for high-risk outputs.
+### Story ROLE-011: Activate Support / Customer Operations Profile
+As a Release Manager, I want Support / Customer Operations activated for customer-visible releases so that support readiness and feedback loops are ready before go-live.
+Acceptance criteria:
+- Activates Support for customer-visible releases, known issues, workarounds, support workflow changes, or feedback intake needs.
+- Requires customer impact, known issues, support runbook, and feedback loop.
+- Includes support readiness in release-readiness evidence.
 ## Epic 3: Multi-Model Provider Layer
 ### Story MODEL-001: Define Provider Interface
@@ -218,6 +283,15 @@ Acceptance criteria:
 - Highlights unresolved risks and follow-up items.
 - Can be used as PR body input.
+### Story ART-004: Maintain Prompt Registry
+As an agent, I want generated prompts stored by artifact type so that future agents can preserve project conventions and trace AI-generated work back to its prompt intent.
+Acceptance criteria:
+- `orchestra init` creates `.generated-prompts/` with stack-agnostic register files for code, UI, services, tests, CI/CD, docs, diagrams, and evals.
+- Existing register files are not overwritten unless `--force` is used.
+- Register files document the before/after protocol, substantial-change rules, entry format, task, role, key decisions, evidence, and prompt summary.
+- AI-assisted development rules require reading the relevant register before substantial generation and updating it after substantial changes.
 ## Epic 8: File Ownership and Locking
 ### Story LOCK-001: Assign File Ownership
@@ -443,3 +517,341 @@ Acceptance criteria:
 - Records risk, severity, owner, expiration, compensating controls, and approval.
 - Links risk acceptance to release decision.
 - Flags expired risk acceptances.
+## Epic 15: Dynamic Skills and Context Loading
+### Story SKILL-001: Define Skill Manifest
+As a parent agent, I want each skill to expose structured metadata so that the orchestrator can decide what to load without reading every skill file.
+Acceptance criteria:
+- Manifest includes id, name, summary, triggers, roles, capabilities, risk areas, entry file, assets, evidence types, and load budget.
+- Unknown skill IDs and missing entry files fail validation.
+- Skill metadata is available to CLI, web, VS Code, and future middleware adapters.
+### Story SKILL-002: Select Skills from Task Context
+As a parent agent, I want to select skills from task signals so that agents receive only the instructions needed for the current work.
+Acceptance criteria:
+- Selection uses task goal, touched paths, active roles, requested outputs, capabilities, and risk areas.
+- Explains selected and skipped skills with rationale.
+- Supports explicit user or project overrides.
+### Story SKILL-003: Scaffold Built-in Skills
+As an administrator, I want built-in skills for common delivery work so that teams can reuse consistent procedures without bloating main MD files.
+Acceptance criteria:
+- Provides prompt-registry, diagram-export, static-analysis, pr-review, playwright-evidence, backlog-sync, release-readiness, and model-evaluation skills.
+- Each skill has a focused `SKILL.md` plus optional assets, templates, or scripts.
+- Skills stay stack-agnostic unless explicitly scoped by project config.
+### Story SKILL-004: Record Loaded Skills in Evidence and Handoffs
+As a reviewer, I want loaded skills recorded in workflow artifacts so that I can audit why an agent used specific instructions.
+Acceptance criteria:
+- Task context includes selected skill IDs and rationale.
+- Handoffs and final summaries include materially used skills.
+- Evidence events can link to skill-driven commands, scripts, checks, or generated artifacts.
+### Story SKILL-005: Support Middleware Injection for Non-Skill LLMs
+As an administrator, I want Open Orchestra to inject selected skill text for LLMs that do not support native skills so that dynamic context loading works across providers.
+Acceptance criteria:
+- Parent orchestrator loads selected skill text and injects only that text into the child prompt.
+- Full skill catalog is never embedded in the base prompt.
+- Provider output records which skills were injected.
+### Story SKILL-007: Define Source of Truth Catalog
+As a parent agent, I want a configured source-of-truth catalog so that agents and subagents know which local files, workflow artifacts, and official docs are authoritative for each task.
+Acceptance criteria:
+- Catalog groups sources by project instructions, backlog, architecture, codebase, quality/security, DevOps/runtime, vendor docs, and agent memory.
+- Skill selection can reference source groups without loading every source eagerly.
+- Conflicting sources are surfaced as blockers or decisions instead of silently resolved.
+### Story SKILL-008: Record Agent Lessons Learned
+As a parent agent, I want reusable tool, syntax, escaping, permission, and workflow failures recorded so that agents do not repeat the same mistakes.
+Acceptance criteria:
+- Lessons are stored as append-only JSONL with task, actor, operation, failed action, error signature, root cause, fix, prevention, applicable tools, and verification.
+- Agents search relevant lessons before repeating risky operations.
+- Lessons never store secrets, raw credentials, or sensitive customer data.
+- Repeated lessons can be promoted into skills or always-loaded rules.
+### Story SKILL-006: Enforce Main Instruction File Budget
+As a maintainer, I want main MD files kept context-bounded so that long-term agent instructions remain fast, readable, and maintainable.
+Acceptance criteria:
+- Main files contain core rules, skill index, and activation rules only.
+- Detailed procedures move to skills or supporting docs.
+- Validation warns when primary instruction files exceed configured size or section budgets.
+### Story SKILL-009: Manage Generated Instruction File Updates
+As a maintainer, I want generated MD/MDC instruction sections updated through managed blocks and declarative profile imports so that Open Orchestra can refresh runtime adapters without overwriting user-authored documentation or inflating always-loaded context.
+Acceptance criteria:
+- Generated sections use explicit start/end markers with generator, version, source manifest, target, block ID, and content hash metadata.
+- Generators support `--dry-run`, `--check`, and `--force` behavior.
+- Rerunning without source changes produces no diff.
+- Manual edits inside managed blocks are detected as drift and reported before overwrite.
+- User-authored content outside managed blocks is preserved.
+- Profiles and skills can declare imports using Open Orchestra syntax, for example `@orchestra/import <profile-or-skill-id>`.
+- Import resolution validates missing references, prevents cycles, and renders target-compatible output for generic, Claude, Cursor, Codex, and VS Code runtimes.
+- Main instruction files keep only the import index or resolved minimal context needed by the selected target.
+## Epic 16: Consent-Based Telemetry
+### Story TEL-001: Configure Explicit Telemetry Consent
+As a user, I want telemetry disabled by default and enabled only through explicit consent so that prompt and workflow data are never collected silently.
+Acceptance criteria:
+- Default telemetry mode is `off`.
+- CLI supports telemetry status, enable, and disable commands.
+- Consent state records timestamp, actor, selected collection level, and policy version.
+- Telemetry status is visible in config and local workflow context.
+### Story TEL-002: Define Telemetry Collection Levels
+As a user, I want separate telemetry levels so that I can share metadata, prompt summaries, prompt samples, or eval datasets with different consent boundaries.
+Acceptance criteria:
+- Supports `off`, `metadata`, `prompt-summary`, `prompt-sample`, and `eval-dataset` levels.
+- The `prompt-sample` and `eval-dataset` levels require an explicit opt-in that is separate from the `metadata` opt-in.
+- Each exported record includes the active telemetry level and consent reference.
+- Collection level can be downgraded or disabled without deleting local workflow data.
+### Story TEL-003: Redact Sensitive Data Locally
+As a privacy owner, I want telemetry redacted before export or submission so that secrets, PII, local paths, and sensitive customer data are not transmitted.
+Acceptance criteria:
+- Redaction runs locally before export or submit.
+- Secret-like values, tokens, keys, emails, and configured PII patterns are masked.
+- Raw `.env` contents, credentials, stack traces with secrets, and private customer data are blocked.
+- Redaction report lists fields changed without revealing original sensitive values.
+### Story TEL-004: Export Reviewable Telemetry Dataset
+As a user, I want telemetry exported locally before submission so that I can inspect exactly what would be shared.
+Acceptance criteria:
+- `orchestra telemetry export` writes JSONL under local runtime state.
+- Export includes task type, selected roles, selected skills, source groups, provider/model metadata, quality outcome, and allowed prompt summary/sample fields.
+- Export supports `--dry-run` and produces a review summary.
+- Export never submits data automatically.
+### Story TEL-005: Submit Telemetry with Audit Trail
+As a user, I want telemetry submission to be explicit and auditable so that shared data can be traced back to consent.
+Acceptance criteria:
+- `orchestra telemetry submit --file <path>` requires telemetry enabled and compatible collection level.
+- Submission records endpoint, file hash, consent reference, timestamp, and result.
+- Failed submissions keep the local export and do not retry indefinitely without user action.
+- User can request deletion of, or export references for, submitted datasets.
+### Story TEL-006: Curate Eval Dataset from Approved Prompts
+As an AI Evaluation Engineer, I want approved prompts and outcomes curated into eval datasets so that model routing and future fine-tuning decisions use consented, high-quality examples.
+Acceptance criteria:
+- Dataset items require explicit consent and review status.
+- Each item includes prompt summary or prompt sample, expected behavior, selected skills, source groups, model/provider, outcome, and quality labels.
+- Dataset excludes secrets, PII, and customer-sensitive data after redaction.
+- Dataset can be exported separately from operational telemetry.
+## Epic 17: Standalone Web Console
+### Story WEB-001: Bundle Standalone Console Client
+As a user, I want the standalone console client bundled from shared code so that the web UI can grow without inline scripts or duplicated contracts.
+Acceptance criteria:
+- Build uses `tsc` for Node/CLI outputs and `esbuild` for browser bundle output.
+- Server serves the generated browser bundle from `/assets/web-console.js`.
+- Shared chart/data contracts are reused by the browser bundle and tests.
+- Package dry-run includes the generated bundle.
+### Story WEB-002: Expose Skills, Sources, and Lessons APIs
+As a web or IDE client, I want stable JSON APIs for skills, source-of-truth, and lessons so that clients can render orchestration context without parsing CLI output.
+Acceptance criteria:
+- Exposes skills list, validate, plan, and render endpoints.
+- Exposes source-of-truth list endpoint.
+- Exposes lessons list, add, and promote endpoints with JSON contracts.
+- Mutation endpoints use POST and validate required fields.
+- Tests cover success and error responses.
+### Story WEB-003: Render Skills in Standalone Console
+As a user, I want a Skills panel in the standalone console so that I can inspect selected skills and rendered target context for a task.
+Acceptance criteria:
+- UI lists available skills and source groups.
+- UI can select a task and display planned skills with rationale.
+- UI can preview render output for generic, Claude, Cursor, Codex, and VS Code targets.
+- UI handles empty task state and API errors cleanly.
+### Story WEB-004: Render Source of Truth and Lessons in Standalone Console
+As a user, I want the standalone console to show source-of-truth and lessons so that agents and humans can understand what context is authoritative and what failures have been learned.
+Acceptance criteria:
+- UI lists source groups and locations.
+- UI lists local lessons with operation and error signature.
+- UI supports adding a lesson with required fields.
+- UI supports promoting lessons to a reviewable artifact.
+## Epic 18: Setup Agents Feature Harvest
+### Story UPD-001: Detect Stale Generated Instruction Files
+As a maintainer, I want Open Orchestra to detect stale generated instruction files without touching user-authored files so that runtime adapters can be safely refreshed.
+Acceptance criteria:
+- Stale detection uses managed block markers, generator version, source manifest, target, block ID, and content hash.
+- Files without Open Orchestra markers are skipped and reported as user-owned.
+- Known generated files are derived from catalogs/manifests instead of hand-maintained maps.
+- CLI reports fresh, stale, drifted, missing, and user-owned files separately.
+- Tests cover stale, fresh, missing-marker, and catalog-drift scenarios.
+### Story UPD-002: Add Generated Instruction Metadata Frontmatter
+As a maintainer, I want generated MD/MDC files to include stable metadata so that humans and tools can understand provenance and update eligibility.
+Acceptance criteria:
+- Generated files include metadata for generator, version, target, source manifest, content hash, and updated timestamp.
+- Cursor MDC frontmatter and Markdown comment metadata use target-compatible syntax.
+- Metadata is idempotent on rerun when source content is unchanged.
+- Metadata never overwrites user-authored frontmatter outside managed sections.
+- Tests cover Markdown and MDC output formats.
+### Story UPD-003: Apply Instruction Updates with Dry Run and Drift Gates
+As a user, I want instruction updates to support dry-run, check, apply, and force modes so that I can refresh generated files without losing manual edits.
+Acceptance criteria:
+- CLI supports checking planned file changes without writing.
+- CLI supports dry-run output with per-file diff summary.
+- Apply mode updates only safe generated blocks.
+- Drifted blocks are blocked unless force is supplied.
+- Update results are exposed as JSON for web and extension clients.
+### Story DET-001: Auto-Detect Project Profiles and Skills
+As a parent agent, I want Open Orchestra to detect project signals so that relevant roles and skills are suggested automatically for each codebase.
+Acceptance criteria:
+- Detects Playwright, GitHub Actions, Docker/Compose, Terraform/IaC, package managers, frontend source layout, Python, and API/service signals.
+- Detection output maps signals to recommended roles, skills, and source groups.
+- Users can review and accept detected profiles before applying changes.
+- Detection never enables tools or telemetry without explicit user action.
+- Tests cover representative Node, Python, frontend, Playwright, and DevOps fixtures.
+### Story REL-001: Formalize Candidate Release Workflow
+As a release owner, I want a candidate release workflow so that production releases only proceed when stories, QA evidence, changelog, rollback, and smoke tests are ready.
+Acceptance criteria:
+- Candidate release collects stories from local workflow state and GitHub issues when available.
+- Story readiness gate checks acceptance criteria, QA evidence, reviews, docs, dependencies, rollback plan, and unresolved blockers.
+- Release plan generates changelog from approved stories only.
+- Tag/release actions require explicit user approval.
+- Post-release smoke test and rollback evidence can be attached.
+### Story PRR-001: Strengthen Pull Request Review Workflow
+As a reviewer, I want PR review to detect architectural scope, risk, conflicts, and evidence gaps so that risky changes get the right specialist review before merge.
+Acceptance criteria:
+- PR review summarizes behavior, risk, files changed, test evidence, and unresolved questions.
+- Architectural scope triggers route the review to Architect, Security, DevOps, DBA, UX, or QA when needed.
+- Review checklist adapts by file type and detected stack.
+- Merge remains blocked until explicit human approval is recorded.
+- Tests cover PR summary generation, risk routing, and required-review blockers.
+### Story DIA-001: Add Real Diagram Lint and Evidence
+As an architect, I want diagram exports to run a real Mermaid lint/render check so that diagrams are validated before they are shared as architecture evidence.
+Acceptance criteria:
+- Diagram skill can run lint-only mode without side effects.
+- Mermaid validation uses real tool exit codes when available and reports installation guidance when missing.
+- Diagram type decision matrix recommends diagram style by question/domain.
+- Render/lint evidence is attached to the workflow.
+- Tests cover lint success, lint failure, missing tool, and evidence generation.
+### Story HLT-001: Add Local Health Checks
+As a user, I want Open Orchestra to report local health checks so that CLI, package, browser, Playwright, Git, GitHub auth, and workflow readiness problems are actionable.
+Acceptance criteria:
+- Health check command reports installed CLI version, package root, Node/npm, Git, GitHub auth, Playwright/browser availability, and workflow validity.
+- Health checks produce actionable messages and remediation commands.
+- Web console can consume health status through JSON API.
+- Checks avoid network calls unless the user explicitly requests them or is already authenticated.
+- Tests cover healthy, missing optional tool, and invalid workflow states.
+### Story MCP-001: Evaluate MCP OAuth Proxy Integration
+As an integration user, I want Open Orchestra to evaluate an MCP OAuth proxy pattern so that HTTP MCP servers can be used from tools that only support stdio or need token injection.
+Acceptance criteria:
+- Research notes compare stdio proxy, direct HTTP, and tool-native OAuth behavior.
+- Prototype stores tokens only in approved local secret storage or explicitly configured secure paths.
+- Proxy refresh behavior is documented and tested without real credentials.
+- Integration remains opt-in and does not alter MCP config without user approval.
+- Security review covers token storage, refresh, logs, and revocation.
+## Epic 19: Runtime Subagent Orchestration
+### Story ORCH-011: Add Delegation Decision Engine
+As a parent agent, I want Open Orchestra to decide when work should stay local or be delegated so that subagents are used only when they reduce risk or unblock parallel progress.
+Acceptance criteria:
+- Delegation decision evaluates task complexity, impacted paths, risk areas, role fit, dependencies, locks, urgency, and available context.
+- Decision output recommends local execution, single-role delegation, parallel delegation, review-only delegation, or no delegation.
+- Decision includes rationale, required context bundle, expected outputs, disjoint write scopes, and blocking conditions.
+- Decision is recorded in task context and events before subagent work starts.
+- Tests cover local-only, single delegate, parallel delegates, review delegate, blocked-by-lock, and insufficient-context cases.
+### Story HAND-002: Render Portable Subagent Protocol
+As a user, I want Open Orchestra to render a subagent protocol from the active role catalog and task graph so that any LLM runtime can understand role routing without loading large generated rules.
+Acceptance criteria:
+- Protocol lists active roles, rule or skill references, task-to-role routing, conflict precedence, and handoff requirements.
+- Render targets support generic, Claude, Cursor, Codex, and VS Code output.
+- Output is generated from role/task/skill metadata, not hardcoded per platform.
+- Main instruction files can import or reference the protocol through managed blocks without overwriting user content.
+- Tests cover target rendering, role filtering, conflict precedence, and idempotent managed-block output.
+### Story ROLE-003: Add Role Collaboration Flow Catalog
+As a parent agent, I want role-to-role collaboration flows modeled as data so that handoffs are consistent across product, architecture, development, QA, security, DevOps, and release work.
+Acceptance criteria:
+- Catalog defines domain-agnostic chains such as Product -> Analyst, Analyst -> Architect, Architect -> Developer, Developer -> QA, QA -> Release, and risk-owner review flows.
+- Each flow declares trigger conditions, required artifacts, required context, exit criteria, and optional reviewers.
+- Task context exposes recommended collaboration flow for a task.
+- Handoff generation can include flow-specific required fields.
+- Tests cover flow selection, missing artifact detection, and multi-reviewer flows.
+### Story WFLOW-001: Add Portable Workflow Template Catalog
+As a user, I want reusable workflow templates that are not tied to a single vendor or IDE so that common agent workflows can run across CLI, web, VS Code, Cursor, Claude, and Codex.
+Acceptance criteria:
+- Workflow templates cover implementation, QA test plan, PR review, release readiness, incident/runbook, architecture decision, static analysis, and Playwright evidence.
+- Templates declare roles, triggers, inputs, outputs, evidence requirements, gates, and compatible skills.
+- Workflows render to Markdown for human/LLM execution and JSON for clients.
+- Templates can be selected from task metadata and risk areas.
+- Tests cover template validation, task-based selection, target rendering, and evidence requirements.
+### Story ROLE-004: Expand Domain-Agnostic Specialist Profiles
+As a parent agent, I want a richer domain-neutral specialist profile catalog so that delegation decisions can assign work to the right expert without assuming a specific platform.
+Acceptance criteria:
+- Profiles include product manager, product owner, business analyst, architect, tech lead, developer, frontend, backend, mobile, QA, SDET, UX/UI, accessibility, security, DevOps, SRE, DBA/data, release manager, technical writer, and compliance/privacy.
+- Each profile declares activation criteria, capabilities, risk areas, expected evidence, handoff fields, and blocking authority.
+- Profiles remain stack-agnostic and reference skills/source groups instead of vendor-specific implementation details.
+- Role activation and execution planning use these profiles without increasing always-loaded instruction size.
+- Tests cover activation metadata, risk owner mapping, execution-plan routing, and rendered protocol references.
+### Story DOC-002: Document Runtime LLM Flow
+As a user, I want a practical guide that explains how to use Open Orchestra from Claude, Codex, Cursor, VS Code, or another LLM runtime so that I understand which model is acting as the parent agent and how Open Orchestra coordinates context, skills, plans, evidence, and future provider routing.
+Acceptance criteria:
+- Guide explains that the active LLM runtime is the parent agent today, while Open Orchestra acts as the local control plane.
+- Guide includes startup flow for a new task: init, health, task/context/plan, skills plan, skills render by target, evidence, gates, and summary.
+- Guide provides copy-paste prompt examples for Claude, Codex, Cursor, and generic LLM runtimes.
+- Guide clarifies current limitations: no real provider execution yet beyond fake/provider-routing primitives, and no automatic subagent spawning until delegation runtime stories are implemented.
+- Guide explains model/provider routing concepts and how future multi-model delegation should work by role.
+- README links to the guide from the quick-start or usage section.
+- Tests or documentation checks verify that referenced CLI commands exist in the help text.
+Technical refinement:
+- Keep the guide stack-agnostic and runtime-neutral.
+- Cross-link skill loading strategy, orchestra MVP, model routing commands, and new runtime subagent orchestration backlog items.
+- Avoid promising real provider execution until implemented.