npm - valent-pipeline - Versions diffs - 0.2.7 → 0.2.9 - Mend

valent-pipeline 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/package.json +1 -1
package/pipeline/docs/pipeline-state-schema.md +3 -2
package/pipeline/prompts/lead.md +27 -12
package/pipeline/prompts/qa-a.md +2 -1
package/pipeline/steps/fend/handoff.md +2 -0
package/pipeline/steps/fend/write-tests.md +13 -1
package/pipeline/steps/orchestration/adopt-lead-and-create-team.md +9 -1
package/pipeline/steps/orchestration/sprint-groom.md +65 -21
package/pipeline/steps/orchestration/update-backlog-status.md +27 -0
package/pipeline/steps/qa-a/read-inputs.md +2 -1
package/pipeline/steps/qa-a/write-spec.md +4 -3
package/pipeline/steps/qa-b/execute-tests.md +8 -0
package/pipeline/steps/readiness/standalone-review.md +41 -2
package/pipeline/templates/sprint-status.template.yaml +3 -0
package/skills/valent-run-epic/SKILL.md +11 -2
package/skills/valent-run-project/SKILL.md +11 -2
package/skills/valent-run-story/SKILL.md +9 -0
package/src/commands/init.js +47 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "valent-pipeline",
-  "version": "0.2.7",
+  "version": "0.2.9",
   "description": "v3 multi-agent AI pipeline for software development lifecycle",
   "type": "module",
   "bin": {

package/pipeline/docs/pipeline-state-schema.md CHANGED Viewed

@@ -64,7 +64,8 @@ Defines the JSON schema for `pipeline-state.json`, the Lead agent's persistent s
 | Field | Type | Description | Read by | Written by |
 |-------|------|-------------|---------|------------|
 | `id` | string | Story identifier | Lead (for scheduling) | Lead (on user submission) |
-| `status` | enum | Granular phase statuses: `pending`, `requirements-spec`, `ux-spec`, `test-case-development`, `readiness-review`, `groomed`, `sizing`, `sprint-planned`, `development`, `code-review`, `qa-validation`, `final-review`, `shipped`, `blocked`, `blocked-on-user`, `cancelled` | Lead (to select next story) | Lead (on status changes) |
+| `status` | enum | Granular phase statuses: `pending`, `requirements-spec`, `ux-spec`, `ux-spec-in-progress`, `test-case-development`, `test-case-development-in-progress`, `readiness-review`, `readiness-review-in-progress`, `groomed`, `sizing`, `sprint-planned`, `development`, `code-review`, `qa-validation`, `final-review`, `shipped`, `blocked`, `blocked-on-user`, `cancelled` | Lead (to select next story) | Lead (on status changes) |
+| `testing_profiles` | string[] | Active testing profiles (e.g., `[api, ui]`). Determines which agents/steps to activate. | Lead, all Phase 1 agents (read-only) | Lead (during grooming Step 0) |
 | `depends_on` | string[] | Story IDs that must complete before this story can start | Lead (for dependency resolution) | Lead (on user submission) |
 | `blocked_reason` | string | Human-readable reason for blocked status; empty or absent when not blocked | Lead (for user reporting) | Lead (when blocking occurs) |
@@ -114,7 +115,7 @@ If the Lead restarts and finds `current_sprint.phase` is not `completed`:
 1. Read `current_sprint.phase` to determine the sprint sub-phase.
 2. Resume from the appropriate orchestration step file:
-   - `grooming` → resume `sprint-groom.md` from last un-groomed story
+   - `grooming` → resume `sprint-groom.md`; read backlog to reconstruct in-flight pipeline state from per-story statuses (`requirements-spec`, `ux-spec`, `test-case-development`, `readiness-review`, and the `-in-progress` variants of the latter three). Respawn Phase 1 agents. Each agent resumes from its stage.
    - `sizing` → resume `sprint-size.md` from last un-sized story
    - `planning` → re-run `sprint-plan.md` (idempotent)
    - `executing` → resume `sprint-execute.md` from current story

package/pipeline/prompts/lead.md CHANGED Viewed

@@ -170,6 +170,7 @@ Each backlog entry has:
 - `type` — `story` or `bug`
 - `status` — one of the granular phase statuses (see Story Status Tracking below)
 - `priority` — integer, lower = higher priority
+- `testing_profiles` — list of active testing profiles (e.g., `[api, ui]`). Tagged by Lead during grooming Step 0 by analyzing ACs. Determines which agents/steps to activate (e.g., skip UXA if `ui` not in profiles). Read by all Phase 1 agents.
 - `depends_on` — list of item IDs that must be `shipped` before this item starts
 - `blocked_by_bugs` — list of bug IDs that must be resolved before this story starts (stories only)
 - `conditional_bugs` — list of bug IDs filed as conditional on ship (shipped stories only)
@@ -186,9 +187,12 @@ Update the story's `status` in `{backlog_path}` at each agent transition. These
 |--------|-------|-------|---------|
 | `pending` | Pending | — | Not yet started, available for grooming |
 | `requirements-spec` | Requirements Spec | REQS | REQS is writing the requirements brief |
-| `ux-spec` | UX Spec | UXA | UXA is writing the UX specification |
-| `test-case-development` | Test Case Development | QA-A | QA-A is writing the test plan |
-| `readiness-review` | Readiness Review | READINESS | READINESS is evaluating spec quality |
+| `ux-spec` | UX Spec | — | REQS complete, awaiting UXA pickup (UI stories only) |
+| `ux-spec-in-progress` | UX Spec (In Progress) | UXA | UXA is writing the UX specification |
+| `test-case-development` | Test Case Development | — | Upstream complete, awaiting QA-A pickup |
+| `test-case-development-in-progress` | Test Case Development (In Progress) | QA-A | QA-A is writing the test plan |
+| `readiness-review` | Readiness Review | — | QA-A complete, awaiting READINESS pickup |
+| `readiness-review-in-progress` | Readiness Review (In Progress) | READINESS | READINESS is evaluating spec quality |
 | `groomed` | Groomed | — | Passed READINESS, ready for sizing/execution |
 | `sizing` | Sizing | BEND/FEND | Dev agents are estimating story points (sprint mode only) |
 | `sprint-planned` | Sprint Planned | — | Packed into a sprint, awaiting execution (sprint mode only) |
@@ -203,17 +207,26 @@ Update the story's `status` in `{backlog_path}` at each agent transition. These
 **Status transitions:**
 ```
-Grooming:    pending → requirements-spec → ux-spec → test-case-development → readiness-review → groomed
-Planning:    groomed → sizing → sprint-planned
-Execution:   sprint-planned → development → code-review → qa-validation → final-review → shipped
+Grooming (UI story):      pending → requirements-spec → ux-spec → ux-spec-in-progress → test-case-development → test-case-development-in-progress → readiness-review → readiness-review-in-progress → groomed
+Grooming (non-UI story):  pending → requirements-spec → test-case-development → test-case-development-in-progress → readiness-review → readiness-review-in-progress → groomed
+Planning:                 groomed → sizing → sprint-planned
+Execution:                sprint-planned → development → code-review → qa-validation → final-review → shipped
 ```
 In standalone story mode (no sprint), the flow skips `sizing` and `sprint-planned`:
 ```
-pending → requirements-spec → ux-spec → test-case-development → readiness-review → groomed → development → code-review → qa-validation → final-review → shipped
+pending → requirements-spec → ... → groomed → development → code-review → qa-validation → final-review → shipped
 ```
-**When to update:** Update status when spawning each agent (not on handoff). This ensures the status reflects current activity. On rework (READINESS rejection, JUDGE rejection), revert to the responsible phase's status.
+**When to update status (grooming):** Lead updates status in two steps per agent:
+1. **On agent pickup:** Set `{phase}-in-progress` (e.g., `ux-spec-in-progress`; for REQS the in-progress status is `requirements-spec` itself) — prevents double-pickup in sprint pipeline mode.
+2. **On agent `[HANDOFF]`:** Advance to the next phase's intake status (e.g., `test-case-development`).
+Lead is the sole writer to `{backlog_path}`. Agents send `[HANDOFF]` to Lead, who updates the status. Agents read status to determine what to work on (read-only scan) but never write to the backlog directly.
+**On rework (READINESS rejection, JUDGE rejection):** Revert to the responsible agent's intake status so the agent re-picks it up.
+**Backend-only stories** (no `ui` in `testing_profiles`): skip `ux-spec` and `ux-spec-in-progress` entirely. REQS handoff advances directly to `test-case-development`.
 ---
@@ -389,9 +402,11 @@ From the manifest `reads_from` / `writes_to`, build the execution order for this
 ### Step 5: Spawn Teammates
-For each agent in the roster, spawn a teammate with:
+**Do NOT read agent prompt files or step files yourself.** Use the spawn template — substitute variables and pass it to the Agent tool. Each teammate reads its own prompt and steps after spawning. This keeps your context lean.
+For each agent in the roster, spawn a teammate with the filled spawn template containing:
 - Role assignment from manifest
-- Prompt template from manifest `prompt_template`
+- Prompt path from manifest `prompt_template` (the teammate reads it, not you)
 - Shared context references: story_id, story_output_dir, tech stack values, correction directives
 - Task assignment with dependency information
@@ -648,7 +663,7 @@ In sprint mode, the standard story-by-story loop (Steps 6-7 above) is replaced b
 Read each orchestration step file in sequence:
 1. `.valent-pipeline/steps/orchestration/sprint-init.md` — compute velocity, resolve candidates, set sprint state
-2. `.valent-pipeline/steps/orchestration/sprint-groom.md` — spawn Phase 1 agents, groom stories sequentially, READINESS gate with rework loop, index to SQLite
+2. `.valent-pipeline/steps/orchestration/sprint-groom.md` — spawn Phase 1 agents, pipeline stories through REQS → UXA → QA-A → READINESS (assembly-line parallelism), rework loop, index to SQLite
 3. `.valent-pipeline/steps/orchestration/sprint-size.md` — spawn BEND/FEND with estimation step files, assign Fibonacci points, kill estimation agents
 4. `.valent-pipeline/steps/orchestration/sprint-plan.md` — greedy packing by priority, write sprint plan + status YAML, validate, kill Phase 1 agents
 5. `.valent-pipeline/steps/orchestration/sprint-execute.md` — execute stories sequentially with budget enforcement, Phase 2 agents per story, update status YAML in real-time
@@ -664,7 +679,7 @@ Read each orchestration step file in sequence:
 ### Sprint Crash Recovery
 On crash recovery, read `pipeline-state.json` `current_sprint` to determine where to resume:
-- `phase: "grooming"` — resume sprint-groom.md from the last un-groomed story
+- `phase: "grooming"` — resume sprint-groom.md; reconstruct pipeline state from per-story backlog statuses (each story's status indicates its current pipeline stage)
 - `phase: "sizing"` — resume sprint-size.md from the last un-sized story
 - `phase: "planning"` — re-run sprint-plan.md (idempotent)
 - `phase: "executing"` — resume sprint-execute.md from the current story

package/pipeline/prompts/qa-a.md CHANGED Viewed

@@ -82,6 +82,7 @@ Before finalizing, verify:
 ## Error Handling
 - If `reqs-brief.md` is missing: set blocker, message lead with `[BLOCKER]`, STOP.
-- If `uxa-spec.md` is missing for a fullstack/frontend project: proceed without visual validation checkpoints, note in output. Do NOT block.
+- If `uxa-spec.md` is missing AND `ui` is in `{testing_profiles}`: set blocker, message Lead with `[BLOCKER] uxa-spec.md missing for UI story {story_id}. Cannot proceed without UXA spec.` **STOP.** UXA must complete before QA-A can write visual validation checkpoints for UI stories.
+- If `uxa-spec.md` is missing AND `ui` is NOT in `{testing_profiles}`: proceed without visual validation checkpoints, note "N/A — no UI profile" in output. Do NOT block.
 - If an AC is ambiguous: write test case for most likely interpretation, flag with `[AMBIGUOUS]`, note assumption.
 - If crash recovery detects partial output: resume from last completed step per frontmatter.

package/pipeline/steps/fend/handoff.md CHANGED Viewed

@@ -7,3 +7,5 @@ Complete all sections of the handoff document using the template at `.valent-pip
 ## Independent Verification Requirement
 You must independently verify: all tests pass against the combined, integrated codebase before marking your task complete. Do not rely on BEND or CRITIC to catch your failures.
+**Smoke test gate:** The app-level smoke test (Step 9b) must pass before sending `[DONE]`. If the smoke test fails, the app's entry point is not wired to your deliverable — fix the wiring before marking complete.

package/pipeline/steps/fend/write-tests.md CHANGED Viewed

@@ -6,7 +6,19 @@ Satisfy qa-test-spec for each AC. Every test case named in qa-test-spec must hav
 ## Step 9: Run tests, verify all pass
 Run the full frontend test suite. All tests must pass. Record results in `fend-handoff.md#test-results-summary`. If tests fail, fix the code -- do not skip or weaken tests.
+## Step 9b: App-Level Smoke Test
+Write one test that bootstraps the application from its **entry point** (e.g., `main.tsx`, `App.tsx`, or the root route — NOT a direct component import) and asserts the story's deliverable is present and reachable. This test:
+- Runs under `{tech_stack.test_framework_unit}` (e.g., Vitest + jsdom/happy-dom) — no browser, no MCP needed.
+- Imports from the app's entry point or root component, renders it, and verifies the story's primary UI deliverable is in the rendered output (e.g., a new page route resolves, a new component appears in the layout).
+- Catches the "unwired entry point" class of bugs — where a component exists but is never mounted in the app because the route, import, or registration was missed.
+This test is **mandatory** for the first UI story in a project (before any E2E regression suite exists) and **recommended** for all subsequent UI stories.
+Record in `fend-handoff.md#test-files-written`.
 ## Step 10: Signal integration readiness
-When your code is complete and all unit tests pass, send to BEND via inbox:
+When your code is complete, all unit tests pass, and the smoke test passes, send to BEND via inbox:
 `[INTEGRATION-READY] Frontend code complete. Run integration tests against my UI.`
 Wait for BEND's `[INTEGRATION-READY]` message before running integration verification. Once both sides are ready, verify that your tests run against BEND's running server. API calls resolve correctly. Error handling works end-to-end. Resolve integration issues before marking complete.

package/pipeline/steps/orchestration/adopt-lead-and-create-team.md CHANGED Viewed

@@ -75,7 +75,15 @@ This file is appended to incrementally during monitoring as agents complete phas
 **IMPORTANT: Use the Agent tool to spawn teammates onto the team — NOT subagents.** Every pipeline agent must be a named teammate so it can send and receive inbox messages with other teammates. Use the `name` parameter on the Agent tool to set the teammate's addressable name (e.g., `name: "REQS"`).
-Read the spawn templates:
+**DO NOT read the agent's prompt files, step files, or templates yourself.** The spawn template tells the teammate to read its own prompt and steps. Your job is ONLY to:
+1. Read the spawn template (a short 15-line template)
+2. Substitute `{{variables}}` with resolved values from config and the task graph
+3. Pass the filled template as the Agent tool's `prompt` parameter
+4. The teammate reads its own prompt, step files, and inputs after it starts
+This keeps the main thread lightweight. If you read every agent's prompt and steps before spawning, you waste context window on content that only the teammate needs.
+Read the spawn templates (these are short variable templates, NOT the agent prompts):
 - `.valent-pipeline/spawn-templates/knowledge-spawn.template.md` -- for the Knowledge Agent
 - `.valent-pipeline/spawn-templates/agent-spawn.template.md` -- for all other agents

package/pipeline/steps/orchestration/sprint-groom.md CHANGED Viewed

@@ -2,46 +2,90 @@
 **Condition:** Only execute in sprint mode (`{is_sprint_mode}` is true).
-Groom stories sequentially through Phase 1 agents. Phase 1 agents stay alive across the grooming batch for context continuity.
+Groom stories through Phase 1 agents using a **pipelined model** — agents process stories concurrently at different stages. Phase 1 agents stay alive across the grooming batch for context continuity.
+## Step 0: Pre-Grooming Profile Tagging
+Before spawning any grooming agents, Lead tags `testing_profiles` on each pending story in `{backlog_path}`.
+For each pending story in the grooming batch:
+1. Read the story's ACs and scope description
+2. Determine which testing profiles apply using the same logic as `.valent-pipeline/steps/orchestration/validate-story-inputs.md` Step 1b:
+   - `api` — story has API endpoints, backend logic, or database changes
+   - `ui` — story has UI components, pages, or visual elements
+   - `data-pipeline` — story has ETL, data transformation, or batch processing
+3. Write `testing_profiles: [api, ui]` (or whichever apply) to the story's backlog entry
+This must complete before Step 1. Downstream agents rely on `testing_profiles` to determine conditional steps.
 ## Step 1: Spawn Phase 1 Agents
-Spawn: REQS, UXA (if fullstack/frontend project), QA-A, READINESS, Knowledge.
+Spawn: REQS, UXA (if any story in batch has `ui` in `testing_profiles`), QA-A, READINESS, Knowledge.
 Pass `{is_sprint_mode}: true` to READINESS so it executes cross-story checks.
-## Step 2: Process Stories Sequentially
+## Step 2: Pipeline Stories Through Phase 1
-For each story in grooming candidates (up to `{groom_target}` from sprint-init):
+Process stories using assembly-line parallelism. Each agent moves to the next available story as soon as it finishes its current one — no agent idles while downstream work continues.
+**Per-story stage progression:**
 1. Update story status in `{backlog_path}` to `requirements-spec`
-2. Send story context to REQS — REQS writes `reqs-brief.md`
-3. On REQS handoff → update status to `ux-spec` → UXA writes `uxa-spec.md` (skip if backend-only)
-4. On UXA handoff → update status to `test-case-development` → QA-A writes `qa-test-spec.md`
-5. On QA-A handoff → index all artifacts to SQLite **working table**:
+2. REQS writes `reqs-brief.md` → sends `[HANDOFF]` to Lead
+3. Lead advances status to `ux-spec` (if `ui` in `testing_profiles`) or `test-case-development` (if not)
+4. UXA writes `uxa-spec.md` (only for stories with `ui` in `testing_profiles`) → sends `[HANDOFF]` to Lead
+5. Lead advances status to `test-case-development`
+6. QA-A writes `qa-test-spec.md` → sends `[HANDOFF]` to Lead
+7. Lead advances status to `readiness-review`
+8. Index artifacts to SQLite **working table**:
    ```bash
    node .valent-pipeline/bin/cli.js db index-working \
      --story-id {story_id} \
      --sprint-id {current_sprint_id}
    ```
-6. Update status to `readiness-review` → READINESS reviews specs + cross-story checks
+9. READINESS reviews specs + cross-story checks
+**Status-based self-selection with type filtering:**
+Each agent scans `{backlog_path}` (read-only) for its intake status to determine what to work on next. Lead updates statuses on agent pickup and handoff:
+| Agent | Intake Status | Type Filter | In-Progress Status | Handoff Status |
+|-------|--------------|-------------|-------------------|----------------|
+| REQS | `pending` | all | `requirements-spec` | `ux-spec` if `ui` in profiles, else `test-case-development` |
+| UXA | `ux-spec` | `ui` in `testing_profiles` | `ux-spec-in-progress` | `test-case-development` |
+| QA-A | `test-case-development` | all | `test-case-development-in-progress` | `readiness-review` |
+| READINESS | `readiness-review` | all | `readiness-review-in-progress` | `groomed` |
-**On READINESS approval:**
-- Update status to `groomed`
-- Move to next story
+**Protocol:** Agent finishes current story → scans backlog for next story at its intake status (+ type filter) → picks highest priority → sends pickup signal to Lead → Lead sets in-progress status → agent works → sends `[HANDOFF]` to Lead → Lead advances to handoff status.
+**Backlog write ownership:** Lead remains the sole writer to `{backlog_path}`. Agents do not update statuses directly. An agent's `[HANDOFF]` message to Lead triggers Lead to update the story's status.
+**Non-UI stories** skip `ux-spec` entirely. REQS handoff advances directly to `test-case-development`. UXA never sees them.
+## Step 3: Rework Handling
 **On READINESS rejection:**
-- Route to responsible agent (REQS, UXA, or QA-A) per rejection routing table
-- Agent revises, downstream re-processes
-- Re-index to working table (overwrites previous)
-- READINESS re-reviews
-- Cap at `{max_rejection_cycles}`. Stories exceeding cap: mark `blocked-on-user`, remove from grooming batch
-## Step 3: Context Pressure Management
+- Route to responsible agent (REQS, UXA, or QA-A) per rejection routing table.
+- The rework task enters that agent's queue at **highest priority** — it is processed before any new stories.
+- Agent revises, downstream agents re-process the reworked story (also at highest priority).
+- Re-index to working table (overwrites previous).
+- READINESS re-reviews.
+- Cap at `{max_rejection_cycles}`. Stories exceeding cap: mark `blocked-on-user`, remove from pipeline.
+**Rework priority rule:** An agent that receives a rework task finishes its current in-progress story first, then processes the rework before picking up any new story. This prevents context-switching mid-story while ensuring rework is not starved.
+## Step 4: Pipeline Completion
+All stories are groomed when every story in the batch has reached `groomed` or `blocked-on-user` status. Agents that have emptied their queue remain idle until all stories complete the full pipeline.
+## Step 5: Context Pressure Management
+After every `{sprint_max_groom_batch}` stories **entering REQS** (default: 10), kill and respawn Phase 1 agents to manage context window pressure. Knowledge agent is NOT killed (persists per epic/project).
-After every `{sprint_max_groom_batch}` stories (default: 10), kill and respawn Phase 1 agents to manage context window pressure. Knowledge agent is NOT killed (persists per epic/project).
+When respawning mid-pipeline, allow in-flight stories to complete their current agent stage before killing that agent. Resume the pipeline with fresh agents.
-## Step 4: Flush Working Table
+## Step 6: Flush Working Table
 After all stories groomed:
@@ -52,7 +96,7 @@ node .valent-pipeline/bin/cli.js db flush-working \
 This copies final post-READINESS specs from `artifacts_working` → `artifacts` (main table), then clears the working table.
-## Step 5: Update Sprint State
+## Step 7: Update Sprint State
 Update `pipeline-state.json`: `current_sprint.phase = "sizing"`.

package/pipeline/steps/orchestration/update-backlog-status.md CHANGED Viewed

@@ -26,3 +26,30 @@ Update the backlog item for `{story_id}` in `{backlog_path}`.
 1. Set the current item's `status` to `blocked-on-user`
 2. Note the reason in the item
+## On Grooming Phase Progression
+During sprint grooming (sprint-groom.md), Lead updates story statuses as agents progress through the pipeline:
+**On agent pickup:** Set the in-progress status to prevent double-pickup in assembly-line mode.
+| Event | New Status |
+|-------|-----------|
+| UXA picks up story | `ux-spec-in-progress` |
+| QA-A picks up story | `test-case-development-in-progress` |
+| READINESS picks up story | `readiness-review-in-progress` |
+**On agent `[HANDOFF]`:** Advance to the next phase's intake status.
+| Event | New Status |
+|-------|-----------|
+| REQS completes (`ui` in profiles) | `ux-spec` |
+| REQS completes (`ui` NOT in profiles) | `test-case-development` |
+| UXA completes | `test-case-development` |
+| QA-A completes | `readiness-review` |
+| READINESS approves | `groomed` |
+**On READINESS rejection:** Revert to the responsible agent's intake status so the agent re-picks it up:
+- Reject to REQS → `pending` (REQS re-processes)
+- Reject to UXA → `ux-spec` (UXA re-processes)
+- Reject to QA-A → `test-case-development` (QA-A re-processes)

package/pipeline/steps/qa-a/read-inputs.md CHANGED Viewed

@@ -28,5 +28,6 @@ Risk factors:
 ## Error Handling
 - `reqs-brief.md` missing: blocker, `[BLOCKER]` to lead, STOP.
-- `uxa-spec.md` missing (fullstack/frontend): proceed without visual checkpoints, note in output. Do NOT block.
+- `uxa-spec.md` missing + `ui` in `{testing_profiles}`: `[BLOCKER]` to Lead, STOP. UXA must complete first.
+- `uxa-spec.md` missing + `ui` NOT in `{testing_profiles}`: proceed without visual checkpoints, note "N/A — no UI profile". Do NOT block.
 - Ambiguous AC: write test for most likely interpretation, flag `[AMBIGUOUS]`, note assumption.

package/pipeline/steps/qa-a/write-spec.md CHANGED Viewed

@@ -65,11 +65,12 @@ Per error test case: error code (HTTP status or app code), error message pattern
 For each NFR-sensitive path: `[NFR-PERF]` response time + load patterns; `[NFR-SEC]` auth boundaries + input validation (SQLi, XSS); `[NFR-REL]` partial failure + data consistency + retry. Skip if no NFR targets; note "No NFR-sensitive paths identified."
-## Step 9: Visual Validation Checkpoints (Conditional)
+## Step 9: Visual Validation Checkpoints (Conditional — Keyed on Profile)
-If `uxa-spec.md` available, for each page state define: Checkpoint ID (VV-{NNN}), Page/Route, State (Default/Loading/Empty/Error/Success or custom), AC Reference, Area labels in scope, Screenshot filename (`{story_id}_VV-{NNN}_{page}_{state}.png`), Expected visual elements, Setup instructions, Pass criteria.
+**Trigger:** `ui` is in `{testing_profiles}` (NOT file existence).
-Write to `{story_output_dir}/visual-validation-checklist.md`. If no UXA spec: skip, note "N/A -- no UI components."
+- If `ui` in `{testing_profiles}` → **MANDATORY.** Read `uxa-spec.md`. If `uxa-spec.md` is missing, send `[BLOCKER]` to Lead — do NOT proceed without it. For each page state define: Checkpoint ID (VV-{NNN}), Page/Route, State (Default/Loading/Empty/Error/Success or custom), AC Reference, Area labels in scope, Screenshot filename (`{story_id}_VV-{NNN}_{page}_{state}.png`), Expected visual elements, Setup instructions, Pass criteria. Write to `{story_output_dir}/visual-validation-checklist.md`.
+- If `ui` NOT in `{testing_profiles}` → skip, note "N/A — no UI profile."
 ## Step 10: Write Final Outputs

package/pipeline/steps/qa-b/execute-tests.md CHANGED Viewed

@@ -34,6 +34,14 @@ Execute complete test suite against real infrastructure:
 Record per test: pass/fail/skip, execution time, error output (failures). Record exact commands for reproducibility.
+### Step 4a: UI Regression Suite (Every Story)
+Once E2E tests exist in the project (from any prior shipped UI story), run the **full** `{tech_stack.test_framework_e2e}` suite on **every** story — including `api`-only stories. This catches regressions where backend changes break existing UI flows.
+- **Zero mocks, zero interception** — real browser, real API, real DB. Standard automated test execution, not PMCP.
+- If a previously-passing E2E test now fails: file as minimum **P2** bug against the current story.
+- **Skip only if** no E2E tests exist yet in the project (pre-first-UI-story). In that case, note "No E2E regression suite exists yet — skipped."
 ## Step 4b: Load and Execute Testing Profile Steps
 Read testing profile step file(s) from `.valent-pipeline/steps/qa-b/` based on `{testing_profiles}`:

package/pipeline/steps/readiness/standalone-review.md CHANGED Viewed

@@ -1,6 +1,32 @@
 # Standalone Review
-**STRICT order:** REQS -> UXA (if `fullstack-web`/`frontend-only`) -> QA-A. **Stop on first failure.** Update `stepsCompleted`/`pendingSteps` after each.
+**STRICT order:** Step 0 (profile + artifacts) -> REQS -> UXA (if `ui` in `testing_profiles`) -> QA-A. **Stop on first failure.** Update `stepsCompleted`/`pendingSteps` after each.
+## Step 0: Validate Profile and Derive Artifact Matrix
+### 0a — Validate Testing Profile
+Read the story's ACs and scope. Independently assess which `testing_profiles` should apply:
+- `api` — story has API endpoints, backend logic, or database changes
+- `ui` — story has UI components, pages, or visual elements
+- `data-pipeline` — story has ETL, data transformation, or batch processing
+Compare your assessment against the `testing_profiles` tagged on the backlog entry:
+- **Missing profile that should be present** → reject to Lead: `[READINESS-REJECTION] Story {story_id}: testing_profiles missing '{profile}'. Re-tag and re-groom.` **STOP.**
+- **Over-tagging** (profile present but not needed) → advisory only, note in review but do not reject.
+### 0b — Derive Artifact Matrix from Validated Profile
+| Artifact | Required When | Responsible Agent |
+|----------|--------------|-------------------|
+| `reqs-brief.md` | Always | REQS |
+| `uxa-spec.md` | `ui` in `testing_profiles` | UXA |
+| `qa-test-spec.md` | Always | QA-A |
+| `visual-validation-checklist.md` | `ui` in `testing_profiles` | QA-A |
+Check each required artifact exists in `{story_output_dir}`. Missing required artifact → reject to responsible agent: `[READINESS-REJECTION] Story {story_id}: missing {artifact}. See readiness-review.md#missing-artifacts.` Send to responsible agent AND Lead. **STOP.**
+All subsequent checks are gated by this matrix — only validate artifacts that are required per profile.
 ## Step 1: Read REQS Brief
@@ -22,7 +48,7 @@ Read `{story_output_dir}/reqs-brief.md`. Record in `inputsRead`.
 **If ANY fails:** Reject to `readiness-review.md#reqs-rejection-reasons`. Send `[READINESS-REJECTION]` to **REQS** AND to Lead: `[READINESS-REJECTION] Story {story_id}: ACs need rework. See readiness-review.md#reqs-rejection-reasons.` **STOP**.
-## Steps 3-4: Read + Validate UXA (SKIP if backend-only)
+## Steps 3-4: Read + Validate UXA (SKIP if `ui` NOT in `testing_profiles`)
 Read `{story_output_dir}/uxa-spec.md`. Record in `inputsRead`. Only validate if REQS passed.
@@ -61,6 +87,19 @@ Only if REQS passed and UXA passed (or skipped).
 **If ANY fails:** Reject to `readiness-review.md#qa-spec-rejection-reasons`. Send `[READINESS-REJECTION]` to **QA-A** AND to Lead: `[READINESS-REJECTION] Story {story_id}: Traceability gaps. See readiness-review.md#qa-spec-rejection-reasons.` **STOP**.
+### Step 6b: Validate Visual Validation Checklist (if `ui` in `testing_profiles`)
+Only if QA-A spec passed and `ui` in `testing_profiles`:
+| Check | Criteria |
+|-------|----------|
+| Checklist exists | `visual-validation-checklist.md` present in `{story_output_dir}` |
+| Checkpoint coverage | Every page/route in `uxa-spec.md` has at least one checkpoint |
+| 5-state coverage | Each page has checkpoints for all 5 states: Default, Loading, Empty, Error, Success |
+| AC traceability | Every visual checkpoint references an AC from `reqs-brief.md` |
+**If ANY fails:** Reject to `readiness-review.md#visual-checklist-rejection-reasons`. Send `[READINESS-REJECTION]` to **QA-A** AND to Lead. **STOP**.
 ## Step 7: Red Team Analysis
 Only if ALL specs passed. Probe each test case as a "lazy dev who wants green tests with minimal effort":

package/pipeline/templates/sprint-status.template.yaml CHANGED Viewed

@@ -14,8 +14,11 @@ status_labels:
   cancelled: Cancelled
   requirements-spec: Requirements Spec
   ux-spec: UX Spec
+  ux-spec-in-progress: UX Spec (In Progress)
   test-case-development: Test Case Development
+  test-case-development-in-progress: Test Case Development (In Progress)
   readiness-review: Readiness Review
+  readiness-review-in-progress: Readiness Review (In Progress)
   groomed: Groomed
   sizing: Sizing
   sprint-planned: Sprint Planned

package/skills/valent-run-epic/SKILL.md CHANGED Viewed

@@ -6,7 +6,7 @@ argument-hint: '<epic-id>'
 # valent-run-epic
-Run all stories tagged with an epic through the v3 multiagent pipeline using sprint-based planning and execution. Stories are groomed, sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Each sprint ends with a retrospective that calibrates future estimates.
+Run all stories tagged with an epic through the v3 multiagent pipeline using sprint-based planning and execution. Stories are groomed (pipelined through Phase 1 agents), sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Each sprint ends with a retrospective that calibrates future estimates.
 ## Arguments
@@ -20,6 +20,15 @@ Use the standard 200k context window. Per `pipeline-config.yaml` `orchestration.
 ## Execution Steps
+### Step 0: Pre-Flight MCP Check
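+This check reads values from `pipeline-config.yaml`, so load that file first (Step 1 describes how), then check whether visual validation infrastructure is required before doing anything else: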
+1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
+2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run this epic.`
+This is an infrastructure prerequisite. Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
 ### Step 1: Load Pipeline Config
 Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.
@@ -82,7 +91,7 @@ Set `{epic_filter}` = `{epic_id}` to scope story resolution to this epic.
 Read and follow `.valent-pipeline/steps/orchestration/sprint-groom.md`.
-Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are processed sequentially through the grooming pipeline with READINESS performing cross-story checks.
+Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are pipelined through agents using assembly-line parallelism (see sprint-groom.md) with READINESS performing cross-story checks.
 #### 4e. Sprint Sizing

package/skills/valent-run-project/SKILL.md CHANGED Viewed

@@ -5,7 +5,7 @@ description: 'Run all stories across all epics with sprint planning, ordered by
 # valent-run-project
-Run every pending story in the backlog, across all epics, using sprint-based planning and execution. Stories are groomed, sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Cross-epic dependency resolution ensures stories from any epic can run when their dependencies are met. Each sprint ends with a retrospective that calibrates future estimates.
+Run every pending story in the backlog, across all epics, using sprint-based planning and execution. Stories are groomed (pipelined through Phase 1 agents), sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Cross-epic dependency resolution ensures stories from any epic can run when their dependencies are met. Each sprint ends with a retrospective that calibrates future estimates.
 ## Context Window Advisory
@@ -13,6 +13,15 @@ Use the standard 200k context window. Auto-compression fires every 2-3 stories.
 ## Execution Steps
+### Step 0: Pre-Flight MCP Check
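+This check reads values from `pipeline-config.yaml`, so load that file first (Step 1 describes how), then check whether visual validation infrastructure is required before doing anything else: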
+1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
+2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run.`
+This is an infrastructure prerequisite. Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
 ### Step 1: Load Pipeline Config
 Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.
@@ -96,7 +105,7 @@ Do NOT set `{epic_filter}` — sprint planning pulls from the full cross-epic ba
 Read and follow `.valent-pipeline/steps/orchestration/sprint-groom.md`.
-Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are processed sequentially through the grooming pipeline with READINESS performing cross-story checks.
+Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are pipelined through agents using assembly-line parallelism (see sprint-groom.md) with READINESS performing cross-story checks.
 #### 4e. Sprint Sizing

package/skills/valent-run-story/SKILL.md CHANGED Viewed

@@ -18,6 +18,15 @@ If no argument is provided, resolve the next work item from the backlog (see Ste
 ## Execution Steps
+### Step 0: Pre-Flight MCP Check
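+This check reads values from `pipeline-config.yaml`, so load that file first (Step 1 describes how), then check whether visual validation infrastructure is required before doing anything else: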
+1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
+2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run this story.`
+This is an infrastructure prerequisite — same category as "does `pipeline-config.yaml` exist?" Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
 ### Step 1: Load Pipeline Config
 Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.

package/src/commands/init.js CHANGED Viewed

@@ -99,12 +99,50 @@ export async function init(options = {}) {
     console.log('  Run "valent-pipeline db init" to create the database.');
   }
+  // 7b. Install browser automation MCP for UI projects
+  const projectType = config.project?.type || 'fullstack-web';
+  const uiProjectTypes = ['fullstack-web', 'frontend-only'];
+  if (uiProjectTypes.includes(projectType)) {
+    const mcpName = config.tech_stack?.browser_automation_mcp || 'playwright-mcp';
+    const installMcp = options.yes || await confirmPrompt(
+      `Install browser automation MCP (${mcpName}) for visual validation?`,
+      true
+    );
+    if (installMcp) {
+      console.log(`  Installing ${mcpName}...`);
+      const { execSync } = await import('child_process');
+      try {
+        execSync(`npm install -g @anthropic-ai/${mcpName}`, { stdio: 'pipe' });
+        console.log(`  Installed ${mcpName}`);
+      } catch (err) {
+        console.warn(`  Warning: Failed to install ${mcpName}. Run "npm install -g @anthropic-ai/${mcpName}" manually.`);
+      }
+      // Register in .claude/settings.json mcpServers
+      const mcpSettingsPath = join(projectRoot, '.claude', 'settings.json');
+      let mcpSettings = {};
+      if (fileExists(mcpSettingsPath)) {
+        try {
+          mcpSettings = JSON.parse(readFileSync(mcpSettingsPath, 'utf-8'));
+        } catch { /* start fresh if parse fails */ }
+      }
+      if (!mcpSettings.mcpServers) mcpSettings.mcpServers = {};
+      if (!mcpSettings.mcpServers[mcpName]) {
+        mcpSettings.mcpServers[mcpName] = {
+          command: 'npx',
+          args: [`@anthropic-ai/${mcpName}`]
+        };
+        writeFileSafe(mcpSettingsPath, JSON.stringify(mcpSettings, null, 2) + '\n');
+        console.log(`  Registered ${mcpName} in .claude/settings.json`);
+      }
+    }
+  }
   // 8. Configure Claude settings for agent teams
   const claudeSettingsPath = join(projectRoot, '.claude', 'settings.json');
   let claudeSettings = {};
   if (fileExists(claudeSettingsPath)) {
     try {
-      claudeSettings = JSON.parse(readFile(claudeSettingsPath));
+      claudeSettings = JSON.parse(readFileSync(claudeSettingsPath, 'utf-8'));
     } catch { /* start fresh if parse fails */ }
   }
   if (!claudeSettings.env) claudeSettings.env = {};
@@ -129,6 +167,14 @@ export async function init(options = {}) {
   console.log('');
 }
+async function confirmPrompt(message, defaultValue = true) { // Ask a single confirm-style question and resolve to the user's answer.
+  const inquirer = (await import('inquirer')).default; // Imported inside the function, so inquirer is only loaded when a prompt is actually shown.
+  const { answer } = await inquirer.prompt([{ // One question; the result object is keyed by the question's `name`.
+    type: 'confirm', name: 'answer', message, default: defaultValue, // `message` is the text shown; `defaultValue` is passed through as the prompt's `default` option.
+  }]);
+  return answer;
+}
 async function runWizard() {
   const inquirer = (await import('inquirer')).default;
   const config = JSON.parse(JSON.stringify(defaults));