valent-pipeline 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "valent-pipeline",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "description": "v3 multi-agent AI pipeline for software development lifecycle",
5
5
  "type": "module",
6
6
  "bin": {
@@ -64,7 +64,8 @@ Defines the JSON schema for `pipeline-state.json`, the Lead agent's persistent s
64
64
  | Field | Type | Description | Read by | Written by |
65
65
  |-------|------|-------------|---------|------------|
66
66
  | `id` | string | Story identifier | Lead (for scheduling) | Lead (on user submission) |
67
- | `status` | enum | Granular phase statuses: `pending`, `requirements-spec`, `ux-spec`, `test-case-development`, `readiness-review`, `groomed`, `sizing`, `sprint-planned`, `development`, `code-review`, `qa-validation`, `final-review`, `shipped`, `blocked`, `blocked-on-user`, `cancelled` | Lead (to select next story) | Lead (on status changes) |
67
+ | `status` | enum | Granular phase statuses: `pending`, `requirements-spec`, `ux-spec`, `ux-spec-in-progress`, `test-case-development`, `test-case-development-in-progress`, `readiness-review`, `readiness-review-in-progress`, `groomed`, `sizing`, `sprint-planned`, `development`, `code-review`, `qa-validation`, `final-review`, `shipped`, `blocked`, `blocked-on-user`, `cancelled` | Lead (to select next story) | Lead (on status changes) |
68
+ | `testing_profiles` | string[] | Active testing profiles (e.g., `[api, ui]`). Determines which agents/steps to activate. | Lead, all Phase 1 agents (read-only) | Lead (during grooming Step 0) |
68
69
  | `depends_on` | string[] | Story IDs that must complete before this story can start | Lead (for dependency resolution) | Lead (on user submission) |
69
70
  | `blocked_reason` | string | Human-readable reason for blocked status; empty or absent when not blocked | Lead (for user reporting) | Lead (when blocking occurs) |
70
71
 
@@ -114,7 +115,7 @@ If the Lead restarts and finds `current_sprint.phase` is not `completed`:
114
115
 
115
116
  1. Read `current_sprint.phase` to determine the sprint sub-phase.
116
117
  2. Resume from the appropriate orchestration step file:
117
- - `grooming` → resume `sprint-groom.md` from last un-groomed story
118
+ - `grooming` → resume `sprint-groom.md`; read backlog to reconstruct in-flight pipeline state from per-story statuses (`requirements-spec`, `ux-spec`, `ux-spec-in-progress`, `test-case-development`, `test-case-development-in-progress`, `readiness-review`, `readiness-review-in-progress`). Respawn Phase 1 agents. Each agent resumes from its stage.
118
119
  - `sizing` → resume `sprint-size.md` from last un-sized story
119
120
  - `planning` → re-run `sprint-plan.md` (idempotent)
120
121
  - `executing` → resume `sprint-execute.md` from current story
@@ -170,6 +170,7 @@ Each backlog entry has:
170
170
  - `type` — `story` or `bug`
171
171
  - `status` — one of the granular phase statuses (see Story Status Tracking below)
172
172
  - `priority` — integer, lower = higher priority
173
+ - `testing_profiles` — list of active testing profiles (e.g., `[api, ui]`). Tagged by Lead during grooming Step 0 by analyzing ACs. Determines which agents/steps to activate (e.g., skip UXA if `ui` not in profiles). Read by all Phase 1 agents.
173
174
  - `depends_on` — list of item IDs that must be `shipped` before this item starts
174
175
  - `blocked_by_bugs` — list of bug IDs that must be resolved before this story starts (stories only)
175
176
  - `conditional_bugs` — list of bug IDs filed as conditional on ship (shipped stories only)
@@ -186,9 +187,12 @@ Update the story's `status` in `{backlog_path}` at each agent transition. These
186
187
  |--------|-------|-------|---------|
187
188
  | `pending` | Pending | — | Not yet started, available for grooming |
188
189
  | `requirements-spec` | Requirements Spec | REQS | REQS is writing the requirements brief |
189
- | `ux-spec` | UX Spec | UXA | UXA is writing the UX specification |
190
- | `test-case-development` | Test Case Development | QA-A | QA-A is writing the test plan |
191
- | `readiness-review` | Readiness Review | READINESS | READINESS is evaluating spec quality |
190
+ | `ux-spec` | UX Spec | — | REQS complete, awaiting UXA pickup (UI stories only) |
191
+ | `ux-spec-in-progress` | UX Spec (In Progress) | UXA | UXA is writing the UX specification |
192
+ | `test-case-development` | Test Case Development | — | Upstream complete, awaiting QA-A pickup |
193
+ | `test-case-development-in-progress` | Test Case Development (In Progress) | QA-A | QA-A is writing the test plan |
194
+ | `readiness-review` | Readiness Review | — | QA-A complete, awaiting READINESS pickup |
195
+ | `readiness-review-in-progress` | Readiness Review (In Progress) | READINESS | READINESS is evaluating spec quality |
192
196
  | `groomed` | Groomed | — | Passed READINESS, ready for sizing/execution |
193
197
  | `sizing` | Sizing | BEND/FEND | Dev agents are estimating story points (sprint mode only) |
194
198
  | `sprint-planned` | Sprint Planned | — | Packed into a sprint, awaiting execution (sprint mode only) |
@@ -203,17 +207,26 @@ Update the story's `status` in `{backlog_path}` at each agent transition. These
203
207
 
204
208
  **Status transitions:**
205
209
  ```
206
- Grooming: pending → requirements-spec → ux-spec → test-case-development → readiness-review → groomed
207
- Planning: groomed → sizing → sprint-planned
208
- Execution: sprint-planned → development → code-review → qa-validation → final-review → shipped
210
+ Grooming (UI story): pending → requirements-spec → ux-spec → ux-spec-in-progress → test-case-development → test-case-development-in-progress → readiness-review → readiness-review-in-progress → groomed
211
+ Grooming (non-UI story): pending → requirements-spec → test-case-development → test-case-development-in-progress → readiness-review → readiness-review-in-progress → groomed
212
+ Planning: groomed → sizing → sprint-planned
213
+ Execution: sprint-planned → development → code-review → qa-validation → final-review → shipped
209
214
  ```
210
215
 
211
216
  In standalone story mode (no sprint), the flow skips `sizing` and `sprint-planned`:
212
217
  ```
213
- pending → requirements-spec → ux-spec → test-case-development → readiness-review → groomed → development → code-review → qa-validation → final-review → shipped
218
+ pending → requirements-spec → ... → groomed → development → code-review → qa-validation → final-review → shipped
214
219
  ```
215
220
 
216
- **When to update:** Update status when spawning each agent (not on handoff). This ensures the status reflects current activity. On rework (READINESS rejection, JUDGE rejection), revert to the responsible phase's status.
221
+ **When to update status (grooming):** Lead updates status in two steps per agent:
222
+ 1. **On agent pickup:** Set `{phase}-in-progress` (e.g., `ux-spec-in-progress`) — prevents double-pickup in sprint pipeline mode.
223
+ 2. **On agent `[HANDOFF]`:** Advance to the next phase's intake status (e.g., `test-case-development`).
224
+
225
+ Lead is the sole writer to `{backlog_path}`. Agents send `[HANDOFF]` to Lead, who updates the status. Agents read status to determine what to work on (read-only scan) but never write to the backlog directly.
226
+
227
+ **On rework (READINESS rejection, JUDGE rejection):** Revert to the responsible agent's intake status so the agent re-picks it up.
228
+
229
+ **Backend-only stories** (no `ui` in `testing_profiles`): skip `ux-spec` and `ux-spec-in-progress` entirely. REQS handoff advances directly to `test-case-development`.
217
230
 
218
231
  ---
219
232
 
@@ -650,7 +663,7 @@ In sprint mode, the standard story-by-story loop (Steps 6-7 above) is replaced b
650
663
  Read each orchestration step file in sequence:
651
664
 
652
665
  1. `.valent-pipeline/steps/orchestration/sprint-init.md` — compute velocity, resolve candidates, set sprint state
653
- 2. `.valent-pipeline/steps/orchestration/sprint-groom.md` — spawn Phase 1 agents, groom stories sequentially, READINESS gate with rework loop, index to SQLite
666
+ 2. `.valent-pipeline/steps/orchestration/sprint-groom.md` — spawn Phase 1 agents, pipeline stories through REQS → UXA → QA-A → READINESS (assembly-line parallelism), rework loop, index to SQLite
654
667
  3. `.valent-pipeline/steps/orchestration/sprint-size.md` — spawn BEND/FEND with estimation step files, assign Fibonacci points, kill estimation agents
655
668
  4. `.valent-pipeline/steps/orchestration/sprint-plan.md` — greedy packing by priority, write sprint plan + status YAML, validate, kill Phase 1 agents
656
669
  5. `.valent-pipeline/steps/orchestration/sprint-execute.md` — execute stories sequentially with budget enforcement, Phase 2 agents per story, update status YAML in real-time
@@ -666,7 +679,7 @@ Read each orchestration step file in sequence:
666
679
  ### Sprint Crash Recovery
667
680
 
668
681
  On crash recovery, read `pipeline-state.json` `current_sprint` to determine where to resume:
669
- - `phase: "grooming"` — resume sprint-groom.md from the last un-groomed story
682
+ - `phase: "grooming"` — resume sprint-groom.md; reconstruct pipeline state from per-story backlog statuses (each story's status indicates its current pipeline stage)
670
683
  - `phase: "sizing"` — resume sprint-size.md from the last un-sized story
671
684
  - `phase: "planning"` — re-run sprint-plan.md (idempotent)
672
685
  - `phase: "executing"` — resume sprint-execute.md from the current story
@@ -82,6 +82,7 @@ Before finalizing, verify:
82
82
  ## Error Handling
83
83
 
84
84
  - If `reqs-brief.md` is missing: set blocker, message lead with `[BLOCKER]`, STOP.
85
- - If `uxa-spec.md` is missing for a fullstack/frontend project: proceed without visual validation checkpoints, note in output. Do NOT block.
85
+ - If `uxa-spec.md` is missing AND `ui` is in `{testing_profiles}`: set blocker, message Lead with `[BLOCKER] uxa-spec.md missing for UI story {story_id}. Cannot proceed without UXA spec.` **STOP.** UXA must complete before QA-A can write visual validation checkpoints for UI stories.
86
+ - If `uxa-spec.md` is missing AND `ui` is NOT in `{testing_profiles}`: proceed without visual validation checkpoints, note "N/A — no UI profile" in output. Do NOT block.
86
87
  - If an AC is ambiguous: write test case for most likely interpretation, flag with `[AMBIGUOUS]`, note assumption.
87
88
  - If crash recovery detects partial output: resume from last completed step per frontmatter.
@@ -7,3 +7,5 @@ Complete all sections of the handoff document using the template at `.valent-pip
7
7
 
8
8
  ## Independent Verification Requirement
9
9
  You must independently verify: all tests pass against the combined, integrated codebase before marking your task complete. Do not rely on BEND or CRITIC to catch your failures.
10
+
11
+ **Smoke test gate:** The app-level smoke test (Step 9b) must pass before sending `[DONE]`. If the smoke test fails, the app's entry point is not wired to your deliverable — fix the wiring before marking complete.
@@ -6,7 +6,19 @@ Satisfy qa-test-spec for each AC. Every test case named in qa-test-spec must hav
6
6
  ## Step 9: Run tests, verify all pass
7
7
  Run the full frontend test suite. All tests must pass. Record results in `fend-handoff.md#test-results-summary`. If tests fail, fix the code -- do not skip or weaken tests.
8
8
 
9
+ ## Step 9b: App-Level Smoke Test
10
+
11
+ Write one test that bootstraps the application from its **entry point** (e.g., `main.tsx`, `App.tsx`, or the root route — NOT a direct component import) and asserts the story's deliverable is present and reachable. This test:
12
+
13
+ - Runs under `{tech_stack.test_framework_unit}` (e.g., Vitest + jsdom/happy-dom) — no browser, no MCP needed.
14
+ - Imports from the app's entry point or root component, renders it, and verifies the story's primary UI deliverable is in the rendered output (e.g., a new page route resolves, a new component appears in the layout).
15
+ - Catches the "unwired entry point" class of bugs — where a component exists but is never mounted in the app because the route, import, or registration was missed.
16
+
17
+ This test is **mandatory** for the first UI story in a project (before any E2E regression suite exists) and **recommended** for all subsequent UI stories.
18
+
19
+ Record in `fend-handoff.md#test-files-written`.
20
+
9
21
  ## Step 10: Signal integration readiness
10
- When your code is complete and all unit tests pass, send to BEND via inbox:
22
+ When your code is complete, all unit tests pass, and the smoke test passes, send to BEND via inbox:
11
23
  `[INTEGRATION-READY] Frontend code complete. Run integration tests against my UI.`
12
24
  Wait for BEND's `[INTEGRATION-READY]` message before running integration verification. Once both sides are ready, verify that your tests run against BEND's running server. API calls resolve correctly. Error handling works end-to-end. Resolve integration issues before marking complete.
@@ -2,46 +2,90 @@
2
2
 
3
3
  **Condition:** Only execute in sprint mode (`{is_sprint_mode}` is true).
4
4
 
5
- Groom stories sequentially through Phase 1 agents. Phase 1 agents stay alive across the grooming batch for context continuity.
5
+ Groom stories through Phase 1 agents using a **pipelined model** — agents process stories concurrently at different stages. Phase 1 agents stay alive across the grooming batch for context continuity.
6
+
7
+ ## Step 0: Pre-Grooming Profile Tagging
8
+
9
+ Before spawning any grooming agents, Lead tags `testing_profiles` on each pending story in `{backlog_path}`.
10
+
11
+ For each pending story in the grooming batch:
12
+ 1. Read the story's ACs and scope description
13
+ 2. Determine which testing profiles apply using the same logic as `.valent-pipeline/steps/orchestration/validate-story-inputs.md` Step 1b:
14
+ - `api` — story has API endpoints, backend logic, or database changes
15
+ - `ui` — story has UI components, pages, or visual elements
16
+ - `data-pipeline` — story has ETL, data transformation, or batch processing
17
+ 3. Write `testing_profiles: [api, ui]` (or whichever apply) to the story's backlog entry
18
+
19
+ This must complete before Step 1. Downstream agents rely on `testing_profiles` to determine conditional steps.
6
20
 
7
21
  ## Step 1: Spawn Phase 1 Agents
8
22
 
9
- Spawn: REQS, UXA (if fullstack/frontend project), QA-A, READINESS, Knowledge.
23
+ Spawn: REQS, UXA (if any story in batch has `ui` in `testing_profiles`), QA-A, READINESS, Knowledge.
10
24
 
11
25
  Pass `{is_sprint_mode}: true` to READINESS so it executes cross-story checks.
12
26
 
13
- ## Step 2: Process Stories Sequentially
27
+ ## Step 2: Pipeline Stories Through Phase 1
14
28
 
15
- For each story in grooming candidates (up to `{groom_target}` from sprint-init):
29
+ Process stories using assembly-line parallelism. Each agent moves to the next available story as soon as it finishes its current one — no agent idles while downstream work continues.
30
+
31
+ **Per-story stage progression:**
16
32
 
17
33
  1. Update story status in `{backlog_path}` to `requirements-spec`
18
- 2. Send story context to REQS — REQS writes `reqs-brief.md`
19
- 3. On REQS handoff → update status to `ux-spec` → UXA writes `uxa-spec.md` (skip if backend-only)
20
- 4. On UXA handoff → update status to `test-case-development` → QA-A writes `qa-test-spec.md`
21
- 5. On QA-A handoff → index all artifacts to SQLite **working table**:
34
+ 2. REQS writes `reqs-brief.md` → sends `[HANDOFF]` to Lead
35
+ 3. Lead advances status to `ux-spec` (if `ui` in `testing_profiles`) or `test-case-development` (if not)
36
+ 4. UXA writes `uxa-spec.md` (only for stories with `ui` in `testing_profiles`) → sends `[HANDOFF]` to Lead
37
+ 5. Lead advances status to `test-case-development`
38
+ 6. QA-A writes `qa-test-spec.md` → sends `[HANDOFF]` to Lead
39
+ 7. Lead advances status to `readiness-review`
40
+ 8. Index artifacts to SQLite **working table**:
22
41
  ```bash
23
42
  node .valent-pipeline/bin/cli.js db index-working \
24
43
  --story-id {story_id} \
25
44
  --sprint-id {current_sprint_id}
26
45
  ```
27
- 6. Update status to `readiness-review` → READINESS reviews specs + cross-story checks
46
+ 9. READINESS reviews specs + cross-story checks
47
+
48
+ **Status-based self-selection with type filtering:**
49
+
50
+ Each agent scans `{backlog_path}` (read-only) for its intake status to determine what to work on next. Lead updates statuses on agent pickup and handoff:
51
+
52
+ | Agent | Intake Status | Type Filter | In-Progress Status | Handoff Status |
53
+ |-------|--------------|-------------|-------------------|----------------|
54
+ | REQS | `pending` | all | `requirements-spec` | `ux-spec` if `ui` in profiles, else `test-case-development` |
55
+ | UXA | `ux-spec` | `ui` in `testing_profiles` | `ux-spec-in-progress` | `test-case-development` |
56
+ | QA-A | `test-case-development` | all | `test-case-development-in-progress` | `readiness-review` |
57
+ | READINESS | `readiness-review` | all | `readiness-review-in-progress` | `groomed` |
28
58
 
29
- **On READINESS approval:**
30
- - Update status to `groomed`
31
- - Move to next story
59
+ **Protocol:** Agent finishes current story → scans backlog for next story at its intake status (+ type filter) → picks highest priority → sends pickup signal to Lead → Lead sets in-progress status → agent works → sends `[HANDOFF]` to Lead → Lead advances to handoff status.
60
+
61
+ **Backlog write ownership:** Lead remains the sole writer to `{backlog_path}`. Agents do not update statuses directly. An agent's `[HANDOFF]` message to Lead triggers Lead to update the story's status.
62
+
63
+ **Non-UI stories** skip `ux-spec` entirely. REQS handoff advances directly to `test-case-development`. UXA never sees them.
64
+
65
+ ## Step 3: Rework Handling
32
66
 
33
67
  **On READINESS rejection:**
34
- - Route to responsible agent (REQS, UXA, or QA-A) per rejection routing table
35
- - Agent revises, downstream re-processes
36
- - Re-index to working table (overwrites previous)
37
- - READINESS re-reviews
38
- - Cap at `{max_rejection_cycles}`. Stories exceeding cap: mark `blocked-on-user`, remove from grooming batch
39
68
 
40
- ## Step 3: Context Pressure Management
69
+ - Route to responsible agent (REQS, UXA, or QA-A) per rejection routing table.
70
+ - The rework task enters that agent's queue at **highest priority** — it is processed before any new stories.
71
+ - Agent revises, downstream agents re-process the reworked story (also at highest priority).
72
+ - Re-index to working table (overwrites previous).
73
+ - READINESS re-reviews.
74
+ - Cap at `{max_rejection_cycles}`. Stories exceeding cap: mark `blocked-on-user`, remove from pipeline.
75
+
76
+ **Rework priority rule:** An agent that receives a rework task finishes its current in-progress story first, then processes the rework before picking up any new story. This prevents context-switching mid-story while ensuring rework is not starved.
77
+
78
+ ## Step 4: Pipeline Completion
79
+
80
+ All stories are groomed when every story in the batch has reached `groomed` or `blocked-on-user` status. Agents that finish their queue idle until all stories complete the full pipeline.
81
+
82
+ ## Step 5: Context Pressure Management
83
+
84
+ After every `{sprint_max_groom_batch}` stories **entering REQS** (default: 10), kill and respawn Phase 1 agents to manage context window pressure. Knowledge agent is NOT killed (persists per epic/project).
41
85
 
42
- After every `{sprint_max_groom_batch}` stories (default: 10), kill and respawn Phase 1 agents to manage context window pressure. Knowledge agent is NOT killed (persists per epic/project).
86
+ When respawning mid-pipeline, allow in-flight stories to complete their current agent stage before killing that agent. Resume the pipeline with fresh agents.
43
87
 
44
- ## Step 4: Flush Working Table
88
+ ## Step 6: Flush Working Table
45
89
 
46
90
  After all stories groomed:
47
91
 
@@ -52,7 +96,7 @@ node .valent-pipeline/bin/cli.js db flush-working \
52
96
 
53
97
  This copies final post-READINESS specs from `artifacts_working` → `artifacts` (main table), then clears the working table.
54
98
 
55
- ## Step 5: Update Sprint State
99
+ ## Step 7: Update Sprint State
56
100
 
57
101
  Update `pipeline-state.json`: `current_sprint.phase = "sizing"`.
58
102
 
@@ -26,3 +26,30 @@ Update the backlog item for `{story_id}` in `{backlog_path}`.
26
26
 
27
27
  1. Set the current item's `status` to `blocked-on-user`
28
28
  2. Note the reason in the item
29
+
30
+ ## On Grooming Phase Progression
31
+
32
+ During sprint grooming (sprint-groom.md), Lead updates story statuses as agents progress through the pipeline:
33
+
34
+ **On agent pickup:** Set the in-progress status to prevent double-pickup in assembly-line mode.
35
+
36
+ | Event | New Status |
37
+ |-------|-----------|
38
+ | UXA picks up story | `ux-spec-in-progress` |
39
+ | QA-A picks up story | `test-case-development-in-progress` |
40
+ | READINESS picks up story | `readiness-review-in-progress` |
41
+
42
+ **On agent `[HANDOFF]`:** Advance to the next phase's intake status.
43
+
44
+ | Event | New Status |
45
+ |-------|-----------|
46
+ | REQS completes (`ui` in profiles) | `ux-spec` |
47
+ | REQS completes (`ui` NOT in profiles) | `test-case-development` |
48
+ | UXA completes | `test-case-development` |
49
+ | QA-A completes | `readiness-review` |
50
+ | READINESS approves | `groomed` |
51
+
52
+ **On READINESS rejection:** Revert to the responsible agent's intake status so the agent re-picks it up:
53
+ - Reject to REQS → `pending` (REQS re-processes)
54
+ - Reject to UXA → `ux-spec` (UXA re-processes)
55
+ - Reject to QA-A → `test-case-development` (QA-A re-processes)
@@ -28,5 +28,6 @@ Risk factors:
28
28
  ## Error Handling
29
29
 
30
30
  - `reqs-brief.md` missing: blocker, `[BLOCKER]` to lead, STOP.
31
- - `uxa-spec.md` missing (fullstack/frontend): proceed without visual checkpoints, note in output. Do NOT block.
31
+ - `uxa-spec.md` missing + `ui` in `{testing_profiles}`: `[BLOCKER]` to Lead, STOP. UXA must complete first.
32
+ - `uxa-spec.md` missing + `ui` NOT in `{testing_profiles}`: proceed without visual checkpoints, note "N/A — no UI profile". Do NOT block.
32
33
  - Ambiguous AC: write test for most likely interpretation, flag `[AMBIGUOUS]`, note assumption.
@@ -65,11 +65,12 @@ Per error test case: error code (HTTP status or app code), error message pattern
65
65
 
66
66
  For each NFR-sensitive path: `[NFR-PERF]` response time + load patterns; `[NFR-SEC]` auth boundaries + input validation (SQLi, XSS); `[NFR-REL]` partial failure + data consistency + retry. Skip if no NFR targets; note "No NFR-sensitive paths identified."
67
67
 
68
- ## Step 9: Visual Validation Checkpoints (Conditional)
68
+ ## Step 9: Visual Validation Checkpoints (Conditional — Keyed on Profile)
69
69
 
70
- If `uxa-spec.md` available, for each page state define: Checkpoint ID (VV-{NNN}), Page/Route, State (Default/Loading/Empty/Error/Success or custom), AC Reference, Area labels in scope, Screenshot filename (`{story_id}_VV-{NNN}_{page}_{state}.png`), Expected visual elements, Setup instructions, Pass criteria.
70
+ **Trigger:** `ui` is in `{testing_profiles}` (NOT file existence).
71
71
 
72
- Write to `{story_output_dir}/visual-validation-checklist.md`. If no UXA spec: skip, note "N/A -- no UI components."
72
+ - If `ui` in `{testing_profiles}` → **MANDATORY.** Read `uxa-spec.md`. If `uxa-spec.md` is missing, send `[BLOCKER]` to Lead — do NOT proceed without it. For each page state define: Checkpoint ID (VV-{NNN}), Page/Route, State (Default/Loading/Empty/Error/Success or custom), AC Reference, Area labels in scope, Screenshot filename (`{story_id}_VV-{NNN}_{page}_{state}.png`), Expected visual elements, Setup instructions, Pass criteria. Write to `{story_output_dir}/visual-validation-checklist.md`.
73
+ - If `ui` NOT in `{testing_profiles}` → skip, note "N/A — no UI profile."
73
74
 
74
75
  ## Step 10: Write Final Outputs
75
76
 
@@ -34,6 +34,14 @@ Execute complete test suite against real infrastructure:
34
34
 
35
35
  Record per test: pass/fail/skip, execution time, error output (failures). Record exact commands for reproducibility.
36
36
 
37
+ ### Step 4a: UI Regression Suite (Every Story)
38
+
39
+ Once E2E tests exist in the project (from any prior shipped UI story), run the **full** `{tech_stack.test_framework_e2e}` suite on **every** story — including `api`-only stories. This catches regressions where backend changes break existing UI flows.
40
+
41
+ - **Zero mocks, zero interception** — real browser, real API, real DB. Standard automated test execution, not PMCP.
42
+ - If a previously-passing E2E test now fails: file as minimum **P2** bug against the current story.
43
+ - **Skip only if** no E2E tests exist yet in the project (pre-first-UI-story). In that case, note "No E2E regression suite exists yet — skipped."
44
+
37
45
  ## Step 4b: Load and Execute Testing Profile Steps
38
46
 
39
47
  Read testing profile step file(s) from `.valent-pipeline/steps/qa-b/` based on `{testing_profiles}`:
@@ -1,6 +1,32 @@
1
1
  # Standalone Review
2
2
 
3
- **STRICT order:** REQS -> UXA (if `fullstack-web`/`frontend-only`) -> QA-A. **Stop on first failure.** Update `stepsCompleted`/`pendingSteps` after each.
3
+ **STRICT order:** Step 0 (profile + artifacts) -> REQS -> UXA (if `ui` in `testing_profiles`) -> QA-A. **Stop on first failure.** Update `stepsCompleted`/`pendingSteps` after each.
4
+
5
+ ## Step 0: Validate Profile and Derive Artifact Matrix
6
+
7
+ ### 0a — Validate Testing Profile
8
+
9
+ Read the story's ACs and scope. Independently assess which `testing_profiles` should apply:
10
+ - `api` — story has API endpoints, backend logic, or database changes
11
+ - `ui` — story has UI components, pages, or visual elements
12
+ - `data-pipeline` — story has ETL, data transformation, or batch processing
13
+
14
+ Compare your assessment against the `testing_profiles` tagged on the backlog entry:
15
+ - **Missing profile that should be present** → reject to Lead: `[READINESS-REJECTION] Story {story_id}: testing_profiles missing '{profile}'. Re-tag and re-groom.` **STOP.**
16
+ - **Over-tagging** (profile present but not needed) → advisory only, note in review but do not reject.
17
+
18
+ ### 0b — Derive Artifact Matrix from Validated Profile
19
+
20
+ | Artifact | Required When | Responsible Agent |
21
+ |----------|--------------|-------------------|
22
+ | `reqs-brief.md` | Always | REQS |
23
+ | `uxa-spec.md` | `ui` in `testing_profiles` | UXA |
24
+ | `qa-test-spec.md` | Always | QA-A |
25
+ | `visual-validation-checklist.md` | `ui` in `testing_profiles` | QA-A |
26
+
27
+ Check each required artifact exists in `{story_output_dir}`. Missing required artifact → reject to responsible agent: `[READINESS-REJECTION] Story {story_id}: missing {artifact}. See readiness-review.md#missing-artifacts.` Send to responsible agent AND Lead. **STOP.**
28
+
29
+ All subsequent checks are gated by this matrix — only validate artifacts that are required per profile.
4
30
 
5
31
  ## Step 1: Read REQS Brief
6
32
 
@@ -22,7 +48,7 @@ Read `{story_output_dir}/reqs-brief.md`. Record in `inputsRead`.
22
48
 
23
49
  **If ANY fails:** Reject to `readiness-review.md#reqs-rejection-reasons`. Send `[READINESS-REJECTION]` to **REQS** AND to Lead: `[READINESS-REJECTION] Story {story_id}: ACs need rework. See readiness-review.md#reqs-rejection-reasons.` **STOP**.
24
50
 
25
- ## Steps 3-4: Read + Validate UXA (SKIP if backend-only)
51
+ ## Steps 3-4: Read + Validate UXA (SKIP if `ui` NOT in `testing_profiles`)
26
52
 
27
53
  Read `{story_output_dir}/uxa-spec.md`. Record in `inputsRead`. Only validate if REQS passed.
28
54
 
@@ -61,6 +87,19 @@ Only if REQS passed and UXA passed (or skipped).
61
87
 
62
88
  **If ANY fails:** Reject to `readiness-review.md#qa-spec-rejection-reasons`. Send `[READINESS-REJECTION]` to **QA-A** AND to Lead: `[READINESS-REJECTION] Story {story_id}: Traceability gaps. See readiness-review.md#qa-spec-rejection-reasons.` **STOP**.
63
89
 
90
+ ### Step 6b: Validate Visual Validation Checklist (if `ui` in `testing_profiles`)
91
+
92
+ Only if QA-A spec passed and `ui` in `testing_profiles`:
93
+
94
+ | Check | Criteria |
95
+ |-------|----------|
96
+ | Checklist exists | `visual-validation-checklist.md` present in `{story_output_dir}` |
97
+ | Checkpoint coverage | Every page/route in `uxa-spec.md` has at least one checkpoint |
98
+ | 5-state coverage | Each page has checkpoints for all 5 states: Default, Loading, Empty, Error, Success |
99
+ | AC traceability | Every visual checkpoint references an AC from `reqs-brief.md` |
100
+
101
+ **If ANY fails:** Reject to `readiness-review.md#visual-checklist-rejection-reasons`. Send `[READINESS-REJECTION]` to **QA-A** AND to Lead. **STOP**.
102
+
64
103
  ## Step 7: Red Team Analysis
65
104
 
66
105
  Only if ALL specs passed. Probe each test case as a "lazy dev who wants green tests with minimal effort":
@@ -14,8 +14,11 @@ status_labels:
14
14
  cancelled: Cancelled
15
15
  requirements-spec: Requirements Spec
16
16
  ux-spec: UX Spec
17
+ ux-spec-in-progress: UX Spec (In Progress)
17
18
  test-case-development: Test Case Development
19
+ test-case-development-in-progress: Test Case Development (In Progress)
18
20
  readiness-review: Readiness Review
21
+ readiness-review-in-progress: Readiness Review (In Progress)
19
22
  groomed: Groomed
20
23
  sizing: Sizing
21
24
  sprint-planned: Sprint Planned
@@ -6,7 +6,7 @@ argument-hint: '<epic-id>'
6
6
 
7
7
  # valent-run-epic
8
8
 
9
- Run all stories tagged with an epic through the v3 multiagent pipeline using sprint-based planning and execution. Stories are groomed, sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Each sprint ends with a retrospective that calibrates future estimates.
9
+ Run all stories tagged with an epic through the v3 multiagent pipeline using sprint-based planning and execution. Stories are groomed (pipelined through Phase 1 agents), sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Each sprint ends with a retrospective that calibrates future estimates.
10
10
 
11
11
  ## Arguments
12
12
 
@@ -20,6 +20,15 @@ Use the standard 200k context window. Per `pipeline-config.yaml` `orchestration.
20
20
 
21
21
  ## Execution Steps
22
22
 
23
+ ### Step 0: Pre-Flight MCP Check
24
+
25
+ This check reads values from `pipeline-config.yaml`, so load that file first (see Step 1). Then check if visual validation infrastructure is required:
26
+
27
+ 1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
28
+ 2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run this epic.`
29
+
30
+ This is an infrastructure prerequisite. Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
31
+
23
32
  ### Step 1: Load Pipeline Config
24
33
 
25
34
  Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.
@@ -82,7 +91,7 @@ Set `{epic_filter}` = `{epic_id}` to scope story resolution to this epic.
82
91
 
83
92
  Read and follow `.valent-pipeline/steps/orchestration/sprint-groom.md`.
84
93
 
85
- Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are processed sequentially through the grooming pipeline with READINESS performing cross-story checks.
94
+ Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are pipelined through agents using assembly-line parallelism (see sprint-groom.md) with READINESS performing cross-story checks.
86
95
 
87
96
  #### 4e. Sprint Sizing
88
97
 
@@ -5,7 +5,7 @@ description: 'Run all stories across all epics with sprint planning, ordered by
5
5
 
6
6
  # valent-run-project
7
7
 
8
- Run every pending story in the backlog, across all epics, using sprint-based planning and execution. Stories are groomed, sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Cross-epic dependency resolution ensures stories from any epic can run when their dependencies are met. Each sprint ends with a retrospective that calibrates future estimates.
8
+ Run every pending story in the backlog, across all epics, using sprint-based planning and execution. Stories are groomed (pipelined through Phase 1 agents), sized with Fibonacci points, packed into sprints by velocity, and executed sequentially. Cross-epic dependency resolution ensures stories from any epic can run when their dependencies are met. Each sprint ends with a retrospective that calibrates future estimates.
9
9
 
10
10
  ## Context Window Advisory
11
11
 
@@ -13,6 +13,15 @@ Use the standard 200k context window. Auto-compression fires every 2-3 stories.
13
13
 
14
14
  ## Execution Steps
15
15
 
16
+ ### Step 0: Pre-Flight MCP Check
17
+
18
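+ Before spawning Lead, read `project.type` and `tech_stack.browser_automation_mcp` from `pipeline-config.yaml` (the full config load still happens in Step 1) and check whether visual validation infrastructure is required: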
19
+
20
+ 1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
21
+ 2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run.`
22
+
23
+ This is an infrastructure prerequisite. Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
24
+
16
25
  ### Step 1: Load Pipeline Config
17
26
 
18
27
  Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.
@@ -96,7 +105,7 @@ Do NOT set `{epic_filter}` — sprint planning pulls from the full cross-epic ba
96
105
 
97
106
  Read and follow `.valent-pipeline/steps/orchestration/sprint-groom.md`.
98
107
 
99
- Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are processed sequentially through the grooming pipeline with READINESS performing cross-story checks.
108
+ Phase 1 agents (REQS, UXA, QA-A, READINESS) stay alive across the grooming batch. Stories are pipelined through agents using assembly-line parallelism (see sprint-groom.md) with READINESS performing cross-story checks.
100
109
 
101
110
  #### 4e. Sprint Sizing
102
111
 
@@ -18,6 +18,15 @@ If no argument is provided, resolve the next work item from the backlog (see Ste
18
18
 
19
19
  ## Execution Steps
20
20
 
21
+ ### Step 0: Pre-Flight MCP Check
22
+
23
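+ Before spawning Lead, read `project.type` and `tech_stack.browser_automation_mcp` from `pipeline-config.yaml` (the full config load still happens in Step 1) and check whether visual validation infrastructure is required: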
24
+
25
+ 1. If `project.type` is `fullstack-web` or `frontend-only`, verify `{tech_stack.browser_automation_mcp}` is accessible (e.g., `npx @anthropic-ai/playwright-mcp --version` or check `.claude/settings.json` for the MCP registration).
26
+ 2. If not accessible → **STOP** with: `Browser automation MCP ({tech_stack.browser_automation_mcp}) is not installed. Visual validation cannot run. Install it with: npx valent-pipeline init --force (or npm install -g @anthropic-ai/{tech_stack.browser_automation_mcp}). Then re-run this story.`
27
+
28
+ This is an infrastructure prerequisite — same category as "does `pipeline-config.yaml` exist?" Don't waste tokens spawning Lead into a pipeline that can't run visual validation for UI projects.
29
+
21
30
  ### Step 1: Load Pipeline Config
22
31
 
23
32
  Read and follow `.valent-pipeline/steps/orchestration/load-pipeline-config.md`.
@@ -99,12 +99,50 @@ export async function init(options = {}) {
99
99
  console.log(' Run "valent-pipeline db init" to create the database.');
100
100
  }
101
101
 
102
+ // 7b. Install browser automation MCP for UI projects
103
+ const projectType = config.project?.type || 'fullstack-web';
104
+ const uiProjectTypes = ['fullstack-web', 'frontend-only'];
105
+ if (uiProjectTypes.includes(projectType)) {
106
+ const mcpName = config.tech_stack?.browser_automation_mcp || 'playwright-mcp';
107
+ const installMcp = options.yes || await confirmPrompt(
108
+ `Install browser automation MCP (${mcpName}) for visual validation?`,
109
+ true
110
+ );
111
+ if (installMcp) {
112
+ console.log(` Installing ${mcpName}...`);
113
+ const { execSync } = await import('child_process');
114
+ try {
115
+ execSync(`npm install -g @anthropic-ai/${mcpName}`, { stdio: 'pipe' });
116
+ console.log(` Installed ${mcpName}`);
117
+ } catch (err) {
118
+ console.warn(` Warning: Failed to install ${mcpName}. Run "npm install -g @anthropic-ai/${mcpName}" manually.`);
119
+ }
120
+ // Register in .claude/settings.json mcpServers
121
+ const mcpSettingsPath = join(projectRoot, '.claude', 'settings.json');
122
+ let mcpSettings = {};
123
+ if (fileExists(mcpSettingsPath)) {
124
+ try {
125
+ mcpSettings = JSON.parse(readFileSync(mcpSettingsPath, 'utf-8'));
126
+ } catch { /* start fresh if parse fails */ }
127
+ }
128
+ if (!mcpSettings.mcpServers) mcpSettings.mcpServers = {};
129
+ if (!mcpSettings.mcpServers[mcpName]) {
130
+ mcpSettings.mcpServers[mcpName] = {
131
+ command: 'npx',
132
+ args: [`@anthropic-ai/${mcpName}`]
133
+ };
134
+ writeFileSafe(mcpSettingsPath, JSON.stringify(mcpSettings, null, 2) + '\n');
135
+ console.log(` Registered ${mcpName} in .claude/settings.json`);
136
+ }
137
+ }
138
+ }
139
+
102
140
  // 8. Configure Claude settings for agent teams
103
141
  const claudeSettingsPath = join(projectRoot, '.claude', 'settings.json');
104
142
  let claudeSettings = {};
105
143
  if (fileExists(claudeSettingsPath)) {
106
144
  try {
107
- claudeSettings = JSON.parse(readFile(claudeSettingsPath));
145
+ claudeSettings = JSON.parse(readFileSync(claudeSettingsPath, 'utf-8'));
108
146
  } catch { /* start fresh if parse fails */ }
109
147
  }
110
148
  if (!claudeSettings.env) claudeSettings.env = {};
@@ -129,6 +167,14 @@ export async function init(options = {}) {
129
167
  console.log('');
130
168
  }
131
169
 
170
/**
 * Ask the user a single yes/no question on the terminal.
 *
 * `inquirer` is loaded with a dynamic import at call time, so it is only
 * resolved when a prompt is actually shown.
 *
 * @param {string} message - Question text displayed to the user.
 * @param {boolean} [defaultValue=true] - Answer offered as the prompt default.
 * @returns {Promise<boolean>} The value inquirer reports for the confirm question.
 */
async function confirmPrompt(message, defaultValue = true) {
  const { default: inquirer } = await import('inquirer');
  const question = {
    type: 'confirm',
    name: 'answer',
    message,
    default: defaultValue,
  };
  const response = await inquirer.prompt([question]);
  return response.answer;
}
177
+
132
178
  async function runWizard() {
133
179
  const inquirer = (await import('inquirer')).default;
134
180
  const config = JSON.parse(JSON.stringify(defaults));