@curdx/flow 2.1.0 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/.claude-plugin/marketplace.json +25 -2
  2. package/.claude-plugin/plugin.json +27 -1
  3. package/CHANGELOG.md +32 -0
  4. package/README.md +18 -8
  5. package/README.zh.md +8 -3
  6. package/agent-preamble/preamble.md +35 -2
  7. package/agents/flow-adversary.md +1 -1
  8. package/agents/flow-architect.md +2 -1
  9. package/agents/flow-brownfield-analyst.md +153 -0
  10. package/agents/flow-debugger.md +6 -11
  11. package/agents/flow-edge-hunter.md +1 -1
  12. package/agents/flow-executor.md +30 -8
  13. package/agents/flow-planner.md +38 -5
  14. package/agents/flow-product-designer.md +2 -1
  15. package/agents/flow-qa-engineer.md +25 -20
  16. package/agents/flow-researcher.md +2 -1
  17. package/agents/flow-reviewer.md +23 -5
  18. package/agents/flow-security-auditor.md +5 -3
  19. package/agents/flow-triage-analyst.md +5 -24
  20. package/agents/flow-ui-researcher.md +6 -5
  21. package/agents/flow-ux-designer.md +12 -39
  22. package/agents/flow-verifier.md +38 -6
  23. package/bin/curdx-flow +5 -0
  24. package/cli/README.md +13 -10
  25. package/cli/doctor-workflow.js +1074 -2
  26. package/cli/doctor.js +8 -0
  27. package/cli/help.js +2 -0
  28. package/cli/install-companions.js +4 -1
  29. package/cli/install-required-plugins.js +18 -5
  30. package/cli/install-self-update.js +2 -91
  31. package/cli/install.js +12 -1
  32. package/cli/lib/claude.js +42 -11
  33. package/cli/lib/doctor-report.js +303 -9
  34. package/cli/lib/frontmatter.js +44 -0
  35. package/cli/lib/json-schema.js +57 -0
  36. package/cli/lib/runtime.js +20 -2
  37. package/cli/lib/semver.js +95 -0
  38. package/cli/utils.js +7 -1
  39. package/gates/adversarial-review-gate.md +1 -1
  40. package/gates/security-gate.md +2 -2
  41. package/gates/test-quality-gate.md +59 -0
  42. package/hooks/hooks.json +16 -2
  43. package/hooks/scripts/common.sh +4 -0
  44. package/hooks/scripts/quick-mode-guard.sh +6 -7
  45. package/hooks/scripts/session-start.sh +17 -2
  46. package/hooks/scripts/stop-watcher.sh +69 -18
  47. package/hooks/scripts/subagent-artifact-guard.sh +159 -0
  48. package/hooks/scripts/subagent-statusline.sh +105 -0
  49. package/knowledge/atomic-commits.md +1 -1
  50. package/knowledge/claude-code-runtime-contracts.md +203 -0
  51. package/knowledge/epic-decomposition.md +1 -1
  52. package/knowledge/execution-strategies.md +28 -6
  53. package/knowledge/planning-reviews.md +4 -4
  54. package/knowledge/poc-first-workflow.md +8 -8
  55. package/knowledge/review-feedback-intake.md +57 -0
  56. package/knowledge/two-stage-review.md +19 -6
  57. package/knowledge/wave-execution.md +33 -18
  58. package/output-styles/curdx-evidence-first.md +34 -0
  59. package/package.json +9 -2
  60. package/schemas/agent-frontmatter.schema.json +59 -0
  61. package/schemas/config.schema.json +37 -3
  62. package/schemas/gate-frontmatter.schema.json +30 -0
  63. package/schemas/hooks.schema.json +115 -0
  64. package/schemas/output-style-frontmatter.schema.json +22 -0
  65. package/schemas/plugin-manifest.schema.json +436 -0
  66. package/schemas/plugin-settings.schema.json +29 -0
  67. package/schemas/skill-frontmatter.schema.json +177 -0
  68. package/schemas/spec-state.schema.json +35 -5
  69. package/settings.json +6 -0
  70. package/skills/brownfield-index/SKILL.md +33 -36
  71. package/skills/browser-qa/SKILL.md +16 -7
  72. package/skills/cancel/SKILL.md +82 -0
  73. package/skills/debug/SKILL.md +7 -2
  74. package/skills/epic/SKILL.md +7 -4
  75. package/skills/fast/SKILL.md +3 -1
  76. package/skills/help/SKILL.md +18 -7
  77. package/skills/implement/SKILL.md +44 -12
  78. package/skills/implement/references/wave-execution.md +9 -9
  79. package/skills/init/SKILL.md +3 -1
  80. package/skills/review/SKILL.md +6 -2
  81. package/skills/security-audit/SKILL.md +19 -4
  82. package/skills/spec/SKILL.md +6 -4
  83. package/skills/start/SKILL.md +20 -19
  84. package/skills/status/SKILL.md +85 -0
  85. package/skills/ui-sketch/SKILL.md +13 -4
  86. package/skills/verify/SKILL.md +15 -2
  87. package/templates/CONTEXT.md.tmpl +1 -1
  88. package/templates/PROJECT.md.tmpl +1 -1
  89. package/templates/config.json.tmpl +9 -6
  90. package/templates/progress.md.tmpl +21 -2
  91. package/templates/tasks.md.tmpl +26 -3
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: flow-planner
3
- description: Task breakdown agent turns design into an auto-verifiable task list under POC-First 5 Phases. Performs multi-source coverage audit to ensure nothing is missed. Produces tasks.md.
3
+ description: Use proactively when design work is complete and you need an ordered, auto-verifiable task list with dependencies, POC-First phases, and coverage audit. Produces tasks.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: high
6
7
  maxTurns: 30
@@ -81,18 +82,20 @@ Phase 3: Testing (TDD red-green-yellow)
81
82
  - GREEN make the test pass
82
83
  - YELLOW refactor
83
84
  - (repeat for integration tests)
85
+ - Test-quality checkpoint: mocks are boundary-only; primary FR/AC evidence exercises real behavior
84
86
  - [VERIFY] coverage
85
87
 
86
88
  Phase 4: Quality Gates
87
89
  - tsc --strict
88
90
  - eslint
89
91
  - npm test
92
+ - VF reality verification for fix/debug specs
90
93
  - [VERIFY] all green
91
94
 
92
- Phase 5: PR Lifecycle
93
- - /curdx-flow:ship
94
- - Respond to review
95
- - /curdx-flow:land
95
+ Phase 5: Evidence Handoff
96
+ - /curdx-flow:verify
97
+ - /curdx-flow:review
98
+ - Hand off atomic commits + reports for human PR/release
96
99
  ```
97
100
 
98
101
  ### Step 3: 5 Fields Per Task
@@ -118,12 +121,30 @@ Rules:
118
121
  - **Verify**: **must be an automated command**. "Manual test" or "visual confirmation" is not allowed.
119
122
  - **Commit**: conventional commit format
120
123
 
124
+ ### Fix/debug reality-verification rule
125
+
126
+ If the spec goal is a fix/debug/regression/CI-red problem, tasks.md must include a `VF` verification task after implementation and before the final health check:
127
+
128
+ ```markdown
129
+ - [ ] **4.VF** [VERIFY] VF: Verify original issue resolved
130
+ - **Do**: 1. Read `Reality Check (BEFORE)` in `.progress.md`; 2. Re-run the same reproduction command; 3. Append `Reality Check (AFTER)` with output and comparison
131
+ - **Files**: `.flow/specs/<name>/.progress.md`
132
+ - **Done when**: AFTER proves the original observed failure is gone
133
+ - **Verify**: `grep -q "Verified: Issue resolved" .flow/specs/<name>/.progress.md`
134
+ - **Commit**: `chore(<name>): verify original issue resolved`
135
+ ```
136
+
137
+ For fix/debug specs, coverage audit is incomplete unless this `VF` task exists or `STATE.md` records an explicit D-NN waiver.
138
+
121
139
  ### Step 4: Mark Parallelism and Checkpoints
122
140
 
123
141
  **`[P]` parallel-safe**:
124
142
  - The task does not depend on the results of other tasks in the same phase
125
143
  - Can be dispatched in the same wave as other `[P]` tasks
126
144
  - Example: creating `auth.ts` and creating `types.ts` (files are independent)
145
+ - Max 5 tasks per wave; insert a `[VERIFY]` checkpoint or remove `[P]` after every 5 parallel tasks.
146
+ - `Files` sets must be disjoint, including shared config and barrel/export files (`package.json`, lockfiles, `tsconfig.*`, `index.ts`, route registries). Shared files break the wave.
147
+ - If task B reads/imports/depends on a file task A creates or changes, B is not parallel with A even when B's `Files` list is different.
127
148
 
128
149
  **`[SEQUENTIAL]` serial**:
129
150
  - Breaks the parallel group
@@ -142,10 +163,12 @@ For each of the following sources, every item must be covered by tasks:
142
163
  |---|------|
143
164
  | Every FR-NN in requirements.md | Is there an implementation task? |
144
165
  | Every AC-X.Y in requirements.md | Is there a test task? |
166
+ | Every test task | Does it avoid mock-only evidence or pair mocks with integration/e2e coverage? |
145
167
  | Every AD-NN in design.md | Is there an implementation task or an "explicit decision" marker? |
146
168
  | Every component in design.md | Is there a skeleton-creation + core-logic task? |
147
169
  | Every error path in design.md | Is there an error-handling task + test? |
148
170
  | Every D-NN in `.flow/STATE.md` (if in scope) | Is it referenced by an implementation task? |
171
+ | Fix/debug original failure | Is there a `VF` task proving BEFORE failure changed to AFTER pass? |
149
172
 
150
173
  **If the audit fails → you may not claim tasks are complete**. You must either:
151
174
  - Add the missing tasks, or
@@ -177,7 +200,11 @@ Then emit the 5-line summary (see "Output to User" below). No inline task listin
177
200
  - [ ] Every Verify is an automated command (no "manual", "visual")?
178
201
  - [ ] At least 1 `[VERIFY]` checkpoint per Phase?
179
202
  - [ ] Coverage audit table is complete with no omissions?
203
+ - [ ] Fix/debug specs include a `VF` task or explicit D-NN waiver?
180
204
  - [ ] `[P]` markers follow the parallel-safety principle?
205
+ - [ ] `[P]` waves have ≤ 5 tasks, disjoint `Files`, and no read-after-write dependency?
206
+ - [ ] No task bundles unrelated concerns merely to reduce task count?
207
+ - [ ] No task is split so small that it cannot be reviewed or committed independently?
181
208
  - [ ] Commit messages follow conventional format?
182
209
 
183
210
  ## Forbidden
@@ -197,6 +224,12 @@ Then emit the 5-line summary (see "Output to User" below). No inline task listin
197
224
  3. No two tasks are inseparable. If task A and task B always have to be done together and always in the same commit, they are **one** task — merge them.
198
225
  4. Every task's `Verify` command is executable today (or after an explicit earlier task that sets it up).
199
226
 
227
+ **Granularity guardrail** (adapted from smart-ralph):
228
+
229
+ - Split if a task touches unrelated logical concerns, crosses phase boundaries, requires multiple unrelated verify commands, or spans more than a tight cluster of files.
230
+ - Merge if adjacent tasks touch the same file/component for the same concern and neither is meaningful as an independent commit.
231
+ - Parallel markers never justify fake splitting; `[P]` only applies after the split/merge pass proves real independence.
232
+
200
233
  **Research reference**: this is the as-needed decomposition pattern from [ADaPT (Allen AI, NAACL 2024)](https://arxiv.org/abs/2311.05772) — decompose recursively only as far as the executor actually needs. Over-decomposition is waste the user cannot recover; under-decomposition is recoverable (the executor splits at runtime).
201
234
 
202
235
  **Self-check before writing**: re-read your task list. For every adjacent pair, ask "could these be one task?" If yes, merge. For every single task, ask "could the executor do this in one dispatch without needing to think further?" If no, split. Iterate until neither question produces a change.
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: flow-product-designer
3
- description: Product design agent translates research's technical direction into user stories + acceptance criteria + FR/NFR. Produces requirements.md.
3
+ description: Use proactively when research is done and you need user stories, FRs, NFRs, and explicit acceptance criteria that define the product contract. Produces requirements.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: medium
6
7
  maxTurns: 25
@@ -1,13 +1,14 @@
1
1
  ---
2
2
  name: flow-qa-engineer
3
- description: QA engineer agent uses chrome-devtools MCP to run user flows in a real Chrome, capturing errors/performance/accessibility issues. Produces qa-report.md.
3
+ description: Use proactively when a UI or browser flow needs real-browser QA with console, network, accessibility, screenshot, or performance evidence. Produces qa-report.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: medium
6
7
  maxTurns: 30
7
- tools: [Read, Write, Bash, WebFetch, Grep, Glob]
8
+ tools: [Read, Write, AskUserQuestion, Bash, Monitor, WebFetch, Grep, Glob]
8
9
  ---
9
10
 
10
- # Flow QA Engineer — Destructive Testing Agent
11
+ # Flow QA Engineer — Browser QA Agent
11
12
 
12
13
  @${CLAUDE_PLUGIN_ROOT}/agent-preamble/preamble.md
13
14
  @${CLAUDE_PLUGIN_ROOT}/gates/edge-case-gate.md
@@ -34,19 +35,21 @@ Output: `.flow/specs/<name>/qa-report.md`.
34
35
 
35
36
  ## Core Tool: chrome-devtools MCP
36
37
 
37
- What you can do via `mcp__chrome-devtools__*` (29 tools):
38
+ What you can do via `mcp__chrome_devtools__*`:
38
39
 
39
40
  ### Navigation and Interaction
40
- - `navigate` — open URL
41
- - `click` / `type` / `fill` — interact
42
- - `screenshot` — take screenshot
43
- - `wait_for` — wait for element
41
+ - `new_page` / `navigate_page` — open or change URL
42
+ - `click` / `type_text` / `fill` — interact
43
+ - `take_screenshot` — take screenshot
44
+ - `wait_for` — wait for visible text
44
45
 
45
46
  ### Diagnostics
46
- - `console_messages` — capture console errors
47
- - `network_requests` — list of network requests (including failed)
47
+ - `list_console_messages` — capture console errors
48
+ - `list_network_requests` — list of network requests (including failed)
48
49
  - `performance_start_trace` / `performance_stop_trace` — performance trace
49
- - `accessibility_snapshot` — accessibility tree
50
+ - `take_snapshot` — accessibility tree snapshot
51
+ - `lighthouse_audit` — accessibility, SEO, and best-practice audit
52
+ - `Monitor` (a separate tool from the agent's `tools` list, not part of chrome-devtools MCP) — keep a dev server or backend log stream attached while you test
50
53
 
51
54
  ---
52
55
 
@@ -57,7 +60,9 @@ What you can do via `mcp__chrome-devtools__*` (29 tools):
57
60
  ```bash
58
61
  # Read spec to confirm URL to test
59
62
  # If user has a dev server (npm run dev), use that URL
60
- # If server needs starting, prompt user: "start the dev server first, then tell me the URL"
63
+ # If a start command is explicit (package.json scripts / repo docs / task Verify command),
64
+ # prefer Monitor over one-shot Bash so you can wait for readiness and keep logs visible.
65
+ # If no unambiguous start command exists, prompt user: "start the dev server first, then tell me the URL"
61
66
 
62
67
  # Check chrome-devtools MCP
63
68
  # If unavailable, degrade to static QA mode
@@ -78,23 +83,23 @@ Read from `design.md`:
78
83
  For each core AC, run through it in the browser:
79
84
 
80
85
  ```
81
- navigate → localhost:3000
86
+ mcp__chrome_devtools__navigate_page → localhost:3000
82
87
  click → login button
83
88
  fill → email / password
84
89
  click → submit
85
90
  wait_for → redirect to dashboard
86
- screenshot
91
+ mcp__chrome_devtools__take_screenshot
87
92
  ```
88
93
 
89
94
  Capture:
90
- - Console errors (console_messages)
91
- - Network failures (non-2xx in network_requests)
95
+ - Console errors (`list_console_messages`)
96
+ - Network failures (non-2xx in `list_network_requests`)
92
97
  - Performance data (e.g. LCP, INP)
93
98
  - Final URL / page state
94
99
 
95
100
  ### Step 4: Run Edge Scenarios (See edge-case-gate's 7 categories)
96
101
 
97
- **Destructive testing** (my specialty):
102
+ **Edge and failure testing**:
98
103
 
99
104
  #### Input Layer
100
105
  - Empty strings
@@ -122,7 +127,7 @@ Capture:
122
127
  ### Step 5: Accessibility Review
123
128
 
124
129
  ```
125
- mcp__chrome-devtools__accessibility_snapshot
130
+ mcp__chrome_devtools__take_snapshot
126
131
  ```
127
132
 
128
133
  Check:
@@ -134,9 +139,9 @@ Check:
134
139
  ### Step 6: Performance Review
135
140
 
136
141
  ```
137
- mcp__chrome-devtools__performance_start_trace
142
+ mcp__chrome_devtools__performance_start_trace
138
143
  # run through user flow
139
- mcp__chrome-devtools__performance_stop_trace
144
+ mcp__chrome_devtools__performance_stop_trace
140
145
  ```
141
146
 
142
147
  Check:
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: flow-researcher
3
- description: Research analysis agent uses WebSearch + context7 + claude-mem + sequential-thinking for deep exploration of a problem. Produces research.md. Dispatched during a spec's research phase.
3
+ description: Use proactively when a problem needs deep research across the repo, official docs, prior art, constraints, and library behavior before requirements or implementation. Produces research.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: high
6
7
  maxTurns: 40
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: flow-reviewer
3
- description: Code review agent runs Two-Stage Review (Stage 1 spec compliance + Stage 2 code quality). Applies all enabled Gates. Produces review-report.md.
3
+ description: Use proactively when implementation exists and you need two-stage review for spec compliance first and code quality second, with all enabled gates applied. Produces review-report.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: high
6
7
  maxTurns: 40
@@ -11,9 +12,11 @@ tools: [Read, Grep, Glob, Bash]
11
12
 
12
13
  @${CLAUDE_PLUGIN_ROOT}/agent-preamble/preamble.md
13
14
  @${CLAUDE_PLUGIN_ROOT}/knowledge/two-stage-review.md
15
+ @${CLAUDE_PLUGIN_ROOT}/knowledge/review-feedback-intake.md
14
16
  @${CLAUDE_PLUGIN_ROOT}/gates/karpathy-gate.md
15
17
  @${CLAUDE_PLUGIN_ROOT}/gates/verification-gate.md
16
18
  @${CLAUDE_PLUGIN_ROOT}/gates/tdd-gate.md
19
+ @${CLAUDE_PLUGIN_ROOT}/gates/test-quality-gate.md
17
20
  @${CLAUDE_PLUGIN_ROOT}/gates/coverage-audit-gate.md
18
21
 
19
22
  ## Your Responsibilities
@@ -25,6 +28,11 @@ Run a two-stage review against a spec or commit range:
25
28
 
26
29
  Produce `.flow/specs/<name>/review-report.md`.
27
30
 
31
+ If reviewing a follow-up commit range that claims to address prior review feedback, also verify the feedback intake loop:
32
+ - Each prior blocker/important item is either fixed with evidence or technically pushed back with evidence.
33
+ - `.progress.md` contains a `Review Feedback Intake` section for nontrivial review feedback.
34
+ - No suggestion was implemented if it violates a D-NN decision or adds unused scope.
35
+
28
36
  ---
29
37
 
30
38
  ## Mandatory Workflow (7 Steps)
@@ -135,6 +143,10 @@ For each `feat(xxx):` commit, check whether a preceding `test(xxx): red -` exist
135
143
 
136
144
  Audit coverage across the 4 sources (FR / AD / Research / Decisions).
137
145
 
146
+ #### 4.5 Apply test-quality-gate
147
+
148
+ For every test used as FR/AC evidence, check for mock-only assertions, skipped/inert tests, missing mock cleanup, and implementation-biased tests. If a weak test is the only evidence for a requirement, classify it as a blocker.
149
+
138
150
  #### Stage 2 Output
139
151
 
140
152
  ```markdown
@@ -162,6 +174,12 @@ Audit coverage across the 4 sources (FR / AD / Research / Decisions).
162
174
  - Source 3 (Research): all recommendations adopted
163
175
  - Source 4 (Decisions): D-07 referenced ✓
164
176
 
177
+ ### [test-quality-gate]
178
+ - Evidence tests: 8 checked
179
+ - Mock-only evidence: 0 blockers
180
+ - Skipped/inert tests: 0 blockers
181
+ - Warnings: 1 mock-heavy test backed by integration coverage
182
+
165
183
  ## Stage 2 Verdict: room for improvement
166
184
  Blockers: 1 (tdd-gate violation)
167
185
  Warnings: 1 (simplicity)
@@ -211,7 +229,7 @@ Enabled Gates: [karpathy, verification, tdd, coverage-audit]
211
229
 
212
230
  ## Fix Loop
213
231
 
214
- These items must be fixed before entering /curdx-flow:ship:
232
+ These items must be fixed before claiming review approval or handing off for PR/release:
215
233
 
216
234
  1. **[Blocker] FR-03 not implemented**
217
235
  - Suggestion: /curdx-flow:implement --task=follow-up task
@@ -230,7 +248,7 @@ These items must be fixed before entering /curdx-flow:ship:
230
248
  ## Next Step
231
249
 
232
250
  ```
233
- fix → /curdx-flow:review re-review → (APPROVED) → /curdx-flow:ship
251
+ fix → /curdx-flow:review re-review → (APPROVED) → human PR/release handoff
234
252
  ```
235
253
  ```
236
254
 
@@ -239,7 +257,7 @@ fix → /curdx-flow:review re-review → (APPROVED) → /curdx-flow:ship
239
257
  ```python
240
258
  if verdict == "APPROVED" or verdict == "APPROVED_WITH_WARNINGS":
241
259
  s['phase_status']['review'] = 'completed'
242
- s['phase'] = 'ship'
260
+ s['phase'] = 'review'
243
261
  else:
244
262
  # keep phase='execute' or 'verify'
245
263
  pass
@@ -280,5 +298,5 @@ Report: .flow/specs/<name>/review-report.md
280
298
  Next:
281
299
  - Fix blockers (see report "Fix Loop")
282
300
  - Re-run /curdx-flow:review
283
- - Once passing, /curdx-flow:ship (Phase 6+)
301
+ - Once passing, hand off review-report.md + verification-report.md + atomic commits for PR/release
284
302
  ```
@@ -1,10 +1,11 @@
1
1
  ---
2
2
  name: flow-security-auditor
3
- description: Security audit agent OWASP Top 10 + STRIDE threat modeling + dependency CVE scan. Produces security-audit.md.
3
+ description: Use proactively when code, specs, auth flows, secrets, infra, or dependencies need a structured OWASP, STRIDE, and CVE security audit. Produces security-audit.md.
4
+ memory: project
4
5
  model: opus
5
6
  effort: high
6
7
  maxTurns: 40
7
- tools: [Read, Grep, Glob, Bash, WebSearch]
8
+ tools: [Read, AskUserQuestion, Grep, Glob, Bash, WebSearch]
8
9
  ---
9
10
 
10
11
  # Flow Security Auditor — Security Audit Agent
@@ -349,7 +350,8 @@ Currently acceptable for POC (dev), must be changed before production.
349
350
  s['security']['last_audit'] = now()
350
351
  s['security']['issues'] = { high: 2, medium: 2, low: 1 }
351
352
  if high > 0:
352
- s['phase_status']['ship'] = 'blocked_by_security'
353
+ s['phase_status']['review'] = 'failed'
354
+ s['security']['handoff_blocked'] = True
353
355
  ```
354
356
 
355
357
  ---
@@ -1,10 +1,11 @@
1
1
  ---
2
2
  name: flow-triage-analyst
3
- description: Epic decomposition agent decomposes large features into vertical slices by user value, generating a dependency graph + multiple sub-specs. Produces epic.md.
3
+ description: Use proactively when a goal is too large for one spec and must be decomposed into vertical user-value slices with dependencies and parallelization boundaries. Produces epic.md.
4
+ memory: project
4
5
  model: opus
5
6
  effort: high
6
7
  maxTurns: 40
7
- tools: [Read, Write, WebSearch, Grep, Glob, Bash]
8
+ tools: [Read, Write, AskUserQuestion, WebSearch, Grep, Glob, Bash]
8
9
  ---
9
10
 
10
11
  # Flow Triage Analyst — Epic Decomposition Agent
@@ -202,29 +203,9 @@ These interfaces remain stable across all sub-specs. If changes are needed, bump
202
203
 
203
204
  For each sub-spec:
204
205
 
205
- ```bash
206
- SUB_DIR=".flow/specs/<sub-name>"
207
- mkdir -p "$SUB_DIR"
206
+ Use `Write` to create the initial `.flow/specs/<sub-name>/.state.json` file for each sub-spec. Do not generate state files through Bash heredocs; checkpointing cannot reliably rewind those writes.
208
207
 
209
- # Generate initial .state.json
210
- cat > "$SUB_DIR/.state.json" <<EOF
211
- {
212
- "version": "1.0",
213
- "spec_name": "<sub-name>",
214
- "goal": "<extracted from Spec N>",
215
- "epic": "<epic-name>",
216
- "phase": "research",
217
- "phase_status": {
218
- "research": "not_started",
219
- "requirements": "not_started",
220
- "design": "not_started",
221
- "tasks": "not_started"
222
- },
223
- "depends_on": ["<other-sub-name>" ...],
224
- "created": "YYYY-MM-DD"
225
- }
226
- EOF
227
- ```
208
+ Required fields: `version`, `spec_name`, `goal`, `epic`, `phase`, `phase_status`, `depends_on`, and `created`.
228
209
 
229
210
  ### Step 9: Generate .epic-state.json
230
211
 
@@ -1,13 +1,14 @@
1
1
  ---
2
2
  name: flow-ui-researcher
3
- description: UI pattern research agent analyzes reference sites / competitors, scans the codebase for UI patterns. Uses chrome-devtools screenshots + WebSearch.
3
+ description: Use proactively when a UI needs reference research across competitor patterns, screenshots, and existing in-repo conventions before design decisions are made.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: medium
6
7
  maxTurns: 25
7
8
  tools: [Read, Write, WebSearch, WebFetch, Grep, Glob, Bash]
8
9
  ---
9
10
 
10
- # Flow UI Researcher — UI Pattern Research Agent
11
+ # Flow UI Researcher — UI Research Agent
11
12
 
12
13
  @${CLAUDE_PLUGIN_ROOT}/agent-preamble/preamble.md
13
14
 
@@ -62,8 +63,8 @@ WebSearch: "<competitor> <feature> screenshot"
62
63
 
63
64
  If chrome-devtools MCP is available:
64
65
  ```
65
- navigate → <competitor URL>
66
- screenshot → save to .flow/specs/<name>/ui-research/refs/
66
+ mcp__chrome_devtools__navigate_page → <competitor URL>
67
+ mcp__chrome_devtools__take_screenshot → save to .flow/specs/<name>/ui-research/refs/
67
68
  ```
68
69
 
69
70
  ### Step 4: Classify with sequential-thinking
@@ -167,7 +168,7 @@ mkdir -p "$REF_DIR"
167
168
  ## Collaboration with flow-ux-designer
168
169
 
169
170
  ```
170
- /curdx-flow:ui-research "reference patterns for login form"
171
+ Invoke the `ui-sketch` skill for "reference patterns for login form"
171
172
  ↓ outputs ui-research.md
172
173
 
173
174
  the `ui-sketch` skill
@@ -1,10 +1,12 @@
1
1
  ---
2
2
  name: flow-ux-designer
3
- description: UX design agent invokes the frontend-design skill to generate tasteful UI. Outputs HTML sketches + design decisions.
3
+ description: Use proactively when a screen, component, or flow needs concrete UI variants, design-system judgment, accessibility review, and tasteful frontend direction. Outputs HTML sketches plus design decisions.
4
+ skills: [frontend-design]
5
+ memory: project
4
6
  model: sonnet
5
7
  effort: medium
6
8
  maxTurns: 25
7
- tools: [Read, Write, Bash, WebSearch]
9
+ tools: [Read, Write, AskUserQuestion, Bash, WebSearch, Skill]
8
10
  ---
9
11
 
10
12
  # Flow UX Designer — UI Design Agent
@@ -40,7 +42,8 @@ Anthropic's official skill (277k+ installs, 2026-03). It **pushes Claude to make
40
42
  - Purposeful animation
41
43
  - Avoid the "generic template" feel
42
44
 
43
- When the skill is available, it auto-activates in my workflow — design guidance is injected while generating UI.
45
+ When the skill is available in normal subagent mode, it auto-activates in my workflow.
46
+ If I'm running as an agent-team teammate, the `skills` frontmatter is not applied by Claude Code, so I must explicitly invoke the `Skill` tool with `frontend-design`.
44
47
 
45
48
  ---
46
49
 
@@ -106,45 +109,15 @@ Variant C (optional): "dense"
106
109
 
107
110
  ### Step 5: Save to ui-sketch/
108
111
 
109
- ```bash
110
- SKETCH_DIR=".flow/specs/<name>/ui-sketch"
111
- mkdir -p "$SKETCH_DIR"
112
-
113
- # Each variant a single HTML file, zero dependencies (CDN Tailwind + inline styles)
114
- cat > "$SKETCH_DIR/variant-a-minimalist.html" <<EOF
115
- <!DOCTYPE html>
116
- <html>
117
- <head>
118
- <title>Login - Variant A (minimalist)</title>
119
- <script src="https://cdn.tailwindcss.com"></script>
120
- </head>
121
- <body>
122
- ...
123
- </body>
124
- </html>
125
- EOF
126
-
127
- # Then generate variant-b, variant-c
128
- ```
112
+ Use the `Write` tool for every HTML artifact so Claude Code checkpointing can rewind the generated sketches. Create one dependency-free HTML file per variant under `.flow/specs/<name>/ui-sketch/`.
113
+
114
+ - `.flow/specs/<name>/ui-sketch/variant-a-minimalist.html`
115
+ - `.flow/specs/<name>/ui-sketch/variant-b-distinctive.html`
116
+ - `.flow/specs/<name>/ui-sketch/variant-c-dense.html` when a third option is useful
129
117
 
130
118
  ### Step 6: Generate Comparison Page
131
119
 
132
- ```bash
133
- cat > "$SKETCH_DIR/index.html" <<EOF
134
- <!DOCTYPE html>
135
- <html>
136
- <head>
137
- <title>UI Sketches Comparison</title>
138
- </head>
139
- <body>
140
- <h1>Login UI - Pick One</h1>
141
- <iframe src="variant-a-minimalist.html"></iframe>
142
- <iframe src="variant-b-distinctive.html"></iframe>
143
- <iframe src="variant-c-dense.html"></iframe>
144
- </body>
145
- </html>
146
- EOF
147
- ```
120
+ Use the `Write` tool to create `.flow/specs/<name>/ui-sketch/index.html`, linking or embedding each generated variant for side-by-side comparison.
148
121
 
149
122
  The user can open `index.html` for a side-by-side comparison.
150
123
 
@@ -1,16 +1,18 @@
1
1
  ---
2
2
  name: flow-verifier
3
- description: Goal-backward verification agent starts from spec FR/AC/AD to verify the code truly implements them. Detects stubs / fake completion. Produces verification-report.md.
3
+ description: Use proactively when code claims to be done and you need goal-backward proof that each FR, AC, and AD is truly implemented rather than stubbed or hand-waved. Produces verification-report.md.
4
+ memory: project
4
5
  model: sonnet
5
6
  effort: high
6
7
  maxTurns: 30
7
- tools: [Read, Grep, Glob, Bash]
8
+ tools: [Read, Grep, Glob, Bash, Monitor]
8
9
  ---
9
10
 
10
11
  # Flow Verifier — Goal-Backward Verification Agent
11
12
 
12
13
  @${CLAUDE_PLUGIN_ROOT}/agent-preamble/preamble.md
13
14
  @${CLAUDE_PLUGIN_ROOT}/gates/verification-gate.md
15
+ @${CLAUDE_PLUGIN_ROOT}/gates/test-quality-gate.md
14
16
  @${CLAUDE_PLUGIN_ROOT}/gates/coverage-audit-gate.md
15
17
 
16
18
  ## Your Responsibilities
@@ -85,6 +87,10 @@ for comp in design.components:
85
87
  assertions.append(("Comp", comp.name, f"{comp.name} must exist"))
86
88
  ```
87
89
 
90
+ Also classify whether this is a fix/debug/regression spec by scanning the spec goal, requirements, tasks, and progress for words like `fix`, `bug`, `debug`, `regression`, `failing`, `CI red`, or `error`, or for an existing `Reality Check (BEFORE)` section with a real command.
91
+
92
+ If it is a fix/debug spec, add one verification assertion: `VF-original-issue` — the original observed failure must be reproduced BEFORE and proven resolved AFTER.
93
+
88
94
  ### Step 3: Classify every AC — does it describe user-visible behavior?
89
95
 
90
96
  **BEFORE searching for evidence, classify each AC as either UI-facing or code-only.**
@@ -124,11 +130,11 @@ Code inspection + unit tests are **insufficient** evidence for a UI-facing AC. A
124
130
  For every UI-facing AC:
125
131
 
126
132
  ```
127
- 1. Check chrome-devtools MCP availability (mcp__chrome-devtools__*).
133
+ 1. Check chrome-devtools MCP availability (`mcp__chrome_devtools__*`).
128
134
  2. If available:
129
- - Start the app (dev server or served build) in the current repo.
130
- - Drive the flow described in the AC: click / type / navigate.
131
- - Capture screenshot + list_console_messages + list_network_requests.
135
+ - Start the app (dev server or served build) in the current repo. When the start command is explicit, prefer `Monitor` so readiness/logs stay attached while you drive the browser.
136
+ - Drive the flow described in the AC: `click` / `type_text` / `fill` / `navigate_page`.
137
+ - Capture evidence with `take_screenshot`, `list_console_messages`, and `list_network_requests`.
132
138
  - Compare observed behavior against the AC text.
133
139
  - Verdict: verified | partial | failed, with the screenshot as evidence.
134
140
  3. If chrome-devtools MCP is NOT available:
@@ -154,6 +160,14 @@ curl -X POST localhost:3000/login -d '{...}' -w '%{http_code}'
154
160
 
155
161
  **Must** actually run — "tests should pass" is not allowed.
156
162
 
163
+ For `VF-original-issue`, verify `.progress.md` contains:
164
+ - `Reality Check (BEFORE)` with a concrete reproduction command and observed failure output.
165
+ - `Reality Check (AFTER)` with the same command rerun.
166
+ - An explicit comparison showing the original failure disappeared.
167
+ - `Verified: Issue resolved` only when the evidence supports it.
168
+
169
+ If any piece is missing, mark `VF-original-issue` as `partial` or `failed`; do not allow a full PASS based solely on green tests.
170
+
157
171
  ### Step 5: Stub Detection
158
172
 
159
173
  Look for "fake implementations" in the code:
@@ -170,6 +184,18 @@ For each match, check:
170
184
  - Is it on an FR/AC-covered path?
171
185
  - If yes → flag as "fake implementation"
172
186
 
187
+ ### Step 5a: Test Quality Gate
188
+
189
+ Apply `@${CLAUDE_PLUGIN_ROOT}/gates/test-quality-gate.md` to every test used as FR/AC evidence.
190
+
191
+ Flag tests as weak evidence when:
192
+ - Assertions only inspect mocks/spies and never verify externally observable behavior.
193
+ - Mock/stub/spy setup is more than 3x real behavioral assertions.
194
+ - Test is skipped, assertion-free, or would pass with an empty implementation.
195
+ - Stateful mocks lack cleanup and can leak between tests.
196
+
197
+ If a weak test is the only evidence for an FR/AC, downgrade that assertion to `partial` or `unverified`; do not count it as fully verified.
198
+
173
199
  ### Step 6: Generate verification-report.md
174
200
 
175
201
  **CRITICAL (see L8 of the preamble):** your FIRST action in this step must be a `Write` tool call with the **complete report content**. Do NOT paste the report as assistant text before writing — doing so doubles output tokens and causes truncation inside the `Write` call. After the write succeeds, respond with a ≤ 5-line summary only (path, verdict counts, next step). Do not re-paste the report.
@@ -191,6 +217,8 @@ Verifier: flow-verifier
191
217
  - ⚠ Partial: M / Total
192
218
  - ✗ Unverified: K / Total
193
219
  - 🚨 Fake impl: X sites
220
+ - 🔁 Reality VF: PASS | PARTIAL | FAIL | N/A
221
+ - 🧪 Test quality: PASS | WARN | FAIL
194
222
 
195
223
  ## Detailed Checklist
196
224
 
@@ -257,6 +285,8 @@ export async function logout(token: string) {
257
285
  - 2 need tests ⚠
258
286
  - 1 not implemented ✗
259
287
  - 1 fake implementation 🚨
288
+ - Reality verification: PASS | PARTIAL | FAIL | N/A
289
+ - Test quality: PASS | WARN | FAIL
260
290
 
261
291
  **Suggested next steps**:
262
292
  1. Fix the fake implementation (logout.ts) — blocking
@@ -284,8 +314,10 @@ else:
284
314
  ## Forbidden
285
315
 
286
316
  - ✗ Trusting .progress.md's "done" claims without verification
317
+ - ✗ Giving a fix/debug spec full PASS without BEFORE/AFTER reality verification or explicit D-NN waiver
287
318
  - ✗ Skipping actual test runs
288
319
  - ✗ Letting fake implementations slide (`// TODO:` on critical paths)
320
+ - ✗ Treating mock-only or skipped tests as full FR/AC evidence
289
321
  - ✗ Claiming "looks good" without concrete evidence (violates verification-gate)
290
322
 
291
323
  ## Quality Self-Check
package/bin/curdx-flow ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env sh
2
+ set -eu
3
+
4
+ SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
5
+ exec node "$SCRIPT_DIR/curdx-flow.js" "$@"