@curdx/flow 2.0.0-beta.1 → 2.0.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +3 -10
  3. package/CHANGELOG.md +20 -0
  4. package/README.zh.md +2 -2
  5. package/agent-preamble/preamble.md +81 -11
  6. package/agents/flow-adversary.md +40 -55
  7. package/agents/flow-architect.md +23 -10
  8. package/agents/flow-debugger.md +2 -2
  9. package/agents/flow-edge-hunter.md +20 -6
  10. package/agents/flow-executor.md +3 -3
  11. package/agents/flow-planner.md +51 -48
  12. package/agents/flow-product-designer.md +14 -1
  13. package/agents/flow-qa-engineer.md +1 -1
  14. package/agents/flow-researcher.md +17 -2
  15. package/agents/flow-reviewer.md +5 -1
  16. package/agents/flow-security-auditor.md +1 -1
  17. package/agents/flow-triage-analyst.md +1 -1
  18. package/agents/flow-ui-researcher.md +2 -2
  19. package/agents/flow-ux-designer.md +1 -1
  20. package/agents/flow-verifier.md +47 -14
  21. package/bin/curdx-flow.js +13 -1
  22. package/cli/doctor.js +28 -13
  23. package/cli/install.js +62 -36
  24. package/cli/protocols.js +63 -10
  25. package/cli/registry.js +73 -0
  26. package/cli/uninstall.js +9 -11
  27. package/cli/upgrade.js +6 -10
  28. package/cli/utils.js +104 -56
  29. package/commands/fast.md +1 -1
  30. package/commands/implement.md +4 -4
  31. package/commands/init.md +14 -3
  32. package/commands/review.md +14 -5
  33. package/commands/spec.md +26 -2
  34. package/commands/start.md +47 -17
  35. package/commands/verify.md +13 -0
  36. package/gates/adversarial-review-gate.md +19 -19
  37. package/gates/devex-gate.md +4 -5
  38. package/gates/edge-case-gate.md +1 -1
  39. package/hooks/hooks.json +0 -11
  40. package/hooks/scripts/quick-mode-guard.sh +12 -9
  41. package/hooks/scripts/session-start.sh +1 -1
  42. package/hooks/scripts/stop-watcher.sh +25 -15
  43. package/knowledge/execution-strategies.md +6 -5
  44. package/knowledge/spec-driven-development.md +8 -7
  45. package/knowledge/two-stage-review.md +4 -3
  46. package/package.json +4 -2
  47. package/skills/brownfield-index/SKILL.md +62 -0
  48. package/skills/browser-qa/SKILL.md +50 -0
  49. package/skills/epic/SKILL.md +68 -0
  50. package/skills/security-audit/SKILL.md +50 -0
  51. package/skills/ui-sketch/SKILL.md +49 -0
  52. package/templates/config.json.tmpl +1 -1
  53. package/templates/design.md.tmpl +32 -112
  54. package/templates/requirements.md.tmpl +25 -43
  55. package/templates/research.md.tmpl +37 -68
  56. package/templates/tasks.md.tmpl +27 -84
  57. package/hooks/scripts/fail-tracker.sh +0 -31
@@ -27,18 +27,21 @@ Output:
27
27
 
28
28
  ## Mandatory Workflow (6 steps)
29
29
 
30
- ### Step 1: Load Prerequisites + Environment Probe
30
+ ### Step 1: Load Prerequisites + Environment Probe (conditional)
31
+
32
+ Always read the spec inputs (`research.md`, `requirements.md`, `design.md`, `.flow/CONTEXT.md`).
33
+
34
+ For the environment probe, **check existence first — do not read files that don't exist**:
31
35
 
32
36
  ```
33
- Read prerequisite spec files
34
- Check project root:
35
- package.json → confirm test / lint / build commands
36
- tsconfig.json → TypeScript strictness
37
- .eslintrc.* → lint rules
38
- vitest.config.* → test framework
37
+ For each of: package.json, tsconfig.json, .eslintrc.*, vitest.config.*
38
+ if Glob finds it → Read it to capture concrete test/lint/build commands
39
+ else skip silently (this is a greenfield project or a non-JS stack)
39
40
  ```
40
41
 
41
- **Use the actual detected commands** in each task's `Verify` field, do not assume.
42
+ For greenfield projects (no `package.json` yet), use the tech stack declared in `design.md` to infer commands. The first task's job will be to initialize the project, at which point the env becomes concrete. Do not fabricate `npm test` commands if there's no package.json yet — instead write the task as "initialize package.json and install vitest; `Verify`: `npm test --silent` produces 'no tests found'".
43
+
44
+ **Use actually detected commands** in each task's `Verify` field. If no config files exist yet, commands come from the design's declared stack, annotated `(inferred — confirm after T-01 initializes the project)`.
42
45
 
43
46
  ### Step 2: Break Down by POC-First 5 Phases
44
47
 
@@ -132,31 +135,23 @@ For each of the following sources, every item must be covered by tasks:
132
135
 
133
136
  ### Step 6: Write tasks.md + State
134
137
 
135
- Based on `${CLAUDE_PLUGIN_ROOT}/templates/tasks.md.tmpl`.
136
-
137
- Must include a **coverage audit table** at the end (from Step 5):
138
-
139
- ```markdown
140
- ## Coverage Audit
141
-
142
- | Requirement ID | Corresponding Tasks | Status |
143
- |--------|---------|------|
144
- | FR-01 | 1.2, 3.1 | ✓ |
145
- | FR-02 | 3.2 | ✓ |
146
- | AC-1.1 | 3.1 | ✓ |
147
- | AD-03 | 1.1, 2.1 | ✓ |
148
- ```
138
+ **CRITICAL (see L8 of the preamble — long-artifact handling):**
139
+ - Your FIRST action in this step must be a `Write` tool call with the full `tasks.md` content. Do NOT paste the file content as assistant text before writing.
140
+ - Do NOT preview the tasks list in the response. The file itself is the deliverable.
141
+ - If a single `Write` call would approach the sub-agent output-token budget (judge by section density, not line count — see preamble L8), split into `tasks-phase-<n>.md` files and make `tasks.md` a short index linking to them.
149
142
 
150
- Then:
143
+ Based on `${CLAUDE_PLUGIN_ROOT}/templates/tasks.md.tmpl`. Must include a **coverage audit table** at the end (from Step 5).
151
144
 
152
- ```
153
- .flow/specs/<name>/.state.json:
154
- phase_status.tasks = "completed"
155
- total_tasks = <N>
145
+ After the `Write` succeeds:
146
+ 1. Update `.flow/specs/<name>/.state.json`:
147
+ ```
148
+ phase_status.tasks = "completed"
149
+ total_tasks = <N>
150
+ ```
151
+ 2. Append to `.flow/specs/<name>/.progress.md`:
152
+ `## tasks phase complete, total N tasks`
156
153
 
157
- .flow/specs/<name>/.progress.md:
158
- Append "## tasks phase complete, total N tasks"
159
- ```
154
+ Then emit the 5-line summary (see "Output to User" below). No inline task listing.
160
155
 
161
156
  ## Output Quality Bar (Self-Check)
162
157
 
@@ -175,30 +170,38 @@ Then:
175
170
  - ✗ Skipping the coverage audit
176
171
  - ✗ Proactively skipping some FRs in requirements for the sake of "simplification" (overreach)
177
172
 
178
- ## Task Granularity Rules
173
+ ## Task decomposition (as-needed, no numeric quota)
179
174
 
180
- - **fine** (default): 2-15 minutes per task. Total 40-60+
181
- - **coarse**: 15-60 minutes per task. Total 10-20
175
+ **Stop condition, not task count.** Do not aim for a number of tasks. Produce tasks until these are true, then stop:
182
176
 
183
- Based on `_` in `.flow/specs/<name>/.state.json` or `specs.default_task_size` in `.flow/config.json`.
177
+ 1. Every FR, AC, AD, and component in the spec is covered by at least one concrete, executable task.
178
+ 2. Each task is one **cohesive unit of work** the executor can finish in a **single sub-agent dispatch** without needing to replan internally. If a task would require the executor to think "first I need to decide X, then do Y, then come back and do Z", that task is too big — split it.
179
+ 3. No two tasks are inseparable. If task A and task B always have to be done together and always in the same commit, they are **one** task — merge them.
180
+ 4. Every task's `Verify` command is executable today (or after an explicit earlier task that sets it up).
184
181
 
185
- ## Output to User
182
+ **Research reference**: this is the as-needed decomposition pattern from [ADaPT (Allen AI, NAACL 2024)](https://arxiv.org/abs/2311.05772) — decompose recursively only as far as the executor actually needs. Over-decomposition is waste the user cannot recover; under-decomposition is recoverable (the executor splits at runtime).
186
183
 
187
- ```
188
- ✓ Task breakdown complete: .flow/specs/<name>/tasks.md
184
+ **Self-check before writing**: re-read your task list. For every adjacent pair, ask "could these be one task?" If yes, merge. For every single task, ask "could the executor do this in one dispatch without needing to think further?" If no, split. Iterate until neither question produces a change.
185
+
186
+ ### Symptoms of over-decomposition (stop and merge)
189
187
 
190
- N tasks total, across 5 Phases:
191
- Phase 1 (POC): X tasks
192
- Phase 2 (Refactor): Y tasks
193
- Phase 3 (Testing): Z tasks
194
- Phase 4 (Quality): W tasks
195
- Phase 5 (PR): V tasks
188
+ - "Create file X" + "Add imports to X" + "Write function body in X" → one task.
189
+ - "Add field to schema" + "Run migration" → one task (schema change is atomic).
190
+ - "Write test" + "Make test pass" → this is TDD red+green; one task marked with TDD stage in commits, not two.
196
191
 
197
- Coverage audit: FR (A/B) | AC (C/D) | AD (E/F) all covered ✓
192
+ ### Symptoms of under-decomposition (split)
198
193
 
199
- Estimated effort: N tasks × 5 minutes ≈ M minutes
194
+ - The executor's Verify command would be three separate `npm test` runs → three tasks.
195
+ - The task touches > ~3 unrelated files or modules → split by module.
196
+ - The task's `Do` field has numbered steps > 5 that each produce a distinct observable result → split.
200
197
 
201
- Next:
202
- - Review tasks.md
203
- - /curdx-flow:implement — start execution (after Phase 2 is released)
198
+ ## Output to User (5 lines max, after Write succeeds)
199
+
200
+ ```
201
+ ✓ Wrote .flow/specs/<name>/tasks.md
202
+ N tasks across 5 Phases (X/Y/Z/W/V)
203
+ Coverage: FR A/B | AC C/D | AD E/F
204
+ Next: /curdx-flow:implement
204
205
  ```
206
+
207
+ **Do not re-paste the tasks.md content inline. Do not list every task. Just the summary.**
@@ -56,7 +56,7 @@ AC-N.M: Given [precondition], when [action], then [expected result]
56
56
 
57
57
  Must:
58
58
  - **Be testable** (can be written as E2E or integration test)
59
- - **Cover happy path + at least 1 edge case**
59
+ - **Cover happy path + real edge cases that actually apply (omit categories that do not apply to this feature)**
60
60
  - **Cover error handling** (when input is invalid / network breaks / permissions insufficient)
61
61
 
62
62
  ### Step 4: FR / NFR Extraction
@@ -144,3 +144,16 @@ Out of Scope: K items explicitly excluded
144
144
 
145
145
  Next step: /curdx-flow:spec --phase=design
146
146
  ```
147
+
148
+ ## Requirements discipline (stop-condition, not length-target)
149
+
150
+ Produce user stories and acceptance criteria that cover every distinct user-visible behavior ONCE. No target length. Stop when:
151
+
152
+ 1. Every distinct user goal is expressed as one user story (US-NN). Stories that always happen together and share every AC → merge into one.
153
+ 2. Every AC-N.N is **observable from outside the code** — a test can determine pass/fail without reading the implementation. If you cannot write the AC observably, delete it rather than ship it vague.
154
+ 3. Every FR-NN is stated once, in the US block where it first appears; do not duplicate it in a separate FR section unless the FR genuinely spans multiple user stories.
155
+ 4. NFRs are written ONLY for risks that actually apply to this feature's context. No "supports 10,000 users" for a localhost single-user Todo. If the feature has no real non-functional risk, NFR section collapses to one line: "standard for this domain".
156
+
157
+ Length emerges from real content: a 3-story CRUD produces a short document; a 20-story multi-role workflow a long one. The template structure is not a length target.
158
+
159
+ Forbidden padding: restating the goal, describing sections you are about to fill, repeating an AC under both US and FR, writing NFRs for imaginary risks.
@@ -239,7 +239,7 @@ s['qa']['issues_found'] = len(bugs)
239
239
  ## Quality Self-Check
240
240
 
241
241
  - [ ] Ran every core AC?
242
- - [ ] Covered at least 4 of the 7 edge categories?
242
+ - [ ] Covered every edge category that genuinely applies to this feature (categories that do not apply are marked N/A)?
243
243
  - [ ] Screenshots or logs saved?
244
244
  - [ ] Performance data measured (not estimated)?
245
245
  - [ ] Accessibility scanned at least once?
@@ -118,9 +118,9 @@ Before finalizing research.md, ask yourself:
118
118
 
119
119
  - [ ] Are all assumptions explicitly listed? (Karpathy principle 1)
120
120
  - [ ] Did every technical solution go through context7 / WebSearch? No relying on memory?
121
- - [ ] Did the codebase scan cover at least 3 relevant keywords?
121
+ - [ ] Did the codebase scan cover every relevant keyword raised by the requirements?
122
122
  - [ ] Does the feasibility judgment have evidence (not "should work" but "confirmed feasible based on XX")?
123
- - [ ] Are there ≥ 1 open questions for the user to answer? (Unless research is fully unambiguous)
123
+ - [ ] Are there any open questions for the user to answer? (If research is fully unambiguous, say so explicitly)
124
124
 
125
125
  If any answer is "no", redo it before writing.
126
126
 
@@ -153,3 +153,18 @@ Open questions (please answer before entering requirements phase):
153
153
 
154
154
  Next step: /curdx-flow:spec --phase=requirements
155
155
  ```
156
+
157
+ ## Research discipline (stop-condition, not length-target)
158
+
159
+ Research answers the real questions for THIS feature. There is no target length. Stop when:
160
+
161
+ 1. Every non-obvious technical question raised by the requirements has an answer with a concrete recommendation.
162
+ 2. Every version-sensitive library or API you cite has at least one fact sourced from `context7` (or WebSearch), not from memory.
163
+ 3. Every alternative you rejected has a one-line reason UNLESS the rejection turns on a subtle tradeoff worth documenting.
164
+ 4. No section exists to restate the goal, describe the template, or pad for "thoroughness".
165
+
166
+ Length emerges naturally from real content. A well-known CRUD domain (Todo / blog / basic REST) produces sections that honestly compress to "standard stack, no novelty, no version risk"; anything longer is padding. A novel architecture with real library unknowns produces a much longer document because the information content is higher.
167
+
168
+ **Forbidden padding**: restating the goal in your own words, describing structure you are about to fill, copying upstream content, listing obviously-rejected alternatives.
169
+
170
+ Self-check before `Write`: for every paragraph, ask "does this change a reader's decision?" If no, delete. Iterate until deleting any more leaves a real question unanswered.
@@ -187,7 +187,11 @@ else:
187
187
 
188
188
  ### Step 6: Generate review-report.md
189
189
 
190
- Full structure:
190
+ **CRITICAL (see L8 of the preamble):** your FIRST action in this step must be a `Write` tool call with the **complete report content**. Do NOT paste the report as assistant text before writing. After the write succeeds, respond with a ≤ 5-line summary only (path, verdict, blocker count, next step). Do not re-paste the report.
191
+
192
+ If a single `Write` call would approach the sub-agent output-token budget (judge by section density, not line count), split into `review-report.md` (short index + verdict) and `review-details.md` (full findings) — two `Write` calls. See preamble L8.
193
+
194
+ Full structure (use this as the content passed to `Write`, not as preview text):
191
195
 
192
196
  ```markdown
193
197
  # Review Report: <spec-name>
@@ -181,7 +181,7 @@ npm audit
181
181
 
182
182
  ### Step 4: Threat Modeling (sequential-thinking)
183
183
 
184
- Use sequential-thinking for 6 rounds on core entities:
184
+ Use sequential-thinking on core entities proportional to real threat-model complexity:
185
185
 
186
186
  ```
187
187
  Round 1: User — ask S/T/R/I/D/E each
@@ -44,7 +44,7 @@ Output: `.flow/_epics/<epic-name>/epic.md` + multiple `.flow/specs/<sub-name>/`
44
44
 
45
45
  ## Mandatory Workflow
46
46
 
47
- ### Step 1: Explore + Understand (sequential-thinking 5 rounds)
47
+ ### Step 1: Explore + Understand (sequential-thinking proportional to epic complexity)
48
48
 
49
49
  ```
50
50
  Round 1: What does the user really want? What's the biggest goal?
@@ -185,13 +185,13 @@ Division of labor:
185
185
 
186
186
  - ✗ Doing actual UI design (that's flow-ux-designer's job)
187
187
  - ✗ Listing references from memory (must WebSearch or scan the codebase)
188
- - ✗ Providing only one reference (at least 3 categories)
188
+ - ✗ Providing only one reference — aim for enough breadth across reference categories that the user has genuine alternatives to pick from
189
189
  - ✗ Ignoring CONTEXT.md preferences
190
190
 
191
191
  ## Quality Self-Check
192
192
 
193
193
  - [ ] Scanned codebase for existing patterns?
194
- - [ ] WebSearch covered at least 3 categories of references?
194
+ - [ ] WebSearch covered enough reference categories that the user has genuine design alternatives?
195
195
  - [ ] sequential-thinking used to classify references?
196
196
  - [ ] Recommendation considers CONTEXT.md?
197
197
  - [ ] Asset files saved?
@@ -237,7 +237,7 @@ The sketch stage = HTML prototype. Convert to React/Vue/Svelte components only a
237
237
  ## Quality Self-Check
238
238
 
239
239
  - [ ] Invoked the frontend-design skill (if available)?
240
- - [ ] ≥ 2 variants?
240
+ - [ ] Enough variants for the user to pick meaningful alternatives (omit if the brief clearly calls for one direction only)?
241
241
  - [ ] Each variant a single HTML file, zero dependencies?
242
242
  - [ ] decisions.md explains rationale for choices?
243
243
  - [ ] Considered CONTEXT.md user preferences?
@@ -85,33 +85,60 @@ for comp in design.components:
85
85
  assertions.append(("Comp", comp.name, f"{comp.name} must exist"))
86
86
  ```
87
87
 
88
- ### Step 3: Find Evidence for Each Assertion
88
+ ### Step 3: Classify every AC — does it describe user-visible behavior?
89
+
90
+ **BEFORE searching for evidence, classify each AC as either UI-facing or code-only.**
91
+
92
+ An AC is **UI-facing** if any of these is true:
93
+ - Contains words: "user sees", "displays", "renders", "shown", "visible", "click", "type into", "press", "hover", "select"
94
+ - Names a UI element: "button", "input", "checkbox", "link", "list", "form", "label", "modal", "banner"
95
+ - Describes a user flow: "the user can do X", "after X the user sees Y"
96
+ - References a visual state: "strikethrough", "highlighted", "disabled", "focus ring"
97
+
98
+ An AC is **code-only** if it describes internal behavior:
99
+ - Schema shape, API response structure, data transformations
100
+ - Performance ("p95 < 50ms"), reliability, security properties
101
+ - Error-envelope shapes, database constraints
102
+
103
+ ### Step 3a: Find evidence for code-only ACs
89
104
 
90
105
  ```python
91
- for source, id, text in assertions:
106
+ for source, id, text in code_only_assertions:
92
107
  evidence = []
93
-
94
- # Evidence 1: code implementation
95
108
  relevant_files = grep_codebase(extract_keywords(text))
96
109
  if relevant_files:
97
110
  evidence.append(("code", relevant_files))
98
-
99
- # Evidence 2: tests
100
111
  test_files = find_tests_mentioning(id)
101
112
  if test_files:
102
113
  evidence.append(("test", test_files))
103
-
104
- # Evidence 3: commit references
105
114
  commits = git_log_grep(id)
106
115
  if commits:
107
116
  evidence.append(("commit", commits))
108
-
109
- # Verdict
110
- if evidence:
111
- status = "verified" if all_evidence_strong(evidence) else "partial"
112
- else:
113
- status = "missing"
117
+ status = "verified" if evidence and all_evidence_strong(evidence) else ("partial" if evidence else "missing")
118
+ ```
119
+
120
+ ### Step 3b: UI-facing ACs REQUIRE browser verification (hard rule)
121
+
122
+ Code inspection + unit tests are **insufficient** evidence for a UI-facing AC. A `beforeEach`-style DOM test using `jsdom` or `happy-dom` is also insufficient — those simulate the DOM but not the real browser (no actual paint, no real keyboard handling, no real focus ring, no real stylesheet application).
123
+
124
+ For every UI-facing AC:
125
+
114
126
  ```
127
+ 1. Check chrome-devtools MCP availability (mcp__chrome-devtools__*).
128
+ 2. If available:
129
+ - Start the app (dev server or served build) in the current repo.
130
+ - Drive the flow described in the AC: click / type / navigate.
131
+ - Capture screenshot + list_console_messages + list_network_requests.
132
+ - Compare observed behavior against the AC text.
133
+ - Verdict: verified | partial | failed, with the screenshot as evidence.
134
+ 3. If chrome-devtools MCP is NOT available:
135
+ - Mark the AC as "unverified — browser MCP missing".
136
+ - Add a CRITICAL section in verification-report.md listing the UI-facing ACs that could not be verified.
137
+ - Do NOT silently pass the AC based on code reading.
138
+ - Do NOT accept "manual smoke" as sufficient evidence unless the user explicitly logged a D-NN decision in STATE.md waiving automated browser verification.
139
+ ```
140
+
141
+ Manual-smoke evidence (comments in tasks.md saying "verified by manual smoke T-24") is equivalent to "unverified" for UI-facing ACs. Flag it. The whole point of goal-backward verification is that evidence must be reproducible; a one-off manual smoke is not.
115
142
 
116
143
  ### Step 4: Run Actual Tests (Decisive)
117
144
 
@@ -145,6 +172,12 @@ For each match, check:
145
172
 
146
173
  ### Step 6: Generate verification-report.md
147
174
 
175
+ **CRITICAL (see L8 of the preamble):** your FIRST action in this step must be a `Write` tool call with the **complete report content**. Do NOT paste the report as assistant text before writing — doing so doubles output tokens and causes truncation inside the `Write` call. After the write succeeds, respond with a ≤ 5-line summary only (path, verdict counts, next step). Do not re-paste the report.
176
+
177
+ If a single `Write` call would approach the sub-agent output-token budget (judge by section density, not line count), split into `verification-report.md` (short index + verdict) and `verification-details.md` (full findings table) — two `Write` calls. See preamble L8.
178
+
179
+ Required structure (use this as the content passed to `Write`, not as preview text):
180
+
148
181
  ```markdown
149
182
  # Verification Report: <spec-name>
150
183
 
package/bin/curdx-flow.js CHANGED
@@ -20,6 +20,8 @@
20
20
  * for the full command/workflow reference)
21
21
  */
22
22
 
23
+ import { pathToFileURL } from "node:url";
24
+
23
25
  import { install } from "../cli/install.js";
24
26
  import { doctor } from "../cli/doctor.js";
25
27
  import { upgrade } from "../cli/upgrade.js";
@@ -128,4 +130,14 @@ async function main() {
128
130
  }
129
131
  }
130
132
 
131
- main();
133
+ // Only execute main() when invoked directly (`node bin/curdx-flow.js ...`
134
+ // or via the npm bin shim). When the file is imported by tests or tooling,
135
+ // we want the module graph to load without side-effects. This idiom is
136
+ // the ESM equivalent of Python's `if __name__ == "__main__"`.
137
+ const invokedDirectly =
138
+ process.argv[1] &&
139
+ import.meta.url === pathToFileURL(process.argv[1]).href;
140
+
141
+ if (invokedDirectly) {
142
+ main();
143
+ }
package/cli/doctor.js CHANGED
@@ -13,6 +13,7 @@ import {
13
13
  listMcps,
14
14
  ensureClaudeMemRuntimes,
15
15
  } from "./utils.js";
16
+ import { RECOMMENDED_PLUGINS } from "./registry.js";
16
17
 
17
18
  export async function doctor(args = []) {
18
19
  const verbose = args.includes("--verbose") || args.includes("-v");
@@ -50,13 +51,28 @@ export async function doctor(args = []) {
50
51
  }
51
52
 
52
53
  // ---------- MCPs ----------
54
+ // Bundled by curdx-flow plugin via .claude-plugin/plugin.json mcpServers.
55
+ // chrome-devtools is NOT here anymore — it was extracted into its own
56
+ // recommended plugin (see below) to align with the "each MCP owned by one
57
+ // plugin" model and avoid double-spawning the chrome-devtools-mcp process.
53
58
  console.log(`\n${color.bold("MCP Servers:")}`);
54
59
  const mcps = cv ? listMcps() : [];
55
- const expectedMcps = ["context7", "sequential-thinking", "chrome-devtools"];
60
+ const expectedMcps = ["context7", "sequential-thinking"];
56
61
  for (const m of expectedMcps) {
57
- const found = mcps.find((x) => x.name === m);
62
+ // `claude mcp list` reports plugin-bundled MCPs as
63
+ // "plugin:curdx-flow:<name>" and standalone MCPs as "<name>". Accept
64
+ // either form — previously this was a bare .name === m check, so a
65
+ // plugin MCP named "plugin:curdx-flow:context7" would silently report
66
+ // as not-installed even though it was running (the same class of bug
67
+ // that bit chrome-devtools in beta.7).
68
+ const found = mcps.find(
69
+ (x) =>
70
+ x.name === m &&
71
+ (x.plugin === null || x.plugin === "curdx-flow")
72
+ );
58
73
  if (found) {
59
- log.ok(`${m.padEnd(22)} ${color.dim("auto-loaded")}`);
74
+ const via = found.plugin ? `via plugin:${found.plugin}` : "standalone";
75
+ log.ok(`${m.padEnd(22)} ${color.dim(`auto-loaded (${via})`)}`);
60
76
  } else {
61
77
  if (curdx) {
62
78
  log.warn(`${m.padEnd(22)} not shown in claude mcp list (restart Claude Code may fix)`);
@@ -67,24 +83,21 @@ export async function doctor(args = []) {
67
83
  }
68
84
  }
69
85
 
70
- // ---------- Recommended plugins ----------
86
+ // ---------- Recommended plugins (single registry; see cli/registry.js) ----------
71
87
  console.log(`\n${color.bold("Recommended plugins:")}`);
72
- const recommended = [
73
- { name: "pua", installCmd: "claude plugin install pua@pua-skills" },
74
- { name: "claude-mem", installCmd: "claude plugin install claude-mem@thedotmack" },
75
- { name: "frontend-design", installCmd: "claude plugin install frontend-design@claude-plugins-official" },
76
- ];
77
88
  let claudeMemEnabled = false;
78
- for (const r of recommended) {
89
+ for (const r of RECOMMENDED_PLUGINS) {
79
90
  const p = plugins.find((x) => x.name === r.name);
80
91
  if (p && p.status === "enabled") {
81
92
  log.ok(`${r.name.padEnd(22)} ${color.dim(`v${p.version}`)}`);
82
- if (r.name === "claude-mem") claudeMemEnabled = true;
93
+ if (r.postInstall === "claude-mem-runtimes") claudeMemEnabled = true;
83
94
  } else if (p && p.status === "failed") {
84
95
  log.err(`${r.name.padEnd(22)} load failed`);
85
96
  errors++;
86
97
  } else {
87
- log.warn(`${r.name.padEnd(22)} not installed ${color.dim("(run: curdx-flow install --all)")}`);
98
+ log.warn(
99
+ `${r.name.padEnd(22)} not installed ${color.dim(`(run: claude plugin install ${r.installSpec})`)}`
100
+ );
88
101
  warnings++;
89
102
  }
90
103
  }
@@ -147,7 +160,9 @@ export async function doctor(args = []) {
147
160
  console.log(color.green("Summary: all healthy ✓"));
148
161
  }
149
162
 
150
- if (verbose) {
163
+ if (verbose && cv) {
164
+ // Only call claude when it is actually on PATH; otherwise we spawn a
165
+ // child that fails silently and print a blank block.
151
166
  console.log(`\n${color.bold("Details:")}`);
152
167
  console.log(color.dim(` Plugins raw:`));
153
168
  console.log(runSync("claude", ["plugin", "list"]).stdout);
package/cli/install.js CHANGED
@@ -2,7 +2,7 @@
2
2
  * install command — install curdx-flow plugin + optional recommended plugins.
3
3
  */
4
4
 
5
- import { existsSync } from "node:fs";
5
+ import { existsSync, readFileSync } from "node:fs";
6
6
  import { dirname, join } from "node:path";
7
7
  import { fileURLToPath } from "node:url";
8
8
 
@@ -17,6 +17,7 @@ import {
17
17
  ensureClaudeMemRuntimes,
18
18
  } from "./utils.js";
19
19
  import { injectGlobalProtocols, GLOBAL_CLAUDE_MD } from "./protocols.js";
20
+ import { RECOMMENDED_PLUGINS } from "./registry.js";
20
21
 
21
22
  // When installed via npm, this CLI file lives at <pkg-root>/cli/install.js.
22
23
  // The npm package bundles the full plugin body (.claude-plugin/, agents/,
@@ -28,27 +29,11 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
28
29
  const PKG_ROOT = dirname(__dirname);
29
30
  const LOCAL_MARKETPLACE_MANIFEST = join(PKG_ROOT, ".claude-plugin", "marketplace.json");
30
31
 
31
- // Recommended plugins with their marketplace source + install identifier
32
- const RECOMMENDED = [
33
- {
34
- name: "pua",
35
- marketplace: "tanweai/pua",
36
- installSpec: "pua@pua-skills",
37
- hint: "no-give-up + three red lines",
38
- },
39
- {
40
- name: "claude-mem",
41
- marketplace: "thedotmack/claude-mem",
42
- installSpec: "claude-mem@thedotmack",
43
- hint: "automatic cross-session memory",
44
- },
45
- {
46
- name: "frontend-design",
47
- marketplace: null, // already in default marketplace claude-plugins-official
48
- installSpec: "frontend-design@claude-plugins-official",
49
- hint: "Anthropic official UI skill",
50
- },
51
- ];
32
+ // Recommended plugins: single source of truth is cli/registry.js.
33
+ // See registry.js for the rationale — this list used to drift across
34
+ // install/uninstall/upgrade/doctor, producing the chrome-devtools-mcp
35
+ // orphan-plugin bug (installable but uninstallable).
36
+ const RECOMMENDED = RECOMMENDED_PLUGINS;
52
37
 
53
38
  export async function install(args = []) {
54
39
  const all = args.includes("--all");
@@ -64,7 +49,7 @@ export async function install(args = []) {
64
49
  log.title("🚀 CurDX-Flow Installer");
65
50
 
66
51
  // ---------- Step 1: Check claude CLI ----------
67
- log.step(1, 4, "Checking claude CLI...");
52
+ log.step(1, 5, "Checking claude CLI...");
68
53
  const ver = claudeVersion();
69
54
  if (!ver) {
70
55
  log.err("claude CLI not found. Install Claude Code from https://code.claude.com first.");
@@ -78,7 +63,7 @@ export async function install(args = []) {
78
63
  const marketplaceLabel = useOffline
79
64
  ? `local npm package (${PKG_ROOT})`
80
65
  : "GitHub curdx/curdx-flow";
81
- log.step(2, 4, `Adding curdx-flow marketplace from ${marketplaceLabel}...`);
66
+ log.step(2, 5, `Adding curdx-flow marketplace from ${marketplaceLabel}...`);
82
67
 
83
68
  // Remove any existing marketplace with the same name so we get a clean
84
69
  // rebind to the chosen source. Errors are non-fatal (marketplace may
@@ -105,10 +90,38 @@ export async function install(args = []) {
105
90
 
106
91
  // ---------- Step 3: Install curdx-flow plugin ----------
107
92
  log.blank();
108
- log.step(3, 4, "Installing curdx-flow plugin (3 MCPs will auto-start)...");
93
+ log.step(3, 5, "Installing curdx-flow plugin (2 MCPs will auto-start)...");
94
+ // Read the version the marketplace is shipping so we can decide whether an
95
+ // already-installed plugin needs an update (same name but stale version
96
+ // previously silently skipped the upgrade — caused the beta.1 → beta.7 drift).
97
+ let shippedVersion = null;
98
+ try {
99
+ const mf = JSON.parse(
100
+ readFileSync(LOCAL_MARKETPLACE_MANIFEST, "utf-8")
101
+ );
102
+ shippedVersion = mf?.metadata?.version || null;
103
+ } catch {
104
+ // marketplace not local (online install) or unreadable — fall through
105
+ }
106
+
109
107
  const installed = listPlugins();
110
108
  const already = installed.find((p) => p.name === "curdx-flow");
111
- if (already) {
109
+ if (already && shippedVersion && already.version !== shippedVersion) {
110
+ log.info(
111
+ `curdx-flow installed at v${already.version}, marketplace ships v${shippedVersion} — updating...`
112
+ );
113
+ const r = await run(
114
+ "claude",
115
+ ["plugin", "update", "curdx-flow@curdx-flow-marketplace"],
116
+ { silent: true }
117
+ );
118
+ if (r.code !== 0) {
119
+ log.warn(`Update returned non-zero: ${r.stderr.trim() || r.stdout.trim()}`);
120
+ log.info(`If the version stays on v${already.version}, run: claude plugin uninstall curdx-flow@curdx-flow-marketplace && retry`);
121
+ } else {
122
+ log.ok(`curdx-flow updated to v${shippedVersion}`);
123
+ }
124
+ } else if (already) {
112
125
  log.ok(`curdx-flow already installed (v${already.version}, ${already.status})`);
113
126
  } else {
114
127
  const r = await run(
@@ -125,7 +138,7 @@ export async function install(args = []) {
125
138
 
126
139
  // ---------- Step 4: Recommended plugins ----------
127
140
  log.blank();
128
- log.step(4, 4, "Recommended plugins");
141
+ log.step(4, 5, "Recommended plugins");
129
142
 
130
143
  if (noDeps) {
131
144
  log.info("Skipping recommended plugins (--no-deps)");
@@ -161,7 +174,7 @@ export async function install(args = []) {
161
174
  for (const pluginName of toInstall) {
162
175
  const rec = RECOMMENDED.find((r) => r.name === pluginName);
163
176
  log.blank();
164
- console.log(` ${color.cyan("")} Installing ${color.bold(rec.name)}...`);
177
+ console.log(` ${color.cyan("")} Installing ${color.bold(rec.name)}...`);
165
178
 
166
179
  // 1. Add marketplace (if needed)
167
180
  if (rec.marketplace) {
@@ -186,7 +199,7 @@ export async function install(args = []) {
186
199
  // 3. Post-install hook for claude-mem: its .mcp.json hard-codes `bun`,
187
200
  // but ~/.bun/bin is not on PATH when Claude Code spawns the MCP server.
188
201
  // Auto-create a PATH-visible symlink to fix it.
189
- if (rec.name === "claude-mem") {
202
+ if (rec.postInstall === "claude-mem-runtimes") {
190
203
  const r = ensureClaudeMemRuntimes();
191
204
  for (const [name, res] of Object.entries(r)) {
192
205
  if (res.status === "linked") {
@@ -219,11 +232,13 @@ export async function install(args = []) {
219
232
 
220
233
  // ---------- Step 5: inject global protocols ----------
221
234
  log.blank();
222
- console.log(color.dim("Injecting global protocols into ~/.claude/CLAUDE.md..."));
235
+ log.step(5, 5, "Injecting global protocols into ~/.claude/CLAUDE.md...");
223
236
  try {
224
237
  const r = injectGlobalProtocols();
225
238
  if (r.action === "created") {
226
239
  log.ok(`Global protocols injected ${color.dim(`(${GLOBAL_CLAUDE_MD})`)}`);
240
+ } else if (r.action === "appended") {
241
+ log.ok(`Global protocols appended ${color.dim(`(${GLOBAL_CLAUDE_MD})`)}`);
227
242
  } else if (r.action === "upgraded") {
228
243
  log.ok(`Global protocols upgraded ${color.dim(`(${GLOBAL_CLAUDE_MD})`)}`);
229
244
  } else {
@@ -237,15 +252,26 @@ export async function install(args = []) {
237
252
  }
238
253
 
239
254
  function printNextSteps() {
240
- console.log(`\n${color.bold("✅ Install complete")}\n`);
255
+ // Detect whether the CLI is globally installed (curdx-flow on PATH) or
256
+ // the user ran us via npx. Tell them the right invocation each time.
257
+ const cliOnPath = has("curdx-flow");
258
+ const cliCmd = cliOnPath ? "curdx-flow" : "npx @curdx/flow";
259
+
260
+ console.log(`\n${color.bold(`${color.green("✓")} Install complete`)}\n`);
261
+ console.log(`${color.bold("Restart Claude Code")} so the plugin registers all its commands and hooks.\n`);
241
262
  console.log(`${color.bold("Next steps")}:\n`);
242
263
  console.log(` ${color.dim("# Verify health")}`);
243
- console.log(` curdx-flow doctor\n`);
244
- console.log(` ${color.dim("# Initialize .flow/ in your project")}`);
245
- console.log(` cd ~/your-project && curdx-flow init\n`);
246
- console.log(` ${color.dim("# Start using it (inside Claude Code)")}`);
264
+ console.log(` ${cliCmd} doctor\n`);
265
+ console.log(` ${color.dim("# Inside any project, initialize and start a feature spec")}`);
266
+ console.log(` ${color.cyan("cd ~/your-project")}`);
247
267
  console.log(` ${color.cyan("claude")}`);
248
- console.log(` ${color.cyan("/curdx-flow:start my-feature \"<describe what to build>\"")}\n`);
268
+ console.log(` ${color.cyan("/curdx-flow:init")}`);
269
+ console.log(` ${color.cyan("/curdx-flow:start my-feature \"<one-line goal>\"")}\n`);
270
+ if (!cliOnPath) {
271
+ console.log(
272
+ `${color.dim("Tip: install the CLI globally for shorter commands —")} ${color.cyan("npm i -g @curdx/flow")}\n`
273
+ );
274
+ }
249
275
  console.log(
250
276
  `${color.bold("Learn more")}: https://github.com/curdx/curdx-flow/blob/main/docs/getting-started.md\n`
251
277
  );