@curdx/flow 2.0.0-beta.1 → 2.0.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +3 -10
- package/CHANGELOG.md +61 -0
- package/README.zh.md +2 -2
- package/agent-preamble/preamble.md +81 -11
- package/agents/flow-adversary.md +40 -55
- package/agents/flow-architect.md +23 -10
- package/agents/flow-debugger.md +2 -2
- package/agents/flow-edge-hunter.md +20 -6
- package/agents/flow-executor.md +3 -3
- package/agents/flow-planner.md +51 -48
- package/agents/flow-product-designer.md +14 -1
- package/agents/flow-qa-engineer.md +1 -1
- package/agents/flow-researcher.md +17 -2
- package/agents/flow-reviewer.md +5 -1
- package/agents/flow-security-auditor.md +1 -1
- package/agents/flow-triage-analyst.md +1 -1
- package/agents/flow-ui-researcher.md +2 -2
- package/agents/flow-ux-designer.md +1 -1
- package/agents/flow-verifier.md +47 -14
- package/bin/curdx-flow.js +13 -1
- package/cli/doctor.js +73 -13
- package/cli/install.js +62 -36
- package/cli/protocols.js +63 -10
- package/cli/registry.js +73 -0
- package/cli/uninstall.js +9 -11
- package/cli/upgrade.js +6 -10
- package/cli/utils.js +150 -56
- package/commands/fast.md +1 -1
- package/commands/implement.md +4 -4
- package/commands/init.md +14 -3
- package/commands/review.md +14 -5
- package/commands/spec.md +26 -2
- package/commands/start.md +47 -17
- package/commands/verify.md +13 -0
- package/gates/adversarial-review-gate.md +19 -19
- package/gates/devex-gate.md +4 -5
- package/gates/edge-case-gate.md +1 -1
- package/hooks/hooks.json +0 -11
- package/hooks/scripts/quick-mode-guard.sh +12 -9
- package/hooks/scripts/session-start.sh +1 -1
- package/hooks/scripts/stop-watcher.sh +25 -15
- package/knowledge/execution-strategies.md +6 -5
- package/knowledge/spec-driven-development.md +8 -7
- package/knowledge/two-stage-review.md +4 -3
- package/package.json +4 -2
- package/skills/brownfield-index/SKILL.md +62 -0
- package/skills/browser-qa/SKILL.md +50 -0
- package/skills/epic/SKILL.md +68 -0
- package/skills/security-audit/SKILL.md +50 -0
- package/skills/ui-sketch/SKILL.md +49 -0
- package/templates/config.json.tmpl +1 -1
- package/templates/design.md.tmpl +32 -112
- package/templates/requirements.md.tmpl +25 -43
- package/templates/research.md.tmpl +37 -68
- package/templates/tasks.md.tmpl +27 -84
- package/hooks/scripts/fail-tracker.sh +0 -31
package/cli/utils.js
CHANGED
|
@@ -108,39 +108,6 @@ export function confirm(message, defaultYes = true) {
|
|
|
108
108
|
});
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
-
/**
|
|
112
|
-
* Ask user to pick from a list. Returns selected value or null if aborted.
|
|
113
|
-
*/
|
|
114
|
-
export function select(message, choices, defaultIndex = 0) {
|
|
115
|
-
return new Promise((resolve) => {
|
|
116
|
-
console.log(`${color.cyan("?")} ${message}`);
|
|
117
|
-
choices.forEach((ch, i) => {
|
|
118
|
-
const marker = i === defaultIndex ? color.green("▸") : " ";
|
|
119
|
-
console.log(` ${marker} ${color.bold(String(i + 1))}. ${ch.label}`);
|
|
120
|
-
});
|
|
121
|
-
|
|
122
|
-
const rl = createInterface({
|
|
123
|
-
input: process.stdin,
|
|
124
|
-
output: process.stdout,
|
|
125
|
-
});
|
|
126
|
-
rl.question(
|
|
127
|
-
` ${color.dim(`(default: ${defaultIndex + 1}, q to abort) `)}`,
|
|
128
|
-
(ans) => {
|
|
129
|
-
rl.close();
|
|
130
|
-
const v = ans.trim().toLowerCase();
|
|
131
|
-
if (v === "q") return resolve(null);
|
|
132
|
-
if (v === "") return resolve(choices[defaultIndex].value);
|
|
133
|
-
const n = parseInt(v, 10);
|
|
134
|
-
if (Number.isInteger(n) && n >= 1 && n <= choices.length) {
|
|
135
|
-
return resolve(choices[n - 1].value);
|
|
136
|
-
}
|
|
137
|
-
console.log(color.yellow(" (invalid, using default)"));
|
|
138
|
-
resolve(choices[defaultIndex].value);
|
|
139
|
-
}
|
|
140
|
-
);
|
|
141
|
-
});
|
|
142
|
-
}
|
|
143
|
-
|
|
144
111
|
/**
|
|
145
112
|
* Multi-select (checkbox-style via comma-separated input).
|
|
146
113
|
* Returns array of selected values.
|
|
@@ -199,47 +166,170 @@ export function claudeVersion() {
|
|
|
199
166
|
return m ? m[1] : res.stdout.trim().split("\n")[0];
|
|
200
167
|
}
|
|
201
168
|
|
|
202
|
-
/**
|
|
169
|
+
/**
|
|
170
|
+
* List installed plugins. Prefers the structured `claude plugin list --json`
|
|
171
|
+
* output (stable machine-readable format; confirmed present in claude
|
|
172
|
+
* 2.1.117+). Falls back to parsing the human-readable stream-text output
|
|
173
|
+
 * for older CLI versions; note that this text parser is brittle.
|
|
174
|
+
*
|
|
175
|
+
 * Returns array of { name, version, status, raw }.
|
|
176
|
+
*/
|
|
203
177
|
export function listPlugins() {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
178
|
+
// Preferred: structured JSON output.
|
|
179
|
+
const j = runSync("claude", ["plugin", "list", "--json"]);
|
|
180
|
+
if (j.code === 0 && j.stdout.trim().startsWith("[")) {
|
|
181
|
+
try {
|
|
182
|
+
const arr = JSON.parse(j.stdout);
|
|
183
|
+
return arr.map((p) => ({
|
|
184
|
+
// id has form "name@marketplace" — name is stable for dedup/lookup.
|
|
185
|
+
name: String(p.id || "").split("@")[0],
|
|
186
|
+
version: p.version,
|
|
187
|
+
status: p.enabled === false ? "disabled" : "enabled",
|
|
188
|
+
raw: JSON.stringify(p),
|
|
189
|
+
}));
|
|
190
|
+
} catch {
|
|
191
|
+
// JSON parse failed — fall through to legacy text parser.
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Legacy fallback: parse the human-readable format.
|
|
209
196
|
// ❯ curdx-flow@curdx-flow-marketplace
|
|
210
197
|
// Version: 1.1.1
|
|
211
|
-
// Scope: user
|
|
212
198
|
// Status: ✔ enabled
|
|
213
|
-
|
|
199
|
+
// Fragile — matches unicode markers. Kept only for older claude CLIs.
|
|
200
|
+
const res = runSync("claude", ["plugin", "list"]);
|
|
201
|
+
if (res.code !== 0) return [];
|
|
202
|
+
const plugins = [];
|
|
203
|
+
const blocks = res.stdout.split(/\n\s*❯\s*/).slice(1);
|
|
214
204
|
for (const block of blocks) {
|
|
215
205
|
const lines = block.split("\n");
|
|
216
206
|
const name = lines[0].trim().split("@")[0];
|
|
217
207
|
const version = (block.match(/Version:\s*(\S+)/) || [])[1];
|
|
218
|
-
const status = block.includes("✔")
|
|
208
|
+
const status = block.includes("✔")
|
|
209
|
+
? "enabled"
|
|
210
|
+
: block.includes("✘")
|
|
211
|
+
? "failed"
|
|
212
|
+
: "unknown";
|
|
219
213
|
plugins.push({ name, version, status, raw: block });
|
|
220
214
|
}
|
|
221
215
|
return plugins;
|
|
222
216
|
}
|
|
223
217
|
|
|
224
|
-
/**
|
|
218
|
+
/**
|
|
219
|
+
* Read the user-level MCP registrations from ~/.claude.json. These are the
|
|
220
|
+
* MCPs the user added manually via `claude mcp add …` — distinct from
|
|
221
|
+
* plugin-bundled MCPs (which live in plugin.json).
|
|
222
|
+
*
|
|
223
|
+
* Returns a Map keyed by server name with the raw config object. Returns
|
|
224
|
+
* an empty Map if the file is missing / unreadable / has no mcpServers
|
|
225
|
+
* section — all of which are normal states and not errors.
|
|
226
|
+
*/
|
|
227
|
+
export function readUserMcpConfig() {
|
|
228
|
+
try {
|
|
229
|
+
const path = join(HOME, ".claude.json");
|
|
230
|
+
if (!existsSync(path)) return new Map();
|
|
231
|
+
const cfg = JSON.parse(readFileSync(path, "utf-8"));
|
|
232
|
+
const servers = cfg?.mcpServers || {};
|
|
233
|
+
return new Map(Object.entries(servers));
|
|
234
|
+
} catch {
|
|
235
|
+
return new Map();
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Given the output of listMcps() and a user-level MCP config map, find
|
|
241
|
+
* MCPs that are registered BOTH as user-level AND as plugin-bundled.
|
|
242
|
+
* The plugin-bundled form shows up as `plugin:<plugin>:<name>` in
|
|
243
|
+
* listMcps output, so a user-level "context7" and a plugin-level
|
|
244
|
+
* "plugin:curdx-flow:context7" are a duplicate pair.
|
|
245
|
+
*
|
|
246
|
+
* Returns array of { name, userConfig, pluginEntry }.
|
|
247
|
+
*/
|
|
248
|
+
export function findDuplicateMcps(mcps, userConfig) {
|
|
249
|
+
const duplicates = [];
|
|
250
|
+
for (const m of mcps) {
|
|
251
|
+
// Only look at plugin-prefixed entries — they're the reference for
|
|
252
|
+
// what's bundled. Check if user has their own non-prefixed version.
|
|
253
|
+
if (m.plugin && userConfig.has(m.name)) {
|
|
254
|
+
duplicates.push({
|
|
255
|
+
name: m.name,
|
|
256
|
+
userConfig: userConfig.get(m.name),
|
|
257
|
+
pluginEntry: m,
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
return duplicates;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* List MCP servers registered with the `claude` CLI. Returns array of
|
|
266
|
+
* { name, plugin, fullName, status, command }
|
|
267
|
+
* where `plugin` is set when the MCP came from a plugin (real name is
|
|
268
|
+
* `plugin:<plugin>:<mcp>`), `name` is the trailing segment, and `fullName`
|
|
269
|
+
* is the original as reported by claude.
|
|
270
|
+
*
|
|
271
|
+
* Fixture captured from `claude mcp list` (2.1.117):
|
|
272
|
+
* Checking MCP server health…
|
|
273
|
+
*
|
|
274
|
+
* plugin:curdx-flow:context7: npx -y @upstash/context7-mcp@latest - ✓ Connected
|
|
275
|
+
* context7: npx -y @upstash/context7-mcp --api-key ... - ✓ Connected
|
|
276
|
+
* claude.ai Gmail: https://gmailmcp... - ✓ Connected
|
|
277
|
+
*
|
|
278
|
+
* `claude mcp list --json` does not exist on 2.1.117 (verified), so this
|
|
279
|
+
* parser is the primary path. It is fixture-tested in test/utils.test.js
|
|
280
|
+
* so format regressions get caught in CI.
|
|
281
|
+
*/
|
|
225
282
|
export function listMcps() {
|
|
226
283
|
const res = runSync("claude", ["mcp", "list"]);
|
|
227
284
|
if (res.code !== 0) return [];
|
|
228
|
-
|
|
285
|
+
return parseMcpList(res.stdout);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/** Exported for testing against a fixed input. */
|
|
289
|
+
export function parseMcpList(output) {
|
|
229
290
|
const mcps = [];
|
|
230
|
-
for (const
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
291
|
+
for (const raw of output.split("\n")) {
|
|
292
|
+
const line = raw.trimEnd();
|
|
293
|
+
if (!line) continue;
|
|
294
|
+
// skip the health-check header line
|
|
295
|
+
if (line.startsWith("Checking") || line.startsWith("checking")) continue;
|
|
296
|
+
// Expected format: "<fullName>: <command-or-url> - <status>"
|
|
297
|
+
// fullName may itself contain colons when prefixed with "plugin:<p>:<m>".
|
|
298
|
+
// Match from the end to find the status sentinel " - ", then split off
|
|
299
|
+
// the name at the first ": " after the identifier prefix.
|
|
300
|
+
const statusSplit = line.lastIndexOf(" - ");
|
|
301
|
+
if (statusSplit === -1) continue;
|
|
302
|
+
const statusRaw = line.slice(statusSplit + 3).trim();
|
|
303
|
+
const beforeStatus = line.slice(0, statusSplit);
|
|
304
|
+
// Find the first ": " that separates name from command. Note the space
|
|
305
|
+
// after the colon — this disambiguates from the colons inside
|
|
306
|
+
// "plugin:foo:bar".
|
|
307
|
+
const nameSplit = beforeStatus.indexOf(": ");
|
|
308
|
+
if (nameSplit === -1) continue;
|
|
309
|
+
const fullName = beforeStatus.slice(0, nameSplit).trim();
|
|
310
|
+
const command = beforeStatus.slice(nameSplit + 2).trim();
|
|
311
|
+
|
|
312
|
+
let plugin = null;
|
|
313
|
+
let name = fullName;
|
|
314
|
+
if (fullName.startsWith("plugin:")) {
|
|
315
|
+
const parts = fullName.split(":");
|
|
316
|
+
if (parts.length >= 3) {
|
|
317
|
+
plugin = parts[1];
|
|
318
|
+
name = parts.slice(2).join(":");
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
const status = /Connected|✓/.test(statusRaw)
|
|
323
|
+
? "connected"
|
|
324
|
+
: /Failed|✗/.test(statusRaw)
|
|
325
|
+
? "failed"
|
|
326
|
+
: "unknown";
|
|
327
|
+
|
|
328
|
+
mcps.push({ name, plugin, fullName, status, command });
|
|
234
329
|
}
|
|
235
330
|
return mcps;
|
|
236
331
|
}
|
|
237
332
|
|
|
238
|
-
// ---------- Paths ----------
|
|
239
|
-
export function pluginCacheDir(pluginName = "curdx-flow", marketplace = "curdx-flow-marketplace") {
|
|
240
|
-
return `${process.env.HOME}/.claude/plugins/cache/${marketplace}/${pluginName}`;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
333
|
// ---------- Runtime PATH guards (bun / uv) ----------
|
|
244
334
|
// claude-mem hard-codes `command: "bun"` in its .mcp.json, but bun installs to
|
|
245
335
|
// ~/.bun/bin which is not on PATH when Claude Code spawns MCP servers
|
|
@@ -247,10 +337,14 @@ export function pluginCacheDir(pluginName = "curdx-flow", marketplace = "curdx-f
|
|
|
247
337
|
// detection + self-healing: create a symlink to the user-level bun install
|
|
248
338
|
// in a PATH-visible directory.
|
|
249
339
|
|
|
250
|
-
import { mkdirSync, symlinkSync, lstatSync, unlinkSync, readlinkSync } from "node:fs";
|
|
251
|
-
|
|
340
|
+
import { existsSync, mkdirSync, symlinkSync, lstatSync, unlinkSync, readlinkSync } from "node:fs";
|
|
341
|
+
import { homedir } from "node:os";
|
|
342
|
+
// `join` already imported at the top of this file.
|
|
252
343
|
|
|
253
|
-
|
|
344
|
+
// os.homedir() is sourced from the OS-level user record and works even
|
|
345
|
+
// when $HOME is empty (non-login shells, some CI containers). See the
|
|
346
|
+
// same rationale in cli/protocols.js.
|
|
347
|
+
const HOME = homedir();
|
|
254
348
|
|
|
255
349
|
/** Candidate bun install locations (priority order) */
|
|
256
350
|
const BUN_CANDIDATES = [
|
package/commands/fast.md
CHANGED
|
@@ -123,6 +123,6 @@ Choosing the right scenario matters more than forcing the flow.
|
|
|
123
123
|
## Forbidden
|
|
124
124
|
|
|
125
125
|
- ✗ Committing without running verification
|
|
126
|
-
- ✗ Changes touching
|
|
126
|
+
- ✗ Changes touching many unrelated files or modules (that means the change is no longer "fast" — run the full flow instead)
|
|
127
127
|
- ✗ Writing library APIs from memory
|
|
128
128
|
- ✗ Skipping the Step 2 5-question clarification (even when "obvious," explicit statement still has value)
|
package/commands/implement.md
CHANGED
|
@@ -15,7 +15,7 @@ Execute spec tasks per tasks.md. Select the best execution strategy based on arg
|
|
|
15
15
|
## Step 1: Preflight Checks
|
|
16
16
|
|
|
17
17
|
```bash
|
|
18
|
-
[ ! -d ".flow" ] && { echo "
|
|
18
|
+
[ ! -d ".flow" ] && { echo "✗ Not a CurDX-Flow project. Run /curdx-flow:init first"; exit 1; }
|
|
19
19
|
|
|
20
20
|
ARGS="$ARGUMENTS"
|
|
21
21
|
SPEC_NAME=""
|
|
@@ -35,10 +35,10 @@ for arg in $ARGS; do
|
|
|
35
35
|
done
|
|
36
36
|
|
|
37
37
|
[ -z "$SPEC_NAME" ] && SPEC_NAME=$(cat .flow/.active-spec 2>/dev/null)
|
|
38
|
-
[ -z "$SPEC_NAME" ] && { echo "
|
|
38
|
+
[ -z "$SPEC_NAME" ] && { echo "✗ No active spec. Run /curdx-flow:start first"; exit 1; }
|
|
39
39
|
|
|
40
40
|
DIR=".flow/specs/$SPEC_NAME"
|
|
41
|
-
[ ! -f "$DIR/tasks.md" ] && { echo "
|
|
41
|
+
[ ! -f "$DIR/tasks.md" ] && { echo "✗ Missing tasks.md. Run /curdx-flow:spec first (or /curdx-flow:spec --phase=tasks to rebuild just the tasks phase)"; exit 1; }
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
## Step 2: Parse Task Characteristics from tasks.md
|
|
@@ -330,7 +330,7 @@ Prerequisites:
|
|
|
330
330
|
|
|
331
331
|
## Step 6: Progress Feedback
|
|
332
332
|
|
|
333
|
-
|
|
333
|
+
At each wave boundary (or periodically during long linear runs), print status:
|
|
334
334
|
|
|
335
335
|
```
|
|
336
336
|
═════ Progress ═════
|
package/commands/init.md
CHANGED
|
@@ -71,9 +71,20 @@ Append (if not already present):
|
|
|
71
71
|
|
|
72
72
|
### Step 5: Health Check
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
Do NOT shell out to a new terminal for this step — you are already inside
|
|
75
|
+
Claude Code. Verify inline via the information the plugin already has:
|
|
76
|
+
|
|
77
|
+
- Read `~/.claude/plugins/data/curdx-flow/.deps-checked` (optional — the
|
|
78
|
+
SessionStart hook already refreshes this once per day).
|
|
79
|
+
- If the user asks for the full report, suggest they run
|
|
80
|
+
`npx @curdx/flow doctor` in a separate terminal — don't try to spawn
|
|
81
|
+
it from inside the Claude Code session (output won't render cleanly
|
|
82
|
+
and the user has to alt-tab to see it).
|
|
83
|
+
|
|
84
|
+
Items the CLI doctor covers (for user reference):
|
|
85
|
+
- 2 bundled MCPs (context7 / sequential-thinking) — visible in `claude mcp list`
|
|
86
|
+
- 4 recommended plugins (pua / claude-mem / frontend-design / chrome-devtools-mcp)
|
|
87
|
+
- Runtime PATH guards for `bun` / `uv` (relevant only when claude-mem is installed)
|
|
77
88
|
|
|
78
89
|
### Step 6: Prompt Next Steps
|
|
79
90
|
|
package/commands/review.md
CHANGED
|
@@ -16,8 +16,8 @@ Distinct from `/curdx-flow:verify`:
|
|
|
16
16
|
| Flag | Default | Purpose |
|
|
17
17
|
|------|---------|---------|
|
|
18
18
|
| `--stage=<1\|2\|both>` | `both` | Stage 1 = spec compliance only. Stage 2 = code quality only. `both` = sequential. |
|
|
19
|
-
| `--adversarial` | off | Add an adversarial review pass
|
|
20
|
-
| `--edge-case` | off | Add edge-case hunting across
|
|
19
|
+
| `--adversarial` | off | Add an adversarial review pass across applicable categories (zero findings requires proof-of-checking, not fabrication). |
|
|
20
|
+
| `--edge-case` | off | Add edge-case hunting across applicable categories. Produces a test-gap checklist. |
|
|
21
21
|
|
|
22
22
|
## Preflight
|
|
23
23
|
|
|
@@ -65,7 +65,7 @@ Output: Stage-2 section of the report.
|
|
|
65
65
|
## Optional: adversarial review
|
|
66
66
|
|
|
67
67
|
If `--adversarial`:
|
|
68
|
-
Dispatch `flow-adversary`. It
|
|
68
|
+
Dispatch `flow-adversary`. It scans the applicable categories (Architecture / Implementation / Testing / Security / Maintainability / UX — skip N/A with reason) using `sequential-thinking` proportional to the residual uncertainty, probing:
|
|
69
69
|
1. What's missing?
|
|
70
70
|
2. What's overengineered?
|
|
71
71
|
3. What would break first in production?
|
|
@@ -73,12 +73,12 @@ Dispatch `flow-adversary`. It runs 6 dimensions × 2 rounds of `sequential-think
|
|
|
73
73
|
5. What decision locks us out of a future option?
|
|
74
74
|
6. What would a skeptical reviewer reject?
|
|
75
75
|
|
|
76
|
-
**Zero findings
|
|
76
|
+
**Zero findings requires proof-of-checking, not fabrication** — honest "clean" verdicts are fine if the agent lists what it examined. Per `@${CLAUDE_PLUGIN_ROOT}/gates/adversarial-review-gate.md`.
|
|
77
77
|
|
|
78
78
|
## Optional: edge-case hunting
|
|
79
79
|
|
|
80
80
|
If `--edge-case`:
|
|
81
|
-
Dispatch `flow-edge-hunter` across the
|
|
81
|
+
Dispatch `flow-edge-hunter` across the applicable categories (skip N/A with one-line reason):
|
|
82
82
|
1. Boundary values (0, MAX, empty, one-over-limit)
|
|
83
83
|
2. Concurrency / race conditions
|
|
84
84
|
3. Network failure / partial failure
|
|
@@ -91,6 +91,15 @@ Output: test-gap checklist with suggested test cases.
|
|
|
91
91
|
|
|
92
92
|
## Report
|
|
93
93
|
|
|
94
|
+
**Landing check**: sub-agent responses can be truncated. After dispatching review agents, verify the report actually landed on disk:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
REPORT=".flow/specs/$SPEC_NAME/review-report.md"
|
|
98
|
+
if [ ! -f "$REPORT" ] || [ "$(wc -c < "$REPORT" 2>/dev/null | tr -d ' ')" -lt 300 ]; then
|
|
99
|
+
echo "⚠ Report missing or truncated. Re-dispatching flow-reviewer with a terse 'Write the report now, no narration' prompt."
|
|
100
|
+
fi
|
|
101
|
+
```
|
|
102
|
+
|
|
94
103
|
Consolidated output: `.flow/specs/$SPEC_NAME/review-report.md`:
|
|
95
104
|
|
|
96
105
|
```markdown
|
package/commands/spec.md
CHANGED
|
@@ -82,7 +82,7 @@ Output: `requirements.md` with user stories (US-NN), acceptance criteria (AC-N.N
|
|
|
82
82
|
|
|
83
83
|
### design → `flow-architect`
|
|
84
84
|
Inputs: `research.md` + `requirements.md`.
|
|
85
|
-
Output: `design.md` with architecture decisions (AD-NN), component boundaries, data models, error-path design, mermaid diagrams.
|
|
85
|
+
Output: `design.md` with architecture decisions (AD-NN), component boundaries, data models, error-path design, mermaid diagrams (when they clarify). Uses `sequential-thinking` MCP proportional to the genuine tradeoff surface.
|
|
86
86
|
|
|
87
87
|
### tasks → `flow-planner`
|
|
88
88
|
Inputs: all three prior files + `.flow/PROJECT.md` tech stack.
|
|
@@ -94,10 +94,34 @@ After each phase completes successfully, update `.state.json`:
|
|
|
94
94
|
{
|
|
95
95
|
"phase": "<just-completed-phase>",
|
|
96
96
|
"phase_status": { "<phase>": "completed" },
|
|
97
|
-
"
|
|
97
|
+
"updated": "<ISO8601 timestamp>"
|
|
98
98
|
}
|
|
99
99
|
```
|
|
100
100
|
|
|
101
|
+
### Artifact landing check (mandatory after every phase)
|
|
102
|
+
|
|
103
|
+
Sub-agent responses can be truncated by the model's output-length limit, which means the `Write` tool call for the phase's Markdown artifact may never fire. Do NOT trust the agent's return value alone — always verify the file actually landed.
|
|
104
|
+
|
|
105
|
+
For each phase just dispatched, run:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
ARTIFACT=".flow/specs/$SPEC_NAME/<phase>.md"
|
|
109
|
+
if [ ! -f "$ARTIFACT" ]; then
|
|
110
|
+
echo "⚠ $ARTIFACT did not land. Re-dispatching <phase> agent with an explicit 'write the file' prompt."
|
|
111
|
+
# Re-dispatch the same agent, but in the prompt, front-load:
|
|
112
|
+
# "Your ONLY job is to call the Write tool with the full <phase>.md content now.
|
|
113
|
+
# Do not explain. Do not narrate. Write the file and stop."
|
|
114
|
+
# This pattern produces an artifact even when prior verbosity caused truncation.
|
|
115
|
+
fi
|
|
116
|
+
|
|
117
|
+
# Minimum-size sanity check — if the file is <500 bytes, the write was likely truncated
|
|
118
|
+
if [ -f "$ARTIFACT" ] && [ "$(wc -c < "$ARTIFACT" | tr -d ' ')" -lt 500 ]; then
|
|
119
|
+
echo "⚠ $ARTIFACT looks truncated (<500 bytes). Re-dispatching to complete it."
|
|
120
|
+
fi
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Only advance `.state.json.phase` once the file both exists AND passes the size sanity check. If a re-dispatch also fails to produce the artifact, stop and surface the issue to the user instead of silently advancing — that prevents later phases from consuming an empty upstream file.
|
|
124
|
+
|
|
101
125
|
## Optional planning review
|
|
102
126
|
|
|
103
127
|
If `--review` (or `--review=<dims>`) is present:
|
package/commands/start.md
CHANGED
|
@@ -32,23 +32,45 @@ Entry point for every feature. Works in four modes depending on flags and existi
|
|
|
32
32
|
|
|
33
33
|
## Flag parsing
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
35
|
+
**Do not shell-split `$ARGUMENTS`.** It is a user-supplied string that may
|
|
36
|
+
contain quoted substrings with spaces, `$`-signs, or embedded quotes.
|
|
37
|
+
`xargs`, naive `awk`, and `sed`-based quote stripping all mis-parse at
|
|
38
|
+
least one of those cases (e.g. `my-feature "Fix user's login bug"` fails with
|
|
39
|
+
`xargs: unmatched quote`). Parse the string as a model task instead:
|
|
40
|
+
|
|
41
|
+
1. **Flags** (order-independent, each is self-delimited):
|
|
42
|
+
- `--resume` / `--list` — boolean presence
|
|
43
|
+
- `--mode=<fast|standard|enterprise>` — value after `=`
|
|
44
|
+
Detect each with a single regex over the full `$ARGUMENTS` string and
|
|
45
|
+
remove the matched span from your working copy. Flags not in the list
|
|
46
|
+
above are errors — surface them to the user.
|
|
47
|
+
|
|
48
|
+
2. **Positional args** (after flags removed):
|
|
49
|
+
   - First whitespace-separated token → `SPEC_NAME` (kebab-case `[a-z0-9][a-z0-9-]*`).
|
|
50
|
+
- Remainder of the string, trimmed and with one layer of outer `"..."`
|
|
51
|
+
or `'...'` quotes stripped → `GOAL`. Preserve inner quotes as-is.
|
|
52
|
+
|
|
53
|
+
3. If `SPEC_NAME` does not match `^[a-z0-9][a-z0-9-]*$` (per
|
|
54
|
+
`schemas/spec-state.schema.json`), stop and ask the user to pick a
|
|
55
|
+
valid kebab-case name.
|
|
56
|
+
|
|
57
|
+
Mode must be `fast`, `standard`, or `enterprise`. Invalid → default to
|
|
58
|
+
`standard` with a warning.
|
|
59
|
+
|
|
60
|
+
Example inputs and their parse:
|
|
61
|
+
|
|
62
|
+
| `$ARGUMENTS` | SPEC_NAME | GOAL | flags |
|
|
63
|
+
|-------------------------------------------------|--------------|-------------------------------|---------------|
|
|
64
|
+
| `my-feature "Add JWT auth"` | `my-feature` | `Add JWT auth` | — |
|
|
65
|
+
| `my-feature --mode=fast "Add JWT auth"` | `my-feature` | `Add JWT auth` | mode=fast |
|
|
66
|
+
| `my-feature "Fix user's login bug"` | `my-feature` | `Fix user's login bug` | — |
|
|
67
|
+
| `--list` | — | — | list=true |
|
|
68
|
+
| `--resume` | — | — | resume=true |
|
|
47
69
|
|
|
48
70
|
## Branch logic
|
|
49
71
|
|
|
50
72
|
### Branch A: `--list`
|
|
51
|
-
Enumerate every directory under `.flow/specs/`, read each `.state.json` for `phase` and `
|
|
73
|
+
Enumerate every directory under `.flow/specs/`, read each `.state.json` for `phase` and `updated` (per `schemas/spec-state.schema.json`), print a numbered list, then `AskUserQuestion` to pick one. Picking sets `.flow/.active-spec` and exits.
|
|
52
74
|
|
|
53
75
|
### Branch B: `--resume` (no name)
|
|
54
76
|
Read `.flow/.active-spec`. If it points to a valid spec dir, report its current phase and next suggested command (`/curdx-flow:spec` if incomplete, `/curdx-flow:implement` if tasks ready). If `.active-spec` is empty or stale, fall back to Branch A.
|
|
@@ -61,17 +83,25 @@ Create a new spec:
|
|
|
61
83
|
|
|
62
84
|
```bash
|
|
63
85
|
mkdir -p ".flow/specs/$SPEC_NAME"
|
|
86
|
+
# NOTE: field names MUST match schemas/spec-state.schema.json:
|
|
87
|
+
# - spec_name (not "spec")
|
|
88
|
+
# - created (date, not "created_at")
|
|
89
|
+
# - updated (date-time, not "updated_at")
|
|
90
|
+
# - phase must be one of the enum values; the initial phase is "research"
|
|
91
|
+
# (there is no "created" phase — that was schema drift pre-beta.9)
|
|
92
|
+
# - version is required
|
|
64
93
|
cat > ".flow/specs/$SPEC_NAME/.state.json" <<JSON
|
|
65
94
|
{
|
|
66
|
-
"
|
|
95
|
+
"version": "1.0",
|
|
96
|
+
"spec_name": "$SPEC_NAME",
|
|
67
97
|
"goal": "$GOAL",
|
|
68
98
|
"mode": "$FLAG_MODE",
|
|
69
|
-
"phase": "
|
|
99
|
+
"phase": "research",
|
|
70
100
|
"phase_status": {},
|
|
71
101
|
"strategy": "auto",
|
|
72
102
|
"execute_state": {},
|
|
73
|
-
"
|
|
74
|
-
"
|
|
103
|
+
"created": "$(date -u +%Y-%m-%d)",
|
|
104
|
+
"updated": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
75
105
|
}
|
|
76
106
|
JSON
|
|
77
107
|
echo "$SPEC_NAME" > .flow/.active-spec
|
package/commands/verify.md
CHANGED
|
@@ -67,6 +67,19 @@ If `--strict`:
|
|
|
67
67
|
|
|
68
68
|
### Step 4: Produce `verification-report.md`
|
|
69
69
|
|
|
70
|
+
**Landing check**: sub-agent responses can be truncated by the model's output-length limit. After dispatching `flow-verifier`, verify the report actually landed:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
REPORT=".flow/specs/$SPEC_NAME/verification-report.md"
|
|
74
|
+
if [ ! -f "$REPORT" ] || [ "$(wc -c < "$REPORT" 2>/dev/null | tr -d ' ')" -lt 300 ]; then
|
|
75
|
+
echo "⚠ Report missing or truncated. Re-dispatching flow-verifier with a terse 'write the report now' prompt."
|
|
76
|
+
# Re-dispatch pattern:
|
|
77
|
+
# "Your only job right now is to Write the verification-report.md using the
|
|
78
|
+
# findings you already gathered. Do not re-scan. Do not narrate. Write
|
|
79
|
+
# the file and stop."
|
|
80
|
+
fi
|
|
81
|
+
```
|
|
82
|
+
|
|
70
83
|
Write to `.flow/specs/$SPEC_NAME/verification-report.md`:
|
|
71
84
|
|
|
72
85
|
```markdown
|
package/gates/adversarial-review-gate.md
CHANGED
|
@@ -33,19 +33,19 @@ A reviewer agent's output of "everything looks fine, no issues found" is an **in
|
|
|
33
33
|
- "Looks good" is usually confirmation bias (the agent only checked the obvious)
|
|
34
34
|
- AI tends to please the user ("great job!") — fight this tendency
|
|
35
35
|
|
|
36
|
-
**Forced actions**:
|
|
37
|
-
1.
|
|
38
|
-
2.
|
|
39
|
-
|
|
40
|
-
-
|
|
41
|
-
-
|
|
42
|
-
|
|
36
|
+
**Forced actions when the agent reports "no issues"**:
|
|
37
|
+
1. Automatically trigger a second round framed as "what would a senior skeptic reject in this PR?"
|
|
38
|
+
2. If both rounds still honestly yield no findings, the agent must emit a **proof-of-checking report**:
|
|
39
|
+
- Every category it examined (with "N/A" for categories that don't apply)
|
|
40
|
+
- For each examined category, the specific code/file locations inspected
|
|
41
|
+
- Counterfactual hypotheses of "what this would look like if there were a problem" and why that signature is absent
|
|
42
|
+
3. Fabricating findings to avoid the proof-of-checking step is a violation of L3 red line #2 (fact-driven). It is better to emit a "clean verdict with proof" than to invent issues.
|
|
43
43
|
|
|
44
44
|
---
|
|
45
45
|
|
|
46
|
-
### Rule 2:
|
|
46
|
+
### Rule 2: Coverage proportional to feature scope
|
|
47
47
|
|
|
48
|
-
A complete adversarial review
|
|
48
|
+
A complete adversarial review covers every category that applies to the feature and marks the rest as N/A with a reason. The number of findings per category is proportional to real issues, not a quota:
|
|
49
49
|
|
|
50
50
|
1. **Architecture layer**: Are decisions sound? Future-extensible? Lock-in risks?
|
|
51
51
|
2. **Implementation layer**: Code quality? Error handling? Performance?
|
|
@@ -86,22 +86,22 @@ Not allowed:
|
|
|
86
86
|
Input: object under review (code range / spec / PR diff)
|
|
87
87
|
↓
|
|
88
88
|
Round 1 (agent self-analysis):
|
|
89
|
-
- Use sequential-thinking
|
|
90
|
-
- Scan
|
|
89
|
+
- Use sequential-thinking proportional to the surface being probed
|
|
90
|
+
- Scan each applicable category; mark N/A ones with reason
|
|
91
91
|
- Output findings list
|
|
92
92
|
↓
|
|
93
93
|
Decision:
|
|
94
|
-
-
|
|
95
|
-
-
|
|
94
|
+
- Any real findings? → output report with findings
|
|
95
|
+
- Zero findings after honest Round 1? → force Round 2 framed as skeptic
|
|
96
96
|
↓
|
|
97
97
|
Round 2 (deep analysis):
|
|
98
|
-
- sequential-thinking
|
|
98
|
+
- sequential-thinking proportional to residual uncertainty
|
|
99
99
|
- Focus on "seemingly no issues" parts (trust but verify)
|
|
100
|
-
-
|
|
100
|
+
- Optionally introduce external perspectives (read issues from similar projects)
|
|
101
101
|
↓
|
|
102
102
|
Decision:
|
|
103
|
-
- Still
|
|
104
|
-
-
|
|
103
|
+
- Still zero findings? → agent must emit proof-of-checking report (NOT invent findings)
|
|
104
|
+
- Findings exist? → output report
|
|
105
105
|
↓
|
|
106
106
|
Output: review-report.md
|
|
107
107
|
```
|
|
@@ -190,10 +190,10 @@ Fix loop:
|
|
|
190
190
|
|
|
191
191
|
## Failure Recovery
|
|
192
192
|
|
|
193
|
-
If after 2
|
|
193
|
+
If after Round 2 the honest verdict is still zero findings, emit a proof-of-checking report (do NOT fabricate to hit a quota — there is no quota):
|
|
194
194
|
|
|
195
195
|
```markdown
|
|
196
|
-
## Adversarial Review —
|
|
196
|
+
## Adversarial Review — Proof of Checking (zero findings)
|
|
197
197
|
|
|
198
198
|
I have examined the following dimensions across 2 rounds of analysis:
|
|
199
199
|
|
package/gates/devex-gate.md
CHANGED
|
@@ -195,12 +195,12 @@ Reading these test names = reading API behavior documentation.
|
|
|
195
195
|
|
|
196
196
|
### Agent Automatic
|
|
197
197
|
|
|
198
|
-
When `flow-ux-designer` / `flow-reviewer` applies this gate, use sequential-thinking
|
|
198
|
+
When `flow-ux-designer` / `flow-reviewer` applies this gate, use sequential-thinking proportional to the complexity of the codebase being scanned.
|
|
199
199
|
|
|
200
200
|
### Human Review
|
|
201
201
|
|
|
202
202
|
Attach a DevEx checklist at PR time:
|
|
203
|
-
- [ ] Clear naming (
|
|
203
|
+
- [ ] Clear naming (re-read until obvious to a new maintainer)
|
|
204
204
|
- [ ] Critical comments exist
|
|
205
205
|
- [ ] Consistent structure
|
|
206
206
|
- [ ] Actionable error messages
|
|
@@ -210,7 +210,7 @@ Attach a DevEx checklist at PR time:
|
|
|
210
210
|
|
|
211
211
|
## Scoring
|
|
212
212
|
|
|
213
|
-
|
|
213
|
+
Score each **applicable** dimension 0-10 (N/A dimensions are excluded from the total):
|
|
214
214
|
|
|
215
215
|
```
|
|
216
216
|
10 = best practice
|
|
@@ -220,8 +220,7 @@ Each dimension 0-10 points:
|
|
|
220
220
|
0 = serious issue
|
|
221
221
|
```
|
|
222
222
|
|
|
223
|
-
|
|
224
|
-
Total < 40 = blocked, improvement required.
|
|
223
|
+
Emit the per-dimension scores with evidence. The gate itself does not block on a numeric threshold; it surfaces the weaknesses for the user (or the reviewing agent) to decide whether any of them rise to a blocker. A single 0/10 on a material dimension is a blocker regardless of the total.
|
|
225
224
|
|
|
226
225
|
---
|
|
227
226
|
|
package/gates/edge-case-gate.md
CHANGED
|
@@ -104,7 +104,7 @@ Q4. If no test, what test should be added to cover it?
|
|
|
104
104
|
Input: object under review (function / component / API) + requirements + tests
|
|
105
105
|
↓
|
|
106
106
|
For each category (1-7):
|
|
107
|
-
1. Use sequential-thinking to list
|
|
107
|
+
1. Use sequential-thinking to list every plausible edge scenario for this category — stop when you've covered the real risk surface, don't pad to a quota, don't fabricate scenarios that won't occur in production
|
|
108
108
|
2. Check whether each scenario has corresponding coverage in tests
|
|
109
109
|
3. Add uncovered ones to the "gap list"
|
|
110
110
|
↓
|