npm - @curdx/flow - Versions diffs - 2.0.0-beta.1 → 2.0.0-beta.11 - Mend

@curdx/flow 2.0.0-beta.1 → 2.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +3 -10
package/CHANGELOG.md +61 -0
package/README.zh.md +2 -2
package/agent-preamble/preamble.md +81 -11
package/agents/flow-adversary.md +40 -55
package/agents/flow-architect.md +23 -10
package/agents/flow-debugger.md +2 -2
package/agents/flow-edge-hunter.md +20 -6
package/agents/flow-executor.md +3 -3
package/agents/flow-planner.md +51 -48
package/agents/flow-product-designer.md +14 -1
package/agents/flow-qa-engineer.md +1 -1
package/agents/flow-researcher.md +17 -2
package/agents/flow-reviewer.md +5 -1
package/agents/flow-security-auditor.md +1 -1
package/agents/flow-triage-analyst.md +1 -1
package/agents/flow-ui-researcher.md +2 -2
package/agents/flow-ux-designer.md +1 -1
package/agents/flow-verifier.md +47 -14
package/bin/curdx-flow.js +13 -1
package/cli/doctor.js +73 -13
package/cli/install.js +62 -36
package/cli/protocols.js +63 -10
package/cli/registry.js +73 -0
package/cli/uninstall.js +9 -11
package/cli/upgrade.js +6 -10
package/cli/utils.js +150 -56
package/commands/fast.md +1 -1
package/commands/implement.md +4 -4
package/commands/init.md +14 -3
package/commands/review.md +14 -5
package/commands/spec.md +26 -2
package/commands/start.md +47 -17
package/commands/verify.md +13 -0
package/gates/adversarial-review-gate.md +19 -19
package/gates/devex-gate.md +4 -5
package/gates/edge-case-gate.md +1 -1
package/hooks/hooks.json +0 -11
package/hooks/scripts/quick-mode-guard.sh +12 -9
package/hooks/scripts/session-start.sh +1 -1
package/hooks/scripts/stop-watcher.sh +25 -15
package/knowledge/execution-strategies.md +6 -5
package/knowledge/spec-driven-development.md +8 -7
package/knowledge/two-stage-review.md +4 -3
package/package.json +4 -2
package/skills/brownfield-index/SKILL.md +62 -0
package/skills/browser-qa/SKILL.md +50 -0
package/skills/epic/SKILL.md +68 -0
package/skills/security-audit/SKILL.md +50 -0
package/skills/ui-sketch/SKILL.md +49 -0
package/templates/config.json.tmpl +1 -1
package/templates/design.md.tmpl +32 -112
package/templates/requirements.md.tmpl +25 -43
package/templates/research.md.tmpl +37 -68
package/templates/tasks.md.tmpl +27 -84
package/hooks/scripts/fail-tracker.sh +0 -31

package/cli/utils.js CHANGED Viewed

@@ -108,39 +108,6 @@ export function confirm(message, defaultYes = true) {
   });
 }
-/**
- * Ask user to pick from a list. Returns selected value or null if aborted.
- */
-export function select(message, choices, defaultIndex = 0) {
-  return new Promise((resolve) => {
-    console.log(`${color.cyan("?")}  ${message}`);
-    choices.forEach((ch, i) => {
-      const marker = i === defaultIndex ? color.green("▸") : " ";
-      console.log(`   ${marker} ${color.bold(String(i + 1))}. ${ch.label}`);
-    });
-    const rl = createInterface({
-      input: process.stdin,
-      output: process.stdout,
-    });
-    rl.question(
-      `   ${color.dim(`(default: ${defaultIndex + 1}, q to abort) `)}`,
-      (ans) => {
-        rl.close();
-        const v = ans.trim().toLowerCase();
-        if (v === "q") return resolve(null);
-        if (v === "") return resolve(choices[defaultIndex].value);
-        const n = parseInt(v, 10);
-        if (Number.isInteger(n) && n >= 1 && n <= choices.length) {
-          return resolve(choices[n - 1].value);
-        }
-        console.log(color.yellow("  (invalid, using default)"));
-        resolve(choices[defaultIndex].value);
-      }
-    );
-  });
-}
 /**
  * Multi-select (checkbox-style via comma-separated input).
  * Returns array of selected values.
@@ -199,47 +166,170 @@ export function claudeVersion() {
   return m ? m[1] : res.stdout.trim().split("\n")[0];
 }
-/** List installed plugins via `claude plugin list`. Returns array of { name, version, status }. */
+/**
+ * List installed plugins. Prefers the structured `claude plugin list --json`
+ * output (stable machine-readable format; confirmed present in claude
+ * 2.1.117+). Falls back to parsing the human-readable stream-text output
+ * for older CLI versions; note that the legacy text parser is brittle.
+ *
+ * Returns array of { name, version, status }.
+ */
 export function listPlugins() {
-  const res = runSync("claude", ["plugin", "list"]);
-  if (res.code !== 0) return [];
-  const out = res.stdout;
-  const plugins = [];
-  // Parse format like:
+  // Preferred: structured JSON output.
+  const j = runSync("claude", ["plugin", "list", "--json"]);
+  if (j.code === 0 && j.stdout.trim().startsWith("[")) {
+    try {
+      const arr = JSON.parse(j.stdout);
+      return arr.map((p) => ({
+        // id has form "name@marketplace" — name is stable for dedup/lookup.
+        name: String(p.id || "").split("@")[0],
+        version: p.version,
+        status: p.enabled === false ? "disabled" : "enabled",
+        raw: JSON.stringify(p),
+      }));
+    } catch {
+      // JSON parse failed — fall through to legacy text parser.
+    }
+  }
+  // Legacy fallback: parse the human-readable format.
   //   ❯ curdx-flow@curdx-flow-marketplace
   //     Version: 1.1.1
-  //     Scope: user
   //     Status: ✔ enabled
-  const blocks = out.split(/\n\s*❯\s*/).slice(1);
+  // Fragile — matches unicode markers. Kept only for older claude CLIs.
+  const res = runSync("claude", ["plugin", "list"]);
+  if (res.code !== 0) return [];
+  const plugins = [];
+  const blocks = res.stdout.split(/\n\s*❯\s*/).slice(1);
   for (const block of blocks) {
     const lines = block.split("\n");
     const name = lines[0].trim().split("@")[0];
     const version = (block.match(/Version:\s*(\S+)/) || [])[1];
-    const status = block.includes("✔") ? "enabled" : block.includes("✘") ? "failed" : "unknown";
+    const status = block.includes("✔")
+      ? "enabled"
+      : block.includes("✘")
+        ? "failed"
+        : "unknown";
     plugins.push({ name, version, status, raw: block });
   }
   return plugins;
 }
-/** List MCPs via `claude mcp list`. Returns array of { name, status }. */
+/**
+ * Read the user-level MCP registrations from ~/.claude.json. These are the
+ * MCPs the user added manually via `claude mcp add …` — distinct from
+ * plugin-bundled MCPs (which live in plugin.json).
+ *
+ * Returns a Map keyed by server name with the raw config object. Returns
+ * an empty Map if the file is missing / unreadable / has no mcpServers
+ * section — all of which are normal states and not errors.
+ */
+export function readUserMcpConfig() {
+  try {
+    const path = join(HOME, ".claude.json");
+    if (!existsSync(path)) return new Map();
+    const cfg = JSON.parse(readFileSync(path, "utf-8"));
+    const servers = cfg?.mcpServers || {};
+    return new Map(Object.entries(servers));
+  } catch {
+    return new Map();
+  }
+}
+/**
+ * Given the output of listMcps() and a user-level MCP config map, find
+ * MCPs that are registered BOTH as user-level AND as plugin-bundled.
+ * The plugin-bundled form shows up as `plugin:<plugin>:<name>` in
+ * listMcps output, so a user-level "context7" and a plugin-level
+ * "plugin:curdx-flow:context7" are a duplicate pair.
+ *
+ * Returns array of { name, userConfig, pluginEntry }.
+ */
+export function findDuplicateMcps(mcps, userConfig) {
+  const duplicates = [];
+  for (const m of mcps) {
+    // Only look at plugin-prefixed entries — they're the reference for
+    // what's bundled. Check if user has their own non-prefixed version.
+    if (m.plugin && userConfig.has(m.name)) {
+      duplicates.push({
+        name: m.name,
+        userConfig: userConfig.get(m.name),
+        pluginEntry: m,
+      });
+    }
+  }
+  return duplicates;
+}
+/**
+ * List MCP servers registered with the `claude` CLI. Returns array of
+ *   { name, plugin, fullName, status, command }
+ * where `plugin` is set when the MCP came from a plugin (real name is
+ * `plugin:<plugin>:<mcp>`), `name` is the trailing segment, and `fullName`
+ * is the original as reported by claude.
+ *
+ * Fixture captured from `claude mcp list` (2.1.117):
+ *   Checking MCP server health…
+ *
+ *   plugin:curdx-flow:context7: npx -y @upstash/context7-mcp@latest - ✓ Connected
+ *   context7: npx -y @upstash/context7-mcp --api-key ... - ✓ Connected
+ *   claude.ai Gmail: https://gmailmcp... - ✓ Connected
+ *
+ * `claude mcp list --json` does not exist on 2.1.117 (verified), so this
+ * parser is the primary path. It is fixture-tested in test/utils.test.js
+ * so format regressions get caught in CI.
+ */
 export function listMcps() {
   const res = runSync("claude", ["mcp", "list"]);
   if (res.code !== 0) return [];
-  const lines = res.stdout.split("\n");
+  return parseMcpList(res.stdout);
+}
+/** Exported for testing against a fixed input. */
+export function parseMcpList(output) {
   const mcps = [];
-  for (const line of lines) {
-    // Rough parse — adjust if format differs
-    const m = line.match(/^\s*([a-z0-9-]+)\s*[:\-]/i);
-    if (m) mcps.push({ name: m[1], status: "registered" });
+  for (const raw of output.split("\n")) {
+    const line = raw.trimEnd();
+    if (!line) continue;
+    // skip the health-check header line
+    if (line.startsWith("Checking") || line.startsWith("checking")) continue;
+    // Expected format: "<fullName>: <command-or-url> - <status>"
+    // fullName may itself contain colons when prefixed with "plugin:<p>:<m>".
+    // Match from the end to find the status sentinel " - ", then split off
+    // the name at the first ": " after the identifier prefix.
+    const statusSplit = line.lastIndexOf(" - ");
+    if (statusSplit === -1) continue;
+    const statusRaw = line.slice(statusSplit + 3).trim();
+    const beforeStatus = line.slice(0, statusSplit);
+    // Find the first ": " that separates name from command. Note the space
+    // after the colon — this disambiguates from the colons inside
+    // "plugin:foo:bar".
+    const nameSplit = beforeStatus.indexOf(": ");
+    if (nameSplit === -1) continue;
+    const fullName = beforeStatus.slice(0, nameSplit).trim();
+    const command = beforeStatus.slice(nameSplit + 2).trim();
+    let plugin = null;
+    let name = fullName;
+    if (fullName.startsWith("plugin:")) {
+      const parts = fullName.split(":");
+      if (parts.length >= 3) {
+        plugin = parts[1];
+        name = parts.slice(2).join(":");
+      }
+    }
+    const status = /Connected|✓/.test(statusRaw)
+      ? "connected"
+      : /Failed|✗/.test(statusRaw)
+        ? "failed"
+        : "unknown";
+    mcps.push({ name, plugin, fullName, status, command });
   }
   return mcps;
 }
-// ---------- Paths ----------
-export function pluginCacheDir(pluginName = "curdx-flow", marketplace = "curdx-flow-marketplace") {
-  return `${process.env.HOME}/.claude/plugins/cache/${marketplace}/${pluginName}`;
-}
 // ---------- Runtime PATH guards (bun / uv) ----------
 // claude-mem hard-codes `command: "bun"` in its .mcp.json, but bun installs to
 // ~/.bun/bin which is not on PATH when Claude Code spawns MCP servers
@@ -247,10 +337,14 @@ export function pluginCacheDir(pluginName = "curdx-flow", marketplace = "curdx-f
 // detection + self-healing: create a symlink to the user-level bun install
 // in a PATH-visible directory.
-import { mkdirSync, symlinkSync, lstatSync, unlinkSync, readlinkSync } from "node:fs";
-// `existsSync` and `join` already imported at the top of this file.
+import { existsSync, mkdirSync, symlinkSync, lstatSync, unlinkSync, readlinkSync } from "node:fs";
+import { homedir } from "node:os";
+// `join` already imported at the top of this file.
-const HOME = process.env.HOME || "";
+// os.homedir() is sourced from the OS-level user record and works even
+// when $HOME is empty (non-login shells, some CI containers). See the
+// same rationale in cli/protocols.js.
+const HOME = homedir();
 /** Candidate bun install locations (priority order) */
 const BUN_CANDIDATES = [

package/commands/fast.md CHANGED Viewed

@@ -123,6 +123,6 @@ Choosing the right scenario matters more than forcing the flow.
 ## Forbidden
 - ✗ Committing without running verification
-- ✗ Changes touching more than 5 files (means it is no longer fast — run the full flow)
+- ✗ Changes touching many unrelated files or modules (means it is no longer fast — run the full flow)
 - ✗ Writing library APIs from memory
 - ✗ Skipping the Step 2 5-question clarification (even when "obvious," explicit statement still has value)

package/commands/implement.md CHANGED Viewed

@@ -15,7 +15,7 @@ Execute spec tasks per tasks.md. Select the best execution strategy based on arg
 ## Step 1: Preflight Checks
 ```bash
-[ ! -d ".flow" ] && { echo "❌ Not a CurDX-Flow project. Run /curdx-flow:init first"; exit 1; }
+[ ! -d ".flow" ] && { echo "✗ Not a CurDX-Flow project. Run /curdx-flow:init first"; exit 1; }
 ARGS="$ARGUMENTS"
 SPEC_NAME=""
@@ -35,10 +35,10 @@ for arg in $ARGS; do
 done
 [ -z "$SPEC_NAME" ] && SPEC_NAME=$(cat .flow/.active-spec 2>/dev/null)
-[ -z "$SPEC_NAME" ] && { echo "❌ No active spec. Run /curdx-flow:start first"; exit 1; }
+[ -z "$SPEC_NAME" ] && { echo "✗ No active spec. Run /curdx-flow:start first"; exit 1; }
 DIR=".flow/specs/$SPEC_NAME"
-[ ! -f "$DIR/tasks.md" ] && { echo "❌ Missing tasks.md. Run /curdx-flow:spec first (or /curdx-flow:spec --phase=tasks to rebuild just the tasks phase)"; exit 1; }
+[ ! -f "$DIR/tasks.md" ] && { echo "✗ Missing tasks.md. Run /curdx-flow:spec first (or /curdx-flow:spec --phase=tasks to rebuild just the tasks phase)"; exit 1; }
 ```
 ## Step 2: Parse Task Characteristics from tasks.md
@@ -330,7 +330,7 @@ Prerequisites:
 ## Step 6: Progress Feedback
-Every 5 tasks or every wave, print status:
+At each wave boundary (or periodically during long linear runs), print status:
 ```
 ═════ Progress ═════

package/commands/init.md CHANGED Viewed

@@ -71,9 +71,20 @@ Append (if not already present):
 ### Step 5: Health Check
-Run `npx @curdx/flow doctor` (or inline its checks) to verify:
-- 3 MCPs started (context7 / sequential-thinking / chrome-devtools)
-- Recommended plugins status (pua / claude-mem / frontend-design)
+Do NOT shell out to a new terminal for this step — you are already inside
+Claude Code. Verify inline via the information the plugin already has:
+- Read `~/.claude/plugins/data/curdx-flow/.deps-checked` (optional — the
+  SessionStart hook already refreshes this once per day).
+- If the user asks for the full report, suggest they run
+  `npx @curdx/flow doctor` in a separate terminal — don't try to spawn
+  it from inside the Claude Code session (output won't render cleanly
+  and the user has to alt-tab to see it).
+Items the CLI doctor covers (for user reference):
+- 2 bundled MCPs (context7 / sequential-thinking) — visible in `claude mcp list`
+- 4 recommended plugins (pua / claude-mem / frontend-design / chrome-devtools-mcp)
+- Runtime PATH guards for `bun` / `uv` (relevant only when claude-mem is installed)
 ### Step 6: Prompt Next Steps

package/commands/review.md CHANGED Viewed

@@ -16,8 +16,8 @@ Distinct from `/curdx-flow:verify`:
 | Flag | Default | Purpose |
 |------|---------|---------|
 | `--stage=<1\|2\|both>` | `both` | Stage 1 = spec compliance only. Stage 2 = code quality only. `both` = sequential. |
-| `--adversarial` | off | Add an adversarial review pass (6 dimensions × 2 sequential-thinking rounds). Zero-findings forbidden. |
-| `--edge-case` | off | Add edge-case hunting across the 7 categories. Produces a test-gap checklist. |
+| `--adversarial` | off | Add an adversarial review pass across applicable categories (zero findings requires proof-of-checking, not fabrication). |
+| `--edge-case` | off | Add edge-case hunting across applicable categories. Produces a test-gap checklist. |
 ## Preflight
@@ -65,7 +65,7 @@ Output: Stage-2 section of the report.
 ## Optional: adversarial review
 If `--adversarial`:
-Dispatch `flow-adversary`. It runs 6 dimensions × 2 rounds of `sequential-thinking`:
+Dispatch `flow-adversary`. It scans the applicable categories (Architecture / Implementation / Testing / Security / Maintainability / UX — skip N/A with reason) using `sequential-thinking` proportional to the residual uncertainty, probing:
 1. What's missing?
 2. What's overengineered?
 3. What would break first in production?
@@ -73,12 +73,12 @@ Dispatch `flow-adversary`. It runs 6 dimensions × 2 rounds of `sequential-think
 5. What decision locks us out of a future option?
 6. What would a skeptical reviewer reject?
-**Zero findings are forbidden** — if the agent reports "all good", re-dispatch with stronger skepticism. Per `@${CLAUDE_PLUGIN_ROOT}/gates/adversarial-review-gate.md`.
+**Zero findings requires proof-of-checking, not fabrication** — honest "clean" verdicts are fine if the agent lists what it examined. Per `@${CLAUDE_PLUGIN_ROOT}/gates/adversarial-review-gate.md`.
 ## Optional: edge-case hunting
 If `--edge-case`:
-Dispatch `flow-edge-hunter` across the 7 categories:
+Dispatch `flow-edge-hunter` across the applicable categories (skip N/A with one-line reason):
 1. Boundary values (0, MAX, empty, one-over-limit)
 2. Concurrency / race conditions
 3. Network failure / partial failure
@@ -91,6 +91,15 @@ Output: test-gap checklist with suggested test cases.
 ## Report
+**Landing check**: sub-agent responses can be truncated. After dispatching review agents, verify the report actually landed on disk:
+```bash
+REPORT=".flow/specs/$SPEC_NAME/review-report.md"
+if [ ! -f "$REPORT" ] || [ "$(wc -c < "$REPORT" 2>/dev/null | tr -d ' ')" -lt 300 ]; then
+  echo "⚠ Report missing or truncated. Re-dispatching flow-reviewer with a terse 'Write the report now, no narration' prompt."
+fi
+```
 Consolidated output: `.flow/specs/$SPEC_NAME/review-report.md`:
 ```markdown

package/commands/spec.md CHANGED Viewed

@@ -82,7 +82,7 @@ Output: `requirements.md` with user stories (US-NN), acceptance criteria (AC-N.N
 ### design → `flow-architect`
 Inputs: `research.md` + `requirements.md`.
-Output: `design.md` with architecture decisions (AD-NN), component boundaries, data models, error-path design, mermaid diagrams. Must use `sequential-thinking` MCP (≥8 thoughts).
+Output: `design.md` with architecture decisions (AD-NN), component boundaries, data models, error-path design, mermaid diagrams (when they clarify). Uses `sequential-thinking` MCP proportional to the genuine tradeoff surface.
 ### tasks → `flow-planner`
 Inputs: all three prior files + `.flow/PROJECT.md` tech stack.
@@ -94,10 +94,34 @@ After each phase completes successfully, update `.state.json`:
 {
   "phase": "<just-completed-phase>",
   "phase_status": { "<phase>": "completed" },
-  "updated_at": "<ISO8601 timestamp>"
+  "updated": "<ISO8601 timestamp>"
 }
 ```
+### Artifact landing check (mandatory after every phase)
+Sub-agent responses can be truncated by the model's output-length limit, which means the `Write` tool call for the phase's Markdown artifact may never fire. Do NOT trust the agent's return value alone — always verify the file actually landed.
+For each phase just dispatched, run:
+```bash
+ARTIFACT=".flow/specs/$SPEC_NAME/<phase>.md"
+if [ ! -f "$ARTIFACT" ]; then
+  echo "⚠ $ARTIFACT did not land. Re-dispatching <phase> agent with an explicit 'write the file' prompt."
+  # Re-dispatch the same agent, but in the prompt, front-load:
+  #   "Your ONLY job is to call the Write tool with the full <phase>.md content now.
+  #    Do not explain. Do not narrate. Write the file and stop."
+  # This pattern produces an artifact even when prior verbosity caused truncation.
+fi
+# Minimum-size sanity check — if the file is <500 bytes, the write likely truncated
+if [ -f "$ARTIFACT" ] && [ "$(wc -c < "$ARTIFACT" | tr -d ' ')" -lt 500 ]; then
+  echo "⚠ $ARTIFACT looks truncated (<500 bytes). Re-dispatching to complete it."
+fi
+```
+Advance `.state.json.phase` only after the file both exists AND passes the size sanity check. If a re-dispatch also fails to produce the artifact, stop and surface the issue to the user instead of silently advancing — that prevents later phases from consuming an empty upstream file.
 ## Optional planning review
 If `--review` (or `--review=<dims>`) is present:

package/commands/start.md CHANGED Viewed

@@ -32,23 +32,45 @@ Entry point for every feature. Works in four modes depending on flags and existi
 ## Flag parsing
-```bash
-FLAG_RESUME=$(echo "$ARGUMENTS" | grep -q -- '--resume' && echo 1 || echo 0)
-FLAG_LIST=$(echo "$ARGUMENTS" | grep -q -- '--list' && echo 1 || echo 0)
-FLAG_MODE=$(echo "$ARGUMENTS" | grep -oP -- '--mode=\K[^\s]+' || echo "standard")
-# Strip flags from ARGUMENTS to leave the positional args
-POS=$(echo "$ARGUMENTS" | sed -E 's/--[a-z-]+(=[^ ]+)?//g' | xargs)
-SPEC_NAME=$(echo "$POS" | awk '{print $1}')
-GOAL=$(echo "$POS" | awk '{$1=""; print $0}' | sed 's/^"//; s/"$//' | xargs)
-```
-Mode must be `fast`, `standard`, or `enterprise`. Invalid → default to `standard` with a warning.
+**Do not shell-split `$ARGUMENTS`.** It is a user-supplied string that may
+contain quoted substrings with spaces, `$`-signs, or embedded quotes.
+`xargs`, naive `awk`, and `sed`-based quote stripping all mis-parse at
+least one of those cases (e.g. `my-feature "Fix user's login bug"` fails with
+`xargs: unmatched quote`). Have the model parse the string itself instead of using shell tools:
+1. **Flags** (order-independent, each is self-delimited):
+   - `--resume` / `--list` — boolean presence
+   - `--mode=<fast|standard|enterprise>` — value after `=`
+   Detect each with a single regex over the full `$ARGUMENTS` string and
+   remove the matched span from your working copy. Flags not in the list
+   above are errors — surface them to the user.
+2. **Positional args** (after flags removed):
+   - First whitespace-separated token → `SPEC_NAME` (kebab-case, `^[a-z0-9][a-z0-9-]*$`).
+   - Remainder of the string, trimmed and with one layer of outer `"..."`
+     or `'...'` quotes stripped → `GOAL`. Preserve inner quotes as-is.
+3. If `SPEC_NAME` does not match `^[a-z0-9][a-z0-9-]*$` (per
+   `schemas/spec-state.schema.json`), stop and ask the user to pick a
+   valid kebab-case name.
+Mode must be `fast`, `standard`, or `enterprise`. Invalid → default to
+`standard` with a warning.
+Example inputs and their parse:
+| `$ARGUMENTS`                                    | SPEC_NAME    | GOAL                          | flags         |
+|-------------------------------------------------|--------------|-------------------------------|---------------|
+| `my-feature "Add JWT auth"`                     | `my-feature` | `Add JWT auth`                | —             |
+| `my-feature --mode=fast "Add JWT auth"`         | `my-feature` | `Add JWT auth`                | mode=fast     |
+| `my-feature "Fix user's login bug"`             | `my-feature` | `Fix user's login bug`        | —             |
+| `--list`                                        | —            | —                             | list=true     |
+| `--resume`                                      | —            | —                             | resume=true   |
 ## Branch logic
 ### Branch A: `--list`
-Enumerate every directory under `.flow/specs/`, read each `.state.json` for `phase` and `updated_at`, print a numbered list, then `AskUserQuestion` to pick one. Picking sets `.flow/.active-spec` and exits.
+Enumerate every directory under `.flow/specs/`, read each `.state.json` for `phase` and `updated` (per `schemas/spec-state.schema.json`), print a numbered list, then `AskUserQuestion` to pick one. Picking sets `.flow/.active-spec` and exits.
 ### Branch B: `--resume` (no name)
 Read `.flow/.active-spec`. If it points to a valid spec dir, report its current phase and next suggested command (`/curdx-flow:spec` if incomplete, `/curdx-flow:implement` if tasks ready). If `.active-spec` is empty or stale, fall back to Branch A.
@@ -61,17 +83,25 @@ Create a new spec:
 ```bash
 mkdir -p ".flow/specs/$SPEC_NAME"
+# NOTE: field names MUST match schemas/spec-state.schema.json:
+#   - spec_name (not "spec")
+#   - created (date, not "created_at")
+#   - updated (date-time, not "updated_at")
+#   - phase must be one of the enum values; the initial phase is "research"
+#     (there is no "created" phase — that was schema drift pre-beta.9)
+#   - version is required
 cat > ".flow/specs/$SPEC_NAME/.state.json" <<JSON
 {
-  "spec": "$SPEC_NAME",
+  "version": "1.0",
+  "spec_name": "$SPEC_NAME",
   "goal": "$GOAL",
   "mode": "$FLAG_MODE",
-  "phase": "created",
+  "phase": "research",
   "phase_status": {},
   "strategy": "auto",
   "execute_state": {},
-  "created_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
-  "updated_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+  "created": "$(date -u +%Y-%m-%d)",
+  "updated": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
 }
 JSON
 echo "$SPEC_NAME" > .flow/.active-spec

package/commands/verify.md CHANGED Viewed

@@ -67,6 +67,19 @@ If `--strict`:
 ### Step 4: Produce `verification-report.md`
+**Landing check**: sub-agent responses can be truncated by the model's output-length limit. After dispatching `flow-verifier`, verify the report actually landed:
+```bash
+REPORT=".flow/specs/$SPEC_NAME/verification-report.md"
+if [ ! -f "$REPORT" ] || [ "$(wc -c < "$REPORT" 2>/dev/null | tr -d ' ')" -lt 300 ]; then
+  echo "⚠ Report missing or truncated. Re-dispatching flow-verifier with a terse 'write the report now' prompt."
+  # Re-dispatch pattern:
+  #   "Your only job right now is to Write the verification-report.md using the
+  #    findings you already gathered. Do not re-scan. Do not narrate. Write
+  #    the file and stop."
+fi
+```
 Write to `.flow/specs/$SPEC_NAME/verification-report.md`:
 ```markdown

package/gates/adversarial-review-gate.md CHANGED Viewed

@@ -33,19 +33,19 @@ A reviewer agent's output of "everything looks fine, no issues found" is an **in
 - "Looks good" is usually confirmation bias (the agent only checked the obvious)
 - AI tends to please the user ("great job!") — fight this tendency
-**Forced actions**:
-1. If the agent outputs "no issues", automatically trigger a second round
-2. The second round requires the agent to perform deeper analysis via sequential-thinking
-3. If both rounds yield no findings, the agent must **prove** it checked:
-   - List the dimensions examined (at least 5)
-   - For each dimension, give the specific code/file locations inspected
-   - Provide counterfactual hypotheses of "what it would look like if there were a problem"
+**Forced actions when the agent reports "no issues"**:
+1. Automatically trigger a second round framed as "what would a senior skeptic reject in this PR?"
+2. If both rounds still honestly yield no findings, the agent must emit a **proof-of-checking report**:
+   - Every category it examined (with "N/A" for categories that don't apply)
+   - For each examined category, the specific code/file locations inspected
+   - Counterfactual hypotheses of "what this would look like if there were a problem" and why that signature is absent
+3. Fabricating findings to avoid the proof-of-checking step is a violation of L3 red line #2 (fact-driven). Better to emit "clean verdict with proof" than invent issues.
 ---
-### Rule 2: Findings in at Least 3 Categories
+### Rule 2: Coverage proportional to feature scope
-A complete adversarial review must cover (find issues in at least 3 of these categories):
+A complete adversarial review covers every category that applies to the feature and marks the rest as N/A with a reason. The number of findings per category is proportional to the real issues found, not to a quota:
 1. **Architecture layer**: Are decisions sound? Future-extensible? Lock-in risks?
 2. **Implementation layer**: Code quality? Error handling? Performance?
@@ -86,22 +86,22 @@ Not allowed:
 Input: object under review (code range / spec / PR diff)
   ↓
 Round 1 (agent self-analysis):
-  - Use sequential-thinking ≥ 6 rounds
-  - Scan all 6 categories
+  - Use sequential-thinking proportional to the surface being probed
+  - Scan each applicable category; mark N/A ones with reason
   - Output findings list
   ↓
 Decision:
-  - Findings ≥ 3? → output report
-  - Findings < 3? → force Round 2
+  - Any real findings? → output report with findings
+  - Zero findings after honest Round 1? → force Round 2 framed as skeptic
   ↓
 Round 2 (deep analysis):
-  - sequential-thinking for another 6 rounds
+  - sequential-thinking proportional to residual uncertainty
   - Focus on "seemingly no issues" parts (trust but verify)
-  - May introduce external perspectives (read issues from similar projects)
+  - Optionally introduce external perspectives (read issues from similar projects)
   ↓
 Decision:
-  - Still < 3? → agent must explicitly prove it checked
-  - Otherwise → output report
+  - Still zero findings? → agent must emit proof-of-checking report (NOT invent findings)
+  - Findings exist? → output report
   ↓
 Output: review-report.md
 ```
@@ -190,10 +190,10 @@ Fix loop:
 ## Failure Recovery
-If after 2 rounds there are still < 3 findings:
+If after Round 2 the honest verdict is still zero findings, emit a proof-of-checking report (do NOT fabricate to hit a quota — there is no quota):
 ```markdown
-## Adversarial Review — Insufficient Findings
+## Adversarial Review — Proof of Checking (zero findings)
 I have examined the following dimensions across 2 rounds of analysis:

package/gates/devex-gate.md CHANGED Viewed

@@ -195,12 +195,12 @@ Reading these test names = reading API behavior documentation.
 ### Agent Automatic
-When `flow-ux-designer` / `flow-reviewer` applies this gate, use sequential-thinking ≥ 4 rounds to scan the 8 dimensions.
+When `flow-ux-designer` / `flow-reviewer` applies this gate, use sequential-thinking proportional to the complexity of the codebase being scanned.
 ### Human Review
 Attach a DevEx checklist at PR time:
-- [ ] Clear naming (reviewed at least 3 times)
+- [ ] Clear naming (re-read until obvious to a new maintainer)
 - [ ] Critical comments exist
 - [ ] Consistent structure
 - [ ] Actionable error messages
@@ -210,7 +210,7 @@ Attach a DevEx checklist at PR time:
 ## Scoring
-Each dimension 0-10 points:
+Score each **applicable** dimension 0-10 (N/A dimensions are excluded from the total):
 ```
 10 = best practice
@@ -220,8 +220,7 @@ Each dimension 0-10 points:
 0  = serious issue
 ```
-Total 40+ / 80 = pass (warning, non-blocking).
-Total < 40 = blocked, improvement required.
+Emit the per-dimension scores with evidence. The gate itself does not block on a numeric threshold; it surfaces the weaknesses for the user (or the reviewing agent) to decide whether any of them rise to a blocker. A single 0/10 on a material dimension is a blocker regardless of the total.
 ---

package/gates/edge-case-gate.md CHANGED Viewed

@@ -104,7 +104,7 @@ Q4. If no test, what test should be added to cover it?
 Input: object under review (function / component / API) + requirements + tests
   ↓
 For each category (1-7):
-  1. Use sequential-thinking to list at least 3 possible edge scenarios
+  1. Use sequential-thinking to list every plausible edge scenario for this category — stop when you've covered the real risk surface, don't pad to a quota, don't fabricate scenarios that won't occur in production
   2. Check whether each scenario has corresponding coverage in tests
   3. Add uncovered ones to the "gap list"
   ↓