npm - @hanzlaa/rcode - Versions diffs - 3.4.18 → 3.4.20 - Mend

@hanzlaa/rcode 3.4.18 → 3.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/cli/index.js +0 -0
package/cli/lib/manifest.cjs +20 -11
package/dist/rcode.js +9 -1
package/package.json +11 -11
package/rihal/agents/rihal-planner.md +37 -0
package/rihal/agents/rihal-sprint-checker.md +8 -2
package/rihal/bin/lib/verify.cjs +7 -2
package/rihal/bin/rihal-tools.cjs +284 -16
package/rihal/workflows/autonomous-smart-discuss.md +2 -1
package/rihal/workflows/execute.md +17 -2
package/rihal/workflows/plan-spawn-planner.md +9 -0
package/rihal/workflows/plan.md +54 -0
package/rihal/workflows/ui-review.md +5 -1

package/cli/index.js CHANGED Viewed

File without changes

package/cli/lib/manifest.cjs CHANGED Viewed

@@ -100,23 +100,32 @@ function diffSet(editor, kind, expected, installed) {
 }
 /**
- * Verify a Claude install: checks .claude/skills/rihal-<agent> and
- * .claude/skills/<action> (action skills keep their bare name) against the
- * package manifest. Returns an array of diff reports.
+ * Verify a Claude install. Agents live at .claude/agents/rihal-<name>.md.
+ * Action skills live at .claude/skills/<name>/ (bare name, no rihal- prefix).
+ *
+ * Note: .claude/skills/ ALSO contains rihal-<name>/ directories that are
+ * auto-generated command stubs by generate-command-skills.cjs (so commands
+ * appear in the IDE sidebar). Those are NOT agents — counting them as agents
+ * makes doctor report drift like "agents 119/23" when nothing is wrong.
+ * That's why the agent count comes from .claude/agents/, not .claude/skills/.
  */
 function verifyClaudeInstall(cwd, packageRoot) {
   const pkg = readPackageManifest(packageRoot);
+  const agentsDir = path.join(cwd, '.claude/agents');
   const skillsDir = path.join(cwd, '.claude/skills');
-  // Agents are installed as rihal-{name} — strip prefix to match pkg.agents keys
-  const installedAgents = readInstalledDirs(skillsDir, 'rihal-');
-  // Do NOT pre-filter against pkg.agents: we want stale entries (installed but
-  // not in current package) to appear in the `extra` list of diffSet so that
-  // `rcode doctor` can flag them as stale and `rcode uninstall` can remove them.
-  // The old intersection filter was hiding orphaned agent dirs after version bumps.
+  // Agents: .claude/agents/rihal-<name>.md (file-based, not dir-based).
+  const installedAgents = new Set();
+  if (fs.existsSync(agentsDir)) {
+    for (const f of fs.readdirSync(agentsDir)) {
+      if (f.startsWith('rihal-') && f.endsWith('.md')) {
+        installedAgents.add(f.replace(/^rihal-/, '').replace(/\.md$/, ''));
+      }
+    }
+  }
-  // Action skills: installed with their bare name (no rihal- prefix).
-  // Exclude known agent dirs (rihal-prefixed) so actions and agents don't bleed.
+  // Actions: .claude/skills/<bare-name>/ — exclude rihal-* dirs (those are
+  // either agent stubs or command stubs, never action skills).
   const allInstalled = readInstalledDirs(skillsDir);
   const actionsInstalled = new Set(
     [...allInstalled].filter((n) => !n.startsWith('rihal-'))

package/dist/rcode.js CHANGED Viewed

@@ -17060,8 +17060,16 @@ var require_manifest = __commonJS({
     }
     function verifyClaudeInstall(cwd, packageRoot) {
       const pkg = readPackageManifest(packageRoot);
+      const agentsDir = path2.join(cwd, ".claude/agents");
       const skillsDir = path2.join(cwd, ".claude/skills");
-      const installedAgents = readInstalledDirs(skillsDir, "rihal-");
+      const installedAgents = /* @__PURE__ */ new Set();
+      if (fs2.existsSync(agentsDir)) {
+        for (const f of fs2.readdirSync(agentsDir)) {
+          if (f.startsWith("rihal-") && f.endsWith(".md")) {
+            installedAgents.add(f.replace(/^rihal-/, "").replace(/\.md$/, ""));
+          }
+        }
+      }
       const allInstalled = readInstalledDirs(skillsDir);
       const actionsInstalled = new Set(
         [...allInstalled].filter((n) => !n.startsWith("rihal-"))

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hanzlaa/rcode",
-  "version": "3.4.18",
+  "version": "3.4.20",
   "description": "rcode — the memory bank for AI-driven SaaS teams. Persistent project context, distinctive engineering personas, and phase-based workflows. Built by Rihal. Works in Claude Code, Cursor, Gemini, VS Code, and Antigravity.",
   "main": "cli/index.js",
   "bin": {
@@ -8,6 +8,15 @@
     "rihal": "dist/rcode.js",
     "rihal-code": "dist/rcode.js"
   },
+  "scripts": {
+    "dashboard": "node server/dashboard.js",
+    "test": "node --test",
+    "test:ci": "node --test --test-reporter=spec",
+    "postinstall": "node cli/postinstall.js",
+    "build:cli": "node scripts/build.cjs",
+    "build": "node scripts/build.cjs",
+    "dogfood": "bash scripts/dogfood-check.sh"
+  },
   "files": [
     "cli/",
     "rihal/",
@@ -60,14 +69,5 @@
   },
   "publishConfig": {
     "access": "public"
-  },
-  "scripts": {
-    "dashboard": "node server/dashboard.js",
-    "test": "node --test",
-    "test:ci": "node --test --test-reporter=spec",
-    "postinstall": "node cli/postinstall.js",
-    "build:cli": "node scripts/build.cjs",
-    "build": "node scripts/build.cjs",
-    "dogfood": "bash scripts/dogfood-check.sh"
   }
-}
+}

package/rihal/agents/rihal-planner.md CHANGED Viewed

@@ -48,6 +48,11 @@ Core: Parse user decisions from CONTEXT.md, decompose into sprints with stories,
 - `<action>`: Specific instructions, what to avoid & WHY
 - `<verify>`: <automated> command < 60 sec (REQUIRED by Nyquist Rule)
 - `<done>`: Measurable acceptance criteria
+- `<evidence>`: **REQUIRED** (issue #649). Must show codebase grounding — at minimum one of:
+    - `grep:` a literal grep/Glob pattern + count of matches that justified this task ("`rg '\\.alert' apps/web/src` → 13 hits across 9 files")
+    - `lines:` exact `path:line-line` ranges of code being modified
+    - `creates:` the file paths being created from scratch (with one-line justification why no existing file fits)
+  A task without `<evidence>` is theoretical and MUST NOT be written.
 ### Task Types
 | Type | When | Autonomy |
@@ -116,6 +121,37 @@ else: wave = max(waves of dependencies) + 1
 **File ownership:** No overlap in files_modified → can run parallel. Overlap → later depends on earlier.
+## Codebase Discovery (BLOCKER — added after issue #649)
+**Before writing any task body, you MUST query the actual codebase.** Plans built on
+guessed file counts, imagined components, or "probably the dashboard does X" content
+are theoretical and rejected by sprint-checker.
+For every claim a task makes about the codebase, run a real query and capture the
+result in the task's `<evidence>` field:
+| Claim shape | Required query |
+|---|---|
+| "migrate N files away from X" | `rg -l '<X>' <scope>` — record exact file count + paths |
+| "modify component Y" | `Read` the file; record `path:line-line` ranges |
+| "replace pattern P" | `rg '<P>'` — record hit count + a representative match |
+| "add Z where there's no Z today" | `rg '<Z>'` returning 0 hits is the evidence |
+| "create new file F" | confirm F does NOT exist + state why no existing file fits |
+**Hard stops:**
+- Did NOT grep for a symbol the task says it modifies? → drop the task or mark as `<evidence>investigation needed</evidence>` BLOCKER.
+- File count cited but never measured? → run the grep and write the real number; never cite a count like "13 files" without a grep behind it.
+- Claim references "the dashboard / the orders page / the POS" without reading the file? → Read the file first, cite line ranges.
+**Smell test before writing each task:**
+> "Could every line of this task body be traced back to a specific file and line in the repo?"
+>
+> If not, the task is theoretical. Drop it.
+The orchestrator (`/rihal-plan`) MUST pass this checklist forward to sprint-checker
+which fails the plan if any task lacks `<evidence>`.
 ## File-existence verification (BLOCKER — added in v3.1.0 after #441)
 Before writing each entry into `files_modified`, you MUST verify the file actually exists in the project. Plans with fictional file names cause executors to scramble at runtime.
@@ -191,6 +227,7 @@ Create `.planning/phases/XX-name/{phase}-{plan}-SUMMARY.md`
 4. **Over-splitting:** Ticket-sized work → ONE plan, not three
 5. **No dependency graph:** Tasks look independent but aren't
 6. **Context anxiety:** Plans bloat when context > 50%. Keep to 2-3 tasks.
+7. **Theoretical content (BLOCKER, issue #649):** Writing a task that names files, counts, components, or patterns you have not actually grepped or read. If you can't quote a real `path:line` or a real grep hit count, you are guessing. Drop the task or downgrade it to an investigation BLOCKER.
 ## Constraints

package/rihal/agents/rihal-sprint-checker.md CHANGED Viewed

@@ -96,6 +96,7 @@ Same methodology (goal-backward), different timing, different subject matter.
 9. Cross-Sprint Data Contracts
 10. CLAUDE.md Compliance
 11. File References Verification
+12. Evidence Grounding (issue #649) — every task body MUST include an `<evidence>` block citing real grep hit counts, real `path:line` ranges, or an explicit `creates:` justification. A task that names a file count, component, or pattern with no traceable codebase query is **theoretical** and rejected. Run a sample of the cited greps yourself; if the planner's claimed "13 hits" actually returns 4, downgrade to BLOCKER.
 Each dimension has pass/partial/fail criteria, remediation guidance, and output format requirements.
@@ -105,8 +106,13 @@ Each dimension has pass/partial/fail criteria, remediation guidance, and output
 1. **Load context** — Read phase SCOPE.md, CONTEXT.md (if present), RESEARCH.md, and all SPRINT.md files.
 2. **Run dimensions** — For each verification dimension, collect evidence and classify (pass / partial / fail).
-3. **Synthesize** — Produce CHECK.md with overall verdict, per-dimension scores, remediation asks.
-4. **Return** — Block execution if critical dimensions fail; proceed with cautions if only partials.
+3. **Programmatic evidence check (issue #649)** — call:
+   ```
+   node .rihal/bin/rihal-tools.cjs plan validate-evidence <phase> --spot-check
+   ```
+   Exit code 0 = pass, 1 = at least one task violation. Inline the JSON `violations[]` into dimension 12 of CHECK.md verbatim — these are authoritative and must not be paraphrased away.
+4. **Synthesize** — Produce CHECK.md with overall verdict, per-dimension scores, remediation asks.
+5. **Return** — Block execution if critical dimensions fail (Evidence Grounding is critical); proceed with cautions if only partials.
 ## Mandatory output markers (per #440 / #445 fix)

package/rihal/bin/lib/verify.cjs CHANGED Viewed

@@ -22,8 +22,13 @@ function commitsForPhase(projectRoot, phaseNum) {
   // Look for commit subjects starting with NN- or NNN- matching the phase.
   const log = git(`log --pretty=format:%H%x09%s`, projectRoot);
   if (!log) return [];
-  const prefix = String(phaseNum).padStart(2, '0');
-  const re = new RegExp(`(^|[^0-9])${prefix}-\\d+`);
+  // Issue #652 — accept both unpadded ('8-1') and legacy padded ('08-01')
+  // commit prefixes so verify still works on projects that were created
+  // before the no-leading-zeros rule.
+  const num = String(phaseNum);
+  const padded = num.padStart(2, '0');
+  const alt = num === padded ? num : `(?:${num}|${padded})`;
+  const re = new RegExp(`(^|[^0-9])${alt}-\\d+`);
   const commits = [];
   for (const line of log.split('\n')) {
     const [hash, ...rest] = line.split('\t');

package/rihal/bin/rihal-tools.cjs CHANGED Viewed

@@ -455,7 +455,11 @@ function cmdInit(workflowName, rawArgs) {
       out.phase_found = roadmapPhase !== null;
       out.phase_number = String(phaseNum);
-      out.padded_phase = String(phaseNum).padStart(2, '0');
+      // Issue #652 — no leading zeros in planning artifacts. The field name
+      // 'padded_phase' is kept for workflow backward compat but the value is
+      // now the canonical (unpadded) phase number. The resolver above still
+      // accepts legacy '06-name' directories for older projects.
+      out.padded_phase = String(phaseNum);
       out.phase_name = roadmapPhase ? roadmapPhase.name : null;
       out.phase_slug = phaseDirEntry ? phaseDirEntry.replace(/^\d+-/, '') : null;
       out.phase_dir = phaseDirEntry ? path.join(PLANNING_DIR, 'phases', phaseDirEntry) : null;
@@ -1352,18 +1356,25 @@ function cmdState(subArgs) {
   // --- add-decision ---
   if (sub === 'add-decision') {
-    const summary = subArgs.slice(1).join(' ');
+    // Issue #658 — caller can scope explicitly with --phase <N>; otherwise we
+    // infer from state.current_phase (which can mis-fire mid-orchestration).
+    const flagStart = (() => {
+      for (let i = 1; i < subArgs.length; i++) if (subArgs[i].startsWith('--')) return i;
+      return subArgs.length;
+    })();
+    const summary = subArgs.slice(1, flagStart).join(' ');
+    const flags = parseFlags(flagStart);
     if (!summary) throw new Error('add-decision requires a summary argument');
     const state = readState() || defaultState();
     if (!state.decisions) state.decisions = [];
     const record = {
       summary,
-      phase: state.current_phase,
-      plan: state.current_plan,
+      phase: flags.phase ? String(flags.phase) : state.current_phase,
+      plan: flags.plan ? String(flags.plan) : state.current_plan,
       date: new Date().toISOString(),
     };
     state.decisions.push(record);
-    const result = writeState(state);
+    writeState(state);
     // Mirror to cross-project store (best-effort, never fails the local write).
     try {
       appendGlobalDecision({
@@ -1375,7 +1386,14 @@ function cmdState(subArgs) {
         summary: record.summary,
       });
     } catch (_) { /* silent — local commit must not break on home-dir issues */ }
-    return result;
+    // Issue #658 — return the appended record so callers can confirm the
+    // phase scope and ID without re-reading state.json.
+    return {
+      ok: true,
+      decision: record,
+      decision_index: state.decisions.length - 1,
+      total_decisions: state.decisions.length,
+    };
   }
   // --- decisions-global: query ~/.rihal/decisions.jsonl across all projects ---
@@ -1412,14 +1430,62 @@ function cmdState(subArgs) {
   // --- resolve-blocker ---
   if (sub === 'resolve-blocker') {
-    const index = parseInt(subArgs[1], 10);
     const state = readState();
     if (!state) throw new Error('No state.json found');
-    if (!state.blockers || index < 0 || index >= state.blockers.length) {
-      throw new Error(`Invalid blocker index: ${subArgs[1]}. Valid range: 0-${(state.blockers || []).length - 1}`);
+    if (!state.blockers || state.blockers.length === 0) {
+      throw new Error('No blockers to resolve');
+    }
+    // Issue #656 — support --all and --phase <N> for batch resolution.
+    const flagStart = subArgs[1] && /^--/.test(subArgs[1]) ? 1 : 2;
+    const flags = parseFlags(flagStart);
+    const indices = [];
+    if (flags.all === true || flags.all === 'true') {
+      for (let i = 0; i < state.blockers.length; i++) {
+        if (!state.blockers[i].resolved) indices.push(i);
+      }
+    } else if (flags.phase) {
+      const ph = String(flags.phase).replace(/^[Pp]hase\s*/, '');
+      for (let i = 0; i < state.blockers.length; i++) {
+        const b = state.blockers[i];
+        if (b.resolved) continue;
+        const matchesPhase = String(b.phase || '') === ph ||
+          (b.description || '').includes(`Phase ${ph}`) ||
+          (b.description || '').includes(`[Phase ${ph}]`);
+        if (matchesPhase) indices.push(i);
+      }
+    } else {
+      const index = parseInt(subArgs[1], 10);
+      if (Number.isNaN(index) || index < 0 || index >= state.blockers.length) {
+        throw new Error(`Invalid blocker index: ${subArgs[1]}. Valid range: 0-${state.blockers.length - 1}, or use --all / --phase <N>`);
+      }
+      indices.push(index);
+    }
+    if (indices.length === 0) {
+      throw new Error('No matching unresolved blockers found');
+    }
+    // Issue #654 — tickets-first. Resolution must reference an issue, a
+    // commit SHA, or be explicitly marked as internal with --noref. Silent
+    // resolution drops the audit trail.
+    const hasIssue = flags.issue && /^#?\d+$/.test(String(flags.issue));
+    const hasCommit = flags.commit && /^[0-9a-f]{7,40}$/i.test(String(flags.commit));
+    const noref = flags.noref === true || flags.noref === 'true';
+    if (!hasIssue && !hasCommit && !noref) {
+      throw new Error(
+        `resolve-blocker [${index}] requires an audit reference. Pass one of:\n` +
+        `  --issue <gh-issue-number>     e.g. --issue 654\n` +
+        `  --commit <sha>                7-40 hex chars\n` +
+        `  --noref                       acknowledge no external reference (audit trail will say "internal")`
+      );
     }
-    state.blockers[index].resolved = new Date().toISOString();
-    return writeState(state);
+    const now = new Date().toISOString();
+    for (const idx of indices) {
+      state.blockers[idx].resolved = now;
+      if (hasIssue) state.blockers[idx].resolved_issue = String(flags.issue).replace(/^#/, '');
+      if (hasCommit) state.blockers[idx].resolved_commit = String(flags.commit).slice(0, 40);
+      if (noref && !hasIssue && !hasCommit) state.blockers[idx].resolved_ref = 'internal';
+    }
+    const result = writeState(state);
+    return { ...result, resolved_count: indices.length, resolved_indices: indices };
   }
   // --- record-session ---
@@ -3556,6 +3622,160 @@ function cmdInitPlan(rawArgs) {
   };
 }
+/**
+ * plan validate-evidence — issue #649 enforcement.
+ *
+ * Scans SPRINT.md files under a phase (or a specific file) and checks that
+ * every <task>...</task> block contains an <evidence> sub-block with a real
+ * codebase grounding (grep:, lines:, or creates: marker). Optionally
+ * spot-checks the cited grep patterns by re-running them and comparing hit
+ * counts against the planner's claim.
+ *
+ * Sprint-checker calls this; CI can call it; users can run it manually.
+ *
+ * Usage:
+ *   plan validate-evidence <phase-number>
+ *   plan validate-evidence --file <path>
+ *   plan validate-evidence <phase-number> --spot-check
+ *
+ * Exit code 0 = pass, 1 = at least one task failed evidence check.
+ */
+function cmdPlanValidateEvidence(rawArgs) {
+  const args = (rawArgs || []).slice();
+  const flags = {};
+  const positional = [];
+  for (let i = 0; i < args.length; i++) {
+    if (args[i].startsWith('--')) {
+      const key = args[i].slice(2);
+      const next = args[i + 1];
+      if (next === undefined || next.startsWith('--')) flags[key] = true;
+      else { flags[key] = next; i++; }
+    } else positional.push(args[i]);
+  }
+  const targets = [];
+  if (flags.file) {
+    if (!fs.existsSync(flags.file)) throw new Error(`File not found: ${flags.file}`);
+    targets.push(flags.file);
+  } else {
+    const phaseArg = positional[0];
+    if (!phaseArg) throw new Error('Usage: plan validate-evidence <phase-number> [--spot-check] | --file <path>');
+    const phasesDir = path.join(PLANNING_DIR, 'phases');
+    if (!fs.existsSync(phasesDir)) throw new Error(`No phases directory at ${phasesDir}`);
+    const norm = String(phaseArg).replace(/^0+/, '') || '0';
+    const padded = norm.padStart(2, '0');
+    let phaseDir = null;
+    for (const entry of fs.readdirSync(phasesDir)) {
+      if (entry.startsWith(`${norm}-`) || entry.startsWith(`${padded}-`) || entry === norm || entry === padded) {
+        phaseDir = path.join(phasesDir, entry);
+        break;
+      }
+    }
+    if (!phaseDir) throw new Error(`Phase ${phaseArg} directory not found`);
+    for (const f of fs.readdirSync(phaseDir)) {
+      if (/-SPRINT\.md$/.test(f) || /-PLAN\.md$/.test(f)) targets.push(path.join(phaseDir, f));
+    }
+  }
+  if (targets.length === 0) {
+    return { ok: true, files_scanned: 0, tasks_total: 0, violations: [], message: 'No SPRINT.md / PLAN.md files found' };
+  }
+  const violations = [];
+  let tasksTotal = 0;
+  let tasksPassed = 0;
+  let spotChecks = 0;
+  let spotCheckMismatches = 0;
+  for (const file of targets) {
+    const text = fs.readFileSync(file, 'utf8');
+    // Match <task ...>...</task> blocks (planner format); ### Story headings (sprint format) are used only as a fallback when a file has no <task> blocks.
+    const taskBlocks = [];
+    const taskRe = /<task[^>]*?id\s*=\s*["']([^"']+)["'][^>]*?>([\s\S]*?)<\/task>/gi;
+    let m;
+    while ((m = taskRe.exec(text)) !== null) {
+      taskBlocks.push({ id: m[1], body: m[2] });
+    }
+    // Story-format fallback: ### Story 8.1.3 — name { body until next ### or end }
+    if (taskBlocks.length === 0) {
+      const storyRe = /^###\s+Story\s+(\S+)[^\n]*\n([\s\S]*?)(?=^###\s+Story\s+|\Z)/gm;
+      while ((m = storyRe.exec(text)) !== null) {
+        taskBlocks.push({ id: m[1], body: m[2] });
+      }
+    }
+    for (const t of taskBlocks) {
+      tasksTotal++;
+      const evMatch = t.body.match(/<evidence>([\s\S]*?)<\/evidence>/i)
+        || t.body.match(/(?:^|\n)\s*\*\*Evidence:?\*\*\s*([\s\S]*?)(?=\n\s*\*\*|\n\n|$)/i);
+      if (!evMatch || !evMatch[1].trim()) {
+        violations.push({
+          file: path.relative(PROJECT_ROOT, file),
+          task_id: t.id,
+          severity: 'BLOCKER',
+          kind: 'missing-evidence',
+          message: 'Task has no <evidence> block. Per issue #649, every task must cite grep hits, line ranges, or a creates: justification.',
+        });
+        continue;
+      }
+      const evidence = evMatch[1].trim();
+      // Must contain at least one of: grep:, lines:, creates:
+      const hasGrep = /(^|\n)\s*grep:/i.test(evidence) || /\brg\b/.test(evidence);
+      const hasLines = /(^|\n)\s*lines:/i.test(evidence) || /\b\S+\.\w+:\d+(-\d+)?/.test(evidence);
+      const hasCreates = /(^|\n)\s*creates:/i.test(evidence);
+      if (!hasGrep && !hasLines && !hasCreates) {
+        violations.push({
+          file: path.relative(PROJECT_ROOT, file),
+          task_id: t.id,
+          severity: 'BLOCKER',
+          kind: 'evidence-shape',
+          message: 'Evidence block exists but contains no grep:, lines:, or creates: marker. Cannot be traced to real code.',
+        });
+        continue;
+      }
+      tasksPassed++;
+      // Optional spot-check: re-run the first grep pattern cited and compare hit counts.
+      if (flags['spot-check'] && hasGrep) {
+        const claim = evidence.match(/grep:\s*(?:`|')?([^\n`']+?)(?:`|')?\s*(?:→|->|=>|—|-)\s*(\d+)/i)
+          || evidence.match(/`(rg[^`]+)`[^→]*→\s*(\d+)/i);
+        if (claim) {
+          const pattern = claim[1].trim();
+          const claimedCount = parseInt(claim[2], 10);
+          try {
+            // Spot-check with rg only (no grep fallback is implemented): sum the per-file counts printed by --count-matches.
+            const cmd = `rg --count-matches ${JSON.stringify(pattern.replace(/^rg\s+/, ''))} 2>/dev/null | awk -F: '{s+=$2} END {print s+0}'`;
+            const out = require('child_process').execSync(cmd, { cwd: PROJECT_ROOT, encoding: 'utf8', timeout: 10000 }).trim();
+            const actualCount = parseInt(out, 10) || 0;
+            spotChecks++;
+            const drift = Math.abs(actualCount - claimedCount) / Math.max(claimedCount, 1);
+            if (drift > 0.1) {
+              spotCheckMismatches++;
+              violations.push({
+                file: path.relative(PROJECT_ROOT, file),
+                task_id: t.id,
+                severity: 'BLOCKER',
+                kind: 'spot-check-mismatch',
+                message: `Evidence claims grep hits=${claimedCount} for pattern '${pattern}', actual=${actualCount} (drift ${(drift*100).toFixed(0)}%)`,
+              });
+            }
+          } catch (_) { /* spot-check is best-effort; rg/grep not available shouldn't fail validation */ }
+        }
+      }
+    }
+  }
+  return {
+    ok: violations.length === 0,
+    files_scanned: targets.length,
+    tasks_total: tasksTotal,
+    tasks_passed: tasksPassed,
+    spot_checks_run: spotChecks,
+    spot_check_mismatches: spotCheckMismatches,
+    violations,
+  };
+}
 /** plan list — glob .planning/plans/ for plan files. */
 function cmdPlanList() {
   const plansDir = path.join(PLANNING_DIR, 'plans');
@@ -4825,8 +5045,19 @@ function cmdProgress(args) {
     const routes = [];
     const statePhases = (state && (state.state?.phases || state.phases)) || [];
-    // Route A — phases with pending plans (ready to execute)
+    // Route A — phases with pending plans (ready to execute).
+    // Issue #653 — never recommend executing a phase whose state.json status
+    // is already complete/done/verified, even if its on-disk plan_count >
+    // summary_count. Missing second summary file is not the canonical
+    // completion signal; state.json is. Run /rihal-audit phase <N> for
+    // disk-vs-state drift, but stop steering users into re-executing
+    // finished work.
+    const isPhaseDone = (p) => {
+      const s = String((p && p.status) || '').toLowerCase();
+      return s === 'complete' || s === 'completed' || s === 'done' || s === 'verified' || Boolean(p && p.completed);
+    };
     const pendingExec = statePhases.filter(p => {
+      if (isPhaseDone(p)) return false;
       const disk = diskByNum[phaseKey(p)];
       return disk && disk.plan_count > disk.summary_count;
     }).slice(0, 3);
@@ -5250,7 +5481,13 @@ async function main() {
         break;
       case 'plan':
         if (args[0] === 'list') { result = cmdPlanList(); }
-        else { console.error('Unknown plan subcommand. Valid: list'); process.exit(1); }
+        else if (args[0] === 'validate-evidence') {
+          result = cmdPlanValidateEvidence(args.slice(1));
+          // Issue #649 — non-zero exit on violations so CI / sprint-checker can gate.
+          console.log(JSON.stringify(result, null, 2));
+          process.exit(result.ok ? 0 : 1);
+        }
+        else { console.error('Unknown plan subcommand. Valid: list, validate-evidence'); process.exit(1); }
         break;
       case 'phase-plan-index':
         result = cmdPhasePlanIndex(args.join(' '));
@@ -5440,7 +5677,8 @@ async function main() {
         console.log('  classify-tech --keywords "<keywords>"        → classify tech stack from keywords (frontend/backend/mobile/styling)');
         console.log('  context refresh                              → refresh .rihal/context/ cache from .rihal/sources.yaml');
         console.log('  module <subcommand> [args]                   → module system helpers');
-        console.log('  plan <subcommand> [args]                     → phase/plan operations');
+        console.log('  plan <list|validate-evidence>                → phase/plan operations');
+        console.log('  plan validate-evidence <N> [--spot-check]    → enforce <evidence> blocks in SPRINT.md (#649); exit 1 on violation');
         console.log('  phase-plan-index <N>                         → JSON inventory of plans under phase N (waves, summary status, task counts)');
         console.log('  phases list [--type X] [--pick path]         → directory inventory of .planning/phases (--type: summaries|sprints|directories|all; --pick: e.g. directories[-1])');
         console.log('  find-phase <N> [--raw]                       → resolve phase number to dir/slug + decimal children');
@@ -5472,7 +5710,7 @@ async function main() {
         console.log('  state add-decision "<summary>"               → append to decisions[] + ~/.rihal/decisions.jsonl');
         console.log('  state decisions-global [--limit N] [--project <name>] [--since <ISO>]  → query ~/.rihal/decisions.jsonl across all projects');
         console.log('  state add-blocker "<description>"            → append to blockers[]');
-        console.log('  state resolve-blocker <index>                → mark blocker as resolved');
+        console.log('  state resolve-blocker <index>|--all|--phase <N>  --issue <N>|--commit <sha>|--noref  → mark blocker(s) resolved (#654, #656)');
         console.log('  state record-session                         → update last_session timestamp');
         console.log('  state record-council --slug <s> --panel <csv> --artifact <path>');
         console.log('  state record-chain --slug <s> --agents <csv> --artifacts <path>');
@@ -5505,10 +5743,40 @@ async function main() {
         return;
       default: {
         const stateSubs = ['read','get','init','set-phase','advance-plan','record-execution','record-council','record-chain','add-decision','decisions-global','add-blocker','resolve-blocker','record-session','set-ids-in-state','migrate-ids','migrate-schema','next-phase-id','next-plan-id','next-task-id','resolve-id','workstream-create','workstream-switch','workstream-list','workstream-status','workstream-complete','workstream-validate','insert-phase','planned-phase','begin-phase','complete-phase','reset'];
+        // Issue #656 — top-level aliases for intuitive guesses.
+        const intuitionAliases = {
+          blocker: 'state resolve-blocker',
+          blockers: 'state resolve-blocker',
+          decision: 'state add-decision',
+          decisions: 'state decisions-global',
+          sync: 'state sync',
+        };
         if (stateSubs.includes(subcommand)) {
           console.error(`Did you mean: state ${subcommand}? Run 'rihal-tools.cjs help' for full usage.`);
+        } else if (intuitionAliases[subcommand]) {
+          console.error(`'${subcommand}' is not a top-level command. Did you mean: ${intuitionAliases[subcommand]}?`);
         } else {
-          console.error(`Unknown subcommand: ${subcommand}. Run 'rihal-tools.cjs help' for full usage.`);
+          // Fuzzy hint — suggest top 2 closest state subcommands by simple substring/edit-distance.
+          const lev = (a, b) => {
+            const m = Array.from({length: a.length+1}, (_,i) => Array(b.length+1).fill(0));
+            for (let i=0; i<=a.length; i++) m[i][0]=i;
+            for (let j=0; j<=b.length; j++) m[0][j]=j;
+            for (let i=1; i<=a.length; i++) for (let j=1; j<=b.length; j++) {
+              m[i][j] = a[i-1]===b[j-1] ? m[i-1][j-1] : 1 + Math.min(m[i-1][j], m[i][j-1], m[i-1][j-1]);
+            }
+            return m[a.length][b.length];
+          };
+          const candidates = stateSubs.concat(Object.keys(intuitionAliases));
+          const scored = candidates
+            .map(c => ({ c, d: c.includes(subcommand) || subcommand.includes(c) ? 0.5 : lev(c, subcommand) }))
+            .sort((a, b) => a.d - b.d)
+            .slice(0, 2)
+            .filter(x => x.d <= Math.max(2, subcommand.length / 2));
+          if (scored.length > 0) {
+            console.error(`Unknown subcommand: ${subcommand}. Closest matches: ${scored.map(s => s.c).join(', ')}. Run 'rihal-tools.cjs help' for full usage.`);
+          } else {
+            console.error(`Unknown subcommand: ${subcommand}. Run 'rihal-tools.cjs help' for full usage.`);
+          }
         }
         process.exit(1);
       }

package/rihal/workflows/autonomous-smart-discuss.md CHANGED Viewed

@@ -12,7 +12,8 @@ Run smart discuss for the current phase. Proposes grey area answers in batch tab
 ```bash
 PHASE_NUM="${PHASE_NUM}"  # local alias; other workflows use PHASE_NUMBER from init JSON
-PADDED_PHASE=$(printf "%02d" "${PHASE_NUM%.*}")
+# Issue #652 — no leading zeros. Variable name kept for backward compat.
+PADDED_PHASE="${PHASE_NUM%.*}"
 PHASE_DIR=".planning/phases/${PADDED_PHASE}-${PHASE_SLUG}"
 ```

package/rihal/workflows/execute.md CHANGED Viewed

@@ -12,8 +12,23 @@ route back to the user.
    plan count, wave count, autonomy flag per plan, files_modified overlaps
 3. **Anti-patterns**: check for `.continue-here.md` (paused state), STATE.md
    error flag, existing VERIFICATION.md with FAIL items without overrides
-4. **Branch check**: confirm current git branch matches milestone's expected
-   branch (from config or roadmap)
+4. **Branch check** (issue #659): confirm current git branch is appropriate
+   for the work. Two checks, both blocking:
+   a. **Not on main/master without consent**: if `git branch --show-current`
+      returns `main` or `master`, refuse to execute. Suggest:
+      `git switch -c <phase>-<plan>-<slug>` (e.g. `git switch -c 8-1-aria`).
+      User can override only by passing `--allow-main` to /rihal-execute and
+      explicitly typing the override on this turn.
+   b. **Working tree clean enough**: if `git status --porcelain` shows
+      modified files that are not listed in this phase's `files_modified`
+      frontmatter, surface them and ask whether to commit, stash, or proceed.
+      Motivating case from a real session: P0 CSS fixes landed in an
+      already-dirty tree with no commit boundary separating them.
+   The branch name should align with the phase/plan IDs from state — check
+   `workflow.branch_pattern` config (default `<phase>-<plan>-<slug>`).
 5. **Worktree config**: read `workflow.use_worktrees` — if true + parallelization
    is true + no file overlaps, plans in a wave run parallel via worktrees
 </pre_flight>

package/rihal/workflows/plan-spawn-planner.md CHANGED Viewed

@@ -2,6 +2,15 @@
 Sub-step of plan.md — Step 8 Spawn rihal-planner Agent. Spawns rihal-planner with full context to generate SPRINT.md plans. Includes deep-work rules and downstream consumer spec.
 </purpose>
+<filename_convention>
+Issue #657 — every SPRINT.md, including the first plan in a phase, uses the
+sequence-numbered form `{phase}-{plan}-SPRINT.md` (no leading zeros per #652).
+Examples: `8-1-SPRINT.md`, `8-2-SPRINT.md`. Do NOT emit a bare `{phase}-SPRINT.md`
+or `{phase}-PLAN.md` for the first plan — that creates an inconsistent series
+when a second plan is added later. The plan-number computation in plan.md uses
+`NEXT_PLAN_NUMBER=$((EXISTING_PLAN_COUNT + 1))` and starts at 1 for new phases.
+</filename_convention>
 ## 8. Spawn rihal-planner Agent
 Display banner:

package/rihal/workflows/plan.md CHANGED Viewed

@@ -327,6 +327,30 @@ Always offer exactly three numbered options:
 Wait for the user's choice before proceeding. Do not auto-select.
+**If user picks option 1 (Add more plans) — issue #650:**
+This is **NOT** a license to hand-write a new SPRINT.md inline. Continue down the
+normal pipeline, following the same steps as a first-time plan:
+1. Proceed to Step 7 (context-paths) and Step 7.5 (Nyquist verification) as normal.
+2. Spawn `rihal-planner` via `@rihal/workflows/plan-spawn-planner.md` (Step 8). The
+   planner subagent is mandatory — the orchestrator must NOT write SPRINT.md
+   directly via the `Write` tool. Pass the existing plan list to the planner so
+   it picks the next plan number and avoids re-covering shipped tasks.
+3. After the planner returns, run sprint-checker (Step 10) the same as a
+   first-time plan. The "PLANNED ✓" banner is gated on a passing CHECK.md.
+A run that emits a SPRINT.md without a corresponding planner Task() invocation
+in the same turn is a malfunction — see issue #650. Stop and report instead of
+shipping a hand-rolled plan.
+**If user picks option 3 (Replan from scratch):**
+Same as option 1, but pass the existing plans to the planner with a
+`replace: true` directive. Existing PLAN.md files are renamed to
+`*-SUPERSEDED.md` (do not delete them) before the planner writes the new
+ones. Subagent invocation is still mandatory.
 **If user picks option 2 (View existing plans):**
 Display a sprint summary table (sprint id → one-line goal).
@@ -822,6 +846,36 @@ Route to `<offer_next>` (existing behavior).
 </process>
+<banner_emission_gate>
+Issue #655 — the success banner is gated on real verification, not vibes.
+Before emitting `PLANNED ✓`, confirm one of these is true:
+1. A passing CHECK.md exists at `${PHASE_DIR}/*-CHECK.md` from rihal-sprint-checker
+   in this run AND its overall verdict is `pass` (or `pass-with-cautions`).
+2. The user has explicitly said "skip verification" / "override" this run AND that
+   override is recorded in the offer-next output's `Verification:` field as
+   `Passed with override`.
+3. `plan_checker_enabled` is false in config — recorded as `Verification: Skipped
+   (config-disabled)`.
+If none of the three holds (sprint-checker was never spawned, or it returned a
+fail verdict, or its CHECK.md is missing) — DO NOT emit `PLANNED ✓`. Emit:
+```
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Rihal ► PHASE {X} PLANNED ⚠ (gates skipped)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+Plans were written but rihal-sprint-checker did not return a passing
+CHECK.md. Run /rihal-plan {X} --reviews to gate the plans before
+executing, or pass --skip-verify if you accept the risk.
+```
+The same rule applies to `VERIFIED ✓` (after /rihal-verify-phase) and
+`DONE ✓` (after /rihal-execute) — the success-tick is reserved for
+gate-passed states.
+</banner_emission_gate>
 <offer_next>
 Output this markdown directly (not as a code block):

package/rihal/workflows/ui-review.md CHANGED Viewed

@@ -86,7 +86,11 @@ Task tool call:
     **Audit 6 pillars (pass/fail + findings):**
-    1. Color Consistency — All text/backgrounds match color tokens, contrast ratios >= WCAG AA
+    1. Color Consistency — All text/backgrounds match color tokens, contrast ratios >= WCAG AA.
+       **Hex literal scan (issue #660):** run `rg -n '#[0-9A-Fa-f]{3,8}\b' <css/tailwind paths>` —
+       any hex outside the `:root { ... }` token block in globals.css (or equivalent token
+       definition file) is a regression flag. If a token is missing for the stated semantic
+       role, the fix is to ADD the token, never to inline the hex. Cite the exact file:line.
     2. Typography Compliance — Font sizes, weights, line heights match typography scales
     3. Component Inventory — All specified components present, all variants implemented
     4. Accessibility — aria-labels, roles, keyboard navigation, focus rings per WCAG 2.1 AA