@hanzlaa/rcode 3.4.18 → 3.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/index.js CHANGED
File without changes
@@ -100,23 +100,32 @@ function diffSet(editor, kind, expected, installed) {
100
100
  }
101
101
 
102
102
  /**
103
- * Verify a Claude install: checks .claude/skills/rihal-<agent> and
104
- * .claude/skills/<action> (action skills keep their bare name) against the
105
- * package manifest. Returns an array of diff reports.
103
+ * Verify a Claude install. Agents live at .claude/agents/rihal-<name>.md.
104
+ * Action skills live at .claude/skills/<name>/ (bare name, no rihal- prefix).
105
+ *
106
+ * Note: .claude/skills/ ALSO contains rihal-<name>/ directories that are
107
+ * auto-generated command stubs by generate-command-skills.cjs (so commands
108
+ * appear in the IDE sidebar). Those are NOT agents — counting them as agents
109
+ * makes doctor report drift like "agents 119/23" when nothing is wrong.
110
+ * That's why the agent count comes from .claude/agents/, not .claude/skills/.
106
111
  */
107
112
  function verifyClaudeInstall(cwd, packageRoot) {
108
113
  const pkg = readPackageManifest(packageRoot);
114
+ const agentsDir = path.join(cwd, '.claude/agents');
109
115
  const skillsDir = path.join(cwd, '.claude/skills');
110
116
 
111
- // Agents are installed as rihal-{name} strip prefix to match pkg.agents keys
112
- const installedAgents = readInstalledDirs(skillsDir, 'rihal-');
113
- // Do NOT pre-filter against pkg.agents: we want stale entries (installed but
114
- // not in current package) to appear in the `extra` list of diffSet so that
115
- // `rcode doctor` can flag them as stale and `rcode uninstall` can remove them.
116
- // The old intersection filter was hiding orphaned agent dirs after version bumps.
117
+ // Agents: .claude/agents/rihal-<name>.md (file-based, not dir-based).
118
+ const installedAgents = new Set();
119
+ if (fs.existsSync(agentsDir)) {
120
+ for (const f of fs.readdirSync(agentsDir)) {
121
+ if (f.startsWith('rihal-') && f.endsWith('.md')) {
122
+ installedAgents.add(f.replace(/^rihal-/, '').replace(/\.md$/, ''));
123
+ }
124
+ }
125
+ }
117
126
 
118
- // Action skills: installed with their bare name (no rihal- prefix).
119
- // Exclude known agent dirs (rihal-prefixed) so actions and agents don't bleed.
127
+ // Actions: .claude/skills/<bare-name>/ exclude rihal-* dirs (those are
128
+ // either agent stubs or command stubs, never action skills).
120
129
  const allInstalled = readInstalledDirs(skillsDir);
121
130
  const actionsInstalled = new Set(
122
131
  [...allInstalled].filter((n) => !n.startsWith('rihal-'))
package/dist/rcode.js CHANGED
@@ -17060,8 +17060,16 @@ var require_manifest = __commonJS({
17060
17060
  }
17061
17061
  function verifyClaudeInstall(cwd, packageRoot) {
17062
17062
  const pkg = readPackageManifest(packageRoot);
17063
+ const agentsDir = path2.join(cwd, ".claude/agents");
17063
17064
  const skillsDir = path2.join(cwd, ".claude/skills");
17064
- const installedAgents = readInstalledDirs(skillsDir, "rihal-");
17065
+ const installedAgents = /* @__PURE__ */ new Set();
17066
+ if (fs2.existsSync(agentsDir)) {
17067
+ for (const f of fs2.readdirSync(agentsDir)) {
17068
+ if (f.startsWith("rihal-") && f.endsWith(".md")) {
17069
+ installedAgents.add(f.replace(/^rihal-/, "").replace(/\.md$/, ""));
17070
+ }
17071
+ }
17072
+ }
17065
17073
  const allInstalled = readInstalledDirs(skillsDir);
17066
17074
  const actionsInstalled = new Set(
17067
17075
  [...allInstalled].filter((n) => !n.startsWith("rihal-"))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hanzlaa/rcode",
3
- "version": "3.4.18",
3
+ "version": "3.4.20",
4
4
  "description": "rcode — the memory bank for AI-driven SaaS teams. Persistent project context, distinctive engineering personas, and phase-based workflows. Built by Rihal. Works in Claude Code, Cursor, Gemini, VS Code, and Antigravity.",
5
5
  "main": "cli/index.js",
6
6
  "bin": {
@@ -8,6 +8,15 @@
8
8
  "rihal": "dist/rcode.js",
9
9
  "rihal-code": "dist/rcode.js"
10
10
  },
11
+ "scripts": {
12
+ "dashboard": "node server/dashboard.js",
13
+ "test": "node --test",
14
+ "test:ci": "node --test --test-reporter=spec",
15
+ "postinstall": "node cli/postinstall.js",
16
+ "build:cli": "node scripts/build.cjs",
17
+ "build": "node scripts/build.cjs",
18
+ "dogfood": "bash scripts/dogfood-check.sh"
19
+ },
11
20
  "files": [
12
21
  "cli/",
13
22
  "rihal/",
@@ -60,14 +69,5 @@
60
69
  },
61
70
  "publishConfig": {
62
71
  "access": "public"
63
- },
64
- "scripts": {
65
- "dashboard": "node server/dashboard.js",
66
- "test": "node --test",
67
- "test:ci": "node --test --test-reporter=spec",
68
- "postinstall": "node cli/postinstall.js",
69
- "build:cli": "node scripts/build.cjs",
70
- "build": "node scripts/build.cjs",
71
- "dogfood": "bash scripts/dogfood-check.sh"
72
72
  }
73
- }
73
+ }
@@ -48,6 +48,11 @@ Core: Parse user decisions from CONTEXT.md, decompose into sprints with stories,
48
48
  - `<action>`: Specific instructions, what to avoid & WHY
49
49
  - `<verify>`: <automated> command < 60 sec (REQUIRED by Nyquist Rule)
50
50
  - `<done>`: Measurable acceptance criteria
51
+ - `<evidence>`: **REQUIRED** (issue #649). Must show codebase grounding — at minimum one of:
52
+ - `grep:` a literal grep/Glob pattern + count of matches that justified this task ("`rg '\\.alert' apps/web/src` → 13 hits across 9 files")
53
+ - `lines:` exact `path:line-line` ranges of code being modified
54
+ - `creates:` the file paths being created from scratch (with one-line justification why no existing file fits)
55
+ A task without `<evidence>` is theoretical and MUST NOT be written.
51
56
 
52
57
  ### Task Types
53
58
  | Type | When | Autonomy |
@@ -116,6 +121,37 @@ else: wave = max(waves of dependencies) + 1
116
121
 
117
122
  **File ownership:** No overlap in files_modified → can run parallel. Overlap → later depends on earlier.
118
123
 
124
+ ## Codebase Discovery (BLOCKER — added after issue #649)
125
+
126
+ **Before writing any task body, you MUST query the actual codebase.** Plans built on
127
+ guessed file counts, imagined components, or "probably the dashboard does X" content
128
+ are theoretical and rejected by sprint-checker.
129
+
130
+ For every claim a task makes about the codebase, run a real query and capture the
131
+ result in the task's `<evidence>` field:
132
+
133
+ | Claim shape | Required query |
134
+ |---|---|
135
+ | "migrate N files away from X" | `rg -l '<X>' <scope>` — record exact file count + paths |
136
+ | "modify component Y" | `Read` the file; record `path:line-line` ranges |
137
+ | "replace pattern P" | `rg '<P>'` — record hit count + a representative match |
138
+ | "add Z where there's no Z today" | `rg '<Z>'` returning 0 hits is the evidence |
139
+ | "create new file F" | confirm F does NOT exist + state why no existing file fits |
140
+
141
+ **Hard stops:**
142
+
143
+ - Did NOT grep for a symbol the task says it modifies? → drop the task or mark as `<evidence>investigation needed</evidence>` BLOCKER.
144
+ - File count cited but never measured? → run the grep and write the real number; never cite a count like "13 files" without a grep behind it.
145
+ - Claim references "the dashboard / the orders page / the POS" without reading the file? → Read the file first, cite line ranges.
146
+
147
+ **Smell test before writing each task:**
148
+ > "Could every line of this task body be traced back to a specific file and line in the repo?"
149
+ >
150
+ > If not, the task is theoretical. Drop it.
151
+
152
+ The orchestrator (`/rihal-plan`) MUST pass this checklist forward to sprint-checker,
153
+ which fails the plan if any task lacks `<evidence>`.
154
+
119
155
  ## File-existence verification (BLOCKER — added in v3.1.0 after #441)
120
156
 
121
157
  Before writing each entry into `files_modified`, you MUST verify the file actually exists in the project. Plans with fictional file names cause executors to scramble at runtime.
@@ -191,6 +227,7 @@ Create `.planning/phases/XX-name/{phase}-{plan}-SUMMARY.md`
191
227
  4. **Over-splitting:** Ticket-sized work → ONE plan, not three
192
228
  5. **No dependency graph:** Tasks look independent but aren't
193
229
  6. **Context anxiety:** Plans bloat when context > 50%. Keep to 2-3 tasks.
230
+ 7. **Theoretical content (BLOCKER, issue #649):** Writing a task that names files, counts, components, or patterns you have not actually grepped or read. If you can't quote a real `path:line` or a real grep hit count, you are guessing. Drop the task or downgrade it to an investigation BLOCKER.
194
231
 
195
232
  ## Constraints
196
233
 
@@ -96,6 +96,7 @@ Same methodology (goal-backward), different timing, different subject matter.
96
96
  9. Cross-Sprint Data Contracts
97
97
  10. CLAUDE.md Compliance
98
98
  11. File References Verification
99
+ 12. Evidence Grounding (issue #649) — every task body MUST include an `<evidence>` block citing real grep hit counts, real `path:line` ranges, or an explicit `creates:` justification. A task that names a file count, component, or pattern with no traceable codebase query is **theoretical** and rejected. Run a sample of the cited greps yourself; if the planner's claimed "13 hits" actually returns 4, fail this dimension with a BLOCKER.
99
100
 
100
101
  Each dimension has pass/partial/fail criteria, remediation guidance, and output format requirements.
101
102
 
@@ -105,8 +106,13 @@ Each dimension has pass/partial/fail criteria, remediation guidance, and output
105
106
 
106
107
  1. **Load context** — Read phase SCOPE.md, CONTEXT.md (if present), RESEARCH.md, and all SPRINT.md files.
107
108
  2. **Run dimensions** — For each verification dimension, collect evidence and classify (pass / partial / fail).
108
- 3. **Synthesize** Produce CHECK.md with overall verdict, per-dimension scores, remediation asks.
109
- 4. **Return** — Block execution if critical dimensions fail; proceed with cautions if only partials.
109
+ 3. **Programmatic evidence check (issue #649)** — call:
110
+ ```
111
+ node .rihal/bin/rihal-tools.cjs plan validate-evidence <phase> --spot-check
112
+ ```
113
+ Exit code 0 = pass, 1 = at least one task violation. Inline the JSON `violations[]` into dimension 12 of CHECK.md verbatim — these are authoritative and must not be paraphrased away.
114
+ 4. **Synthesize** — Produce CHECK.md with overall verdict, per-dimension scores, remediation asks.
115
+ 5. **Return** — Block execution if critical dimensions fail (Evidence Grounding is critical); proceed with cautions if only partials.
110
116
 
111
117
  ## Mandatory output markers (per #440 / #445 fix)
112
118
 
@@ -22,8 +22,13 @@ function commitsForPhase(projectRoot, phaseNum) {
22
22
  // Look for commit subjects starting with NN- or NNN- matching the phase.
23
23
  const log = git(`log --pretty=format:%H%x09%s`, projectRoot);
24
24
  if (!log) return [];
25
- const prefix = String(phaseNum).padStart(2, '0');
26
- const re = new RegExp(`(^|[^0-9])${prefix}-\\d+`);
25
+ // Issue #652 — accept both unpadded ('8-1') and legacy padded ('08-01')
26
+ // commit prefixes so verify still works on projects that were created
27
+ // before the no-leading-zeros rule.
28
+ const num = String(phaseNum);
29
+ const padded = num.padStart(2, '0');
30
+ const alt = num === padded ? num : `(?:${num}|${padded})`;
31
+ const re = new RegExp(`(^|[^0-9])${alt}-\\d+`);
27
32
  const commits = [];
28
33
  for (const line of log.split('\n')) {
29
34
  const [hash, ...rest] = line.split('\t');
@@ -455,7 +455,11 @@ function cmdInit(workflowName, rawArgs) {
455
455
 
456
456
  out.phase_found = roadmapPhase !== null;
457
457
  out.phase_number = String(phaseNum);
458
- out.padded_phase = String(phaseNum).padStart(2, '0');
458
+ // Issue #652 — no leading zeros in planning artifacts. The field name
459
+ // 'padded_phase' is kept for workflow backward compat but the value is
460
+ // now the canonical (unpadded) phase number. The resolver above still
461
+ // accepts legacy '06-name' directories for older projects.
462
+ out.padded_phase = String(phaseNum);
459
463
  out.phase_name = roadmapPhase ? roadmapPhase.name : null;
460
464
  out.phase_slug = phaseDirEntry ? phaseDirEntry.replace(/^\d+-/, '') : null;
461
465
  out.phase_dir = phaseDirEntry ? path.join(PLANNING_DIR, 'phases', phaseDirEntry) : null;
@@ -1352,18 +1356,25 @@ function cmdState(subArgs) {
1352
1356
 
1353
1357
  // --- add-decision ---
1354
1358
  if (sub === 'add-decision') {
1355
- const summary = subArgs.slice(1).join(' ');
1359
+ // Issue #658 caller can scope explicitly with --phase <N>; otherwise we
1360
+ // infer from state.current_phase (which can mis-fire mid-orchestration).
1361
+ const flagStart = (() => {
1362
+ for (let i = 1; i < subArgs.length; i++) if (subArgs[i].startsWith('--')) return i;
1363
+ return subArgs.length;
1364
+ })();
1365
+ const summary = subArgs.slice(1, flagStart).join(' ');
1366
+ const flags = parseFlags(flagStart);
1356
1367
  if (!summary) throw new Error('add-decision requires a summary argument');
1357
1368
  const state = readState() || defaultState();
1358
1369
  if (!state.decisions) state.decisions = [];
1359
1370
  const record = {
1360
1371
  summary,
1361
- phase: state.current_phase,
1362
- plan: state.current_plan,
1372
+ phase: flags.phase ? String(flags.phase) : state.current_phase,
1373
+ plan: flags.plan ? String(flags.plan) : state.current_plan,
1363
1374
  date: new Date().toISOString(),
1364
1375
  };
1365
1376
  state.decisions.push(record);
1366
- const result = writeState(state);
1377
+ writeState(state);
1367
1378
  // Mirror to cross-project store (best-effort, never fails the local write).
1368
1379
  try {
1369
1380
  appendGlobalDecision({
@@ -1375,7 +1386,14 @@ function cmdState(subArgs) {
1375
1386
  summary: record.summary,
1376
1387
  });
1377
1388
  } catch (_) { /* silent — local commit must not break on home-dir issues */ }
1378
- return result;
1389
+ // Issue #658 — return the appended record so callers can confirm the
1390
+ // phase scope and ID without re-reading state.json.
1391
+ return {
1392
+ ok: true,
1393
+ decision: record,
1394
+ decision_index: state.decisions.length - 1,
1395
+ total_decisions: state.decisions.length,
1396
+ };
1379
1397
  }
1380
1398
 
1381
1399
  // --- decisions-global: query ~/.rihal/decisions.jsonl across all projects ---
@@ -1412,14 +1430,62 @@ function cmdState(subArgs) {
1412
1430
 
1413
1431
  // --- resolve-blocker ---
1414
1432
  if (sub === 'resolve-blocker') {
1415
- const index = parseInt(subArgs[1], 10);
1416
1433
  const state = readState();
1417
1434
  if (!state) throw new Error('No state.json found');
1418
- if (!state.blockers || index < 0 || index >= state.blockers.length) {
1419
- throw new Error(`Invalid blocker index: ${subArgs[1]}. Valid range: 0-${(state.blockers || []).length - 1}`);
1435
+ if (!state.blockers || state.blockers.length === 0) {
1436
+ throw new Error('No blockers to resolve');
1437
+ }
1438
+ // Issue #656 — support --all and --phase <N> for batch resolution.
1439
+ const flagStart = subArgs[1] && /^--/.test(subArgs[1]) ? 1 : 2;
1440
+ const flags = parseFlags(flagStart);
1441
+ const indices = [];
1442
+ if (flags.all === true || flags.all === 'true') {
1443
+ for (let i = 0; i < state.blockers.length; i++) {
1444
+ if (!state.blockers[i].resolved) indices.push(i);
1445
+ }
1446
+ } else if (flags.phase) {
1447
+ const ph = String(flags.phase).replace(/^[Pp]hase\s*/, '');
1448
+ for (let i = 0; i < state.blockers.length; i++) {
1449
+ const b = state.blockers[i];
1450
+ if (b.resolved) continue;
1451
+ const matchesPhase = String(b.phase || '') === ph ||
1452
+ (b.description || '').includes(`Phase ${ph}`) ||
1453
+ (b.description || '').includes(`[Phase ${ph}]`);
1454
+ if (matchesPhase) indices.push(i);
1455
+ }
1456
+ } else {
1457
+ const index = parseInt(subArgs[1], 10);
1458
+ if (Number.isNaN(index) || index < 0 || index >= state.blockers.length) {
1459
+ throw new Error(`Invalid blocker index: ${subArgs[1]}. Valid range: 0-${state.blockers.length - 1}, or use --all / --phase <N>`);
1460
+ }
1461
+ indices.push(index);
1462
+ }
1463
+ if (indices.length === 0) {
1464
+ throw new Error('No matching unresolved blockers found');
1465
+ }
1466
+ // Issue #654 — tickets-first. Resolution must reference an issue, a
1467
+ // commit SHA, or be explicitly marked as internal with --noref. Silent
1468
+ // resolution drops the audit trail.
1469
+ const hasIssue = flags.issue && /^#?\d+$/.test(String(flags.issue));
1470
+ const hasCommit = flags.commit && /^[0-9a-f]{7,40}$/i.test(String(flags.commit));
1471
+ const noref = flags.noref === true || flags.noref === 'true';
1472
+ if (!hasIssue && !hasCommit && !noref) {
1473
+ throw new Error(
1474
+ `resolve-blocker [${index}] requires an audit reference. Pass one of:\n` +
1475
+ ` --issue <gh-issue-number> e.g. --issue 654\n` +
1476
+ ` --commit <sha> 7-40 hex chars\n` +
1477
+ ` --noref acknowledge no external reference (audit trail will say "internal")`
1478
+ );
1420
1479
  }
1421
- state.blockers[index].resolved = new Date().toISOString();
1422
- return writeState(state);
1480
+ const now = new Date().toISOString();
1481
+ for (const idx of indices) {
1482
+ state.blockers[idx].resolved = now;
1483
+ if (hasIssue) state.blockers[idx].resolved_issue = String(flags.issue).replace(/^#/, '');
1484
+ if (hasCommit) state.blockers[idx].resolved_commit = String(flags.commit).slice(0, 40);
1485
+ if (noref && !hasIssue && !hasCommit) state.blockers[idx].resolved_ref = 'internal';
1486
+ }
1487
+ const result = writeState(state);
1488
+ return { ...result, resolved_count: indices.length, resolved_indices: indices };
1423
1489
  }
1424
1490
 
1425
1491
  // --- record-session ---
@@ -3556,6 +3622,160 @@ function cmdInitPlan(rawArgs) {
3556
3622
  };
3557
3623
  }
3558
3624
 
/**
 * plan validate-evidence — issue #649 enforcement.
 *
 * Scans SPRINT.md / PLAN.md files under a phase (or one explicit file) and
 * checks that every task contains an evidence block with a codebase-grounding
 * marker (`grep:`, `lines:`, or `creates:`). With `--spot-check`, the first
 * cited grep pattern is re-run with ripgrep and its hit count is compared to
 * the planner's claim (more than 10% drift is a violation).
 *
 * Tasks are read from `<task id="...">...</task>` blocks; if a file has none,
 * `### Story <id>` headings are used instead, each story running until the
 * next story heading or the end of the file.
 *
 * Usage:
 *   plan validate-evidence <phase-number>
 *   plan validate-evidence --file <path>
 *   plan validate-evidence <phase-number> --spot-check
 *
 * @param {string[]} [rawArgs] CLI tokens following `plan validate-evidence`.
 * @returns {{ok: boolean, files_scanned: number, tasks_total: number,
 *   tasks_passed?: number, spot_checks_run?: number,
 *   spot_check_mismatches?: number, violations: object[], message?: string}}
 *   Summary object. `ok` is true only when `violations` is empty.
 *   `tasks_passed` counts tasks that passed the structural check; a later
 *   spot-check mismatch adds a violation without decrementing it.
 * @throws {Error} When the file, the phases directory, or the phase directory
 *   cannot be found, or when neither a phase number nor `--file` is given.
 */
function cmdPlanValidateEvidence(rawArgs) {
  // Flags that never take a value, so `--spot-check 8` keeps `8` as the phase.
  const BOOLEAN_FLAGS = new Set(['spot-check']);

  const args = (rawArgs || []).slice();
  const flags = {};
  const positional = [];
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }
    const key = arg.slice(2);
    const next = args[i + 1];
    if (BOOLEAN_FLAGS.has(key) || next === undefined || next.startsWith('--')) {
      flags[key] = true;
    } else {
      flags[key] = next;
      i++;
    }
  }

  /**
   * Count ripgrep matches for `pattern` under PROJECT_ROOT.
   * Returns the total match count, or null when the check could not be run
   * (rg missing, invalid pattern, timeout) so the caller can skip it.
   */
  const countMatches = (pattern) => {
    try {
      // execFileSync passes the pattern as a single argv entry: no shell is
      // involved, so `$(...)` or backticks coming from a markdown file are
      // never expanded. `--` stops option parsing; the explicit `.` makes rg
      // search the project tree instead of probing stdin.
      const out = require('child_process').execFileSync(
        'rg',
        ['--count-matches', '--', pattern, '.'],
        { cwd: PROJECT_ROOT, encoding: 'utf8', timeout: 10000, stdio: ['ignore', 'pipe', 'ignore'] }
      );
      // Output is one `path:count` line per file; take the text after the
      // last colon so paths that themselves contain ':' do not skew the sum.
      return out.split('\n').reduce((sum, line) => {
        const n = parseInt(line.slice(line.lastIndexOf(':') + 1), 10);
        return Number.isNaN(n) ? sum : sum + n;
      }, 0);
    } catch (err) {
      // rg exits with status 1 when the search ran but found nothing.
      if (err && err.status === 1) return 0;
      // Anything else (ENOENT, bad regex, timeout): best-effort, skip.
      return null;
    }
  };

  const targets = [];
  if (flags.file) {
    if (!fs.existsSync(flags.file)) throw new Error(`File not found: ${flags.file}`);
    targets.push(flags.file);
  } else {
    const phaseArg = positional[0];
    if (!phaseArg) throw new Error('Usage: plan validate-evidence <phase-number> [--spot-check] | --file <path>');
    const phasesDir = path.join(PLANNING_DIR, 'phases');
    if (!fs.existsSync(phasesDir)) throw new Error(`No phases directory at ${phasesDir}`);
    // Accept both the unpadded ('8-name') and legacy padded ('08-name') forms.
    const norm = String(phaseArg).replace(/^0+/, '') || '0';
    const padded = norm.padStart(2, '0');
    let phaseDir = null;
    for (const entry of fs.readdirSync(phasesDir)) {
      if (entry.startsWith(`${norm}-`) || entry.startsWith(`${padded}-`) || entry === norm || entry === padded) {
        phaseDir = path.join(phasesDir, entry);
        break;
      }
    }
    if (!phaseDir) throw new Error(`Phase ${phaseArg} directory not found`);
    for (const f of fs.readdirSync(phaseDir)) {
      if (/-SPRINT\.md$/.test(f) || /-PLAN\.md$/.test(f)) targets.push(path.join(phaseDir, f));
    }
  }

  if (targets.length === 0) {
    return { ok: true, files_scanned: 0, tasks_total: 0, violations: [], message: 'No SPRINT.md / PLAN.md files found' };
  }

  const violations = [];
  let tasksTotal = 0;
  let tasksPassed = 0;
  let spotChecks = 0;
  let spotCheckMismatches = 0;

  for (const file of targets) {
    const text = fs.readFileSync(file, 'utf8');
    // Match <task ...>...</task> blocks (planner format) AND ### Story headings (sprint format).
    const taskBlocks = [];
    const taskRe = /<task[^>]*?id\s*=\s*["']([^"']+)["'][^>]*?>([\s\S]*?)<\/task>/gi;
    let m;
    while ((m = taskRe.exec(text)) !== null) {
      taskBlocks.push({ id: m[1], body: m[2] });
    }
    // Story-format fallback: ### Story 8.1.3 — name { body until next ### Story or end }.
    if (taskBlocks.length === 0) {
      // JavaScript has no `\Z` anchor (it would match a literal "Z"), and `$`
      // means end-of-line under the `m` flag, so end-of-input is spelled
      // `(?![\s\S])`. Without it the last story in a file is never captured.
      const storyRe = /^###\s+Story\s+(\S+)[^\n]*\n([\s\S]*?)(?=^###\s+Story\s+|(?![\s\S]))/gm;
      while ((m = storyRe.exec(text)) !== null) {
        taskBlocks.push({ id: m[1], body: m[2] });
      }
    }

    for (const t of taskBlocks) {
      tasksTotal++;
      // Accept either an <evidence> element or a markdown **Evidence:** paragraph.
      const evMatch = t.body.match(/<evidence>([\s\S]*?)<\/evidence>/i)
        || t.body.match(/(?:^|\n)\s*\*\*Evidence:?\*\*\s*([\s\S]*?)(?=\n\s*\*\*|\n\n|$)/i);
      if (!evMatch || !evMatch[1].trim()) {
        violations.push({
          file: path.relative(PROJECT_ROOT, file),
          task_id: t.id,
          severity: 'BLOCKER',
          kind: 'missing-evidence',
          message: 'Task has no <evidence> block. Per issue #649, every task must cite grep hits, line ranges, or a creates: justification.',
        });
        continue;
      }
      const evidence = evMatch[1].trim();
      // Must contain at least one of: grep:, lines:, creates:
      const hasGrep = /(^|\n)\s*grep:/i.test(evidence) || /\brg\b/.test(evidence);
      const hasLines = /(^|\n)\s*lines:/i.test(evidence) || /\b\S+\.\w+:\d+(-\d+)?/.test(evidence);
      const hasCreates = /(^|\n)\s*creates:/i.test(evidence);
      if (!hasGrep && !hasLines && !hasCreates) {
        violations.push({
          file: path.relative(PROJECT_ROOT, file),
          task_id: t.id,
          severity: 'BLOCKER',
          kind: 'evidence-shape',
          message: 'Evidence block exists but contains no grep:, lines:, or creates: marker. Cannot be traced to real code.',
        });
        continue;
      }
      tasksPassed++;

      // Optional spot-check: re-run the first grep pattern cited and compare hit counts.
      if (flags['spot-check'] && hasGrep) {
        const claim = evidence.match(/grep:\s*(?:`|')?([^\n`']+?)(?:`|')?\s*(?:→|->|=>|—|-)\s*(\d+)/i)
          || evidence.match(/`(rg[^`]+)`[^→]*→\s*(\d+)/i);
        if (claim) {
          const pattern = claim[1].trim();
          const claimedCount = parseInt(claim[2], 10);
          // NOTE(review): everything after a leading `rg ` is passed to ripgrep
          // as ONE pattern, so a cited command that also names a scope path
          // (e.g. `rg 'x' src/`) is not split into pattern + path.
          const actualCount = countMatches(pattern.replace(/^rg\s+/, ''));
          // null means ripgrep could not be run; spot-check is best-effort and
          // must not turn an unavailable tool into a validation failure.
          if (actualCount !== null) {
            spotChecks++;
            const drift = Math.abs(actualCount - claimedCount) / Math.max(claimedCount, 1);
            if (drift > 0.1) {
              spotCheckMismatches++;
              violations.push({
                file: path.relative(PROJECT_ROOT, file),
                task_id: t.id,
                severity: 'BLOCKER',
                kind: 'spot-check-mismatch',
                message: `Evidence claims grep hits=${claimedCount} for pattern '${pattern}', actual=${actualCount} (drift ${(drift*100).toFixed(0)}%)`,
              });
            }
          }
        }
      }
    }
  }

  return {
    ok: violations.length === 0,
    files_scanned: targets.length,
    tasks_total: tasksTotal,
    tasks_passed: tasksPassed,
    spot_checks_run: spotChecks,
    spot_check_mismatches: spotCheckMismatches,
    violations,
  };
}
3778
+
3559
3779
  /** plan list — glob .planning/plans/ for plan files. */
3560
3780
  function cmdPlanList() {
3561
3781
  const plansDir = path.join(PLANNING_DIR, 'plans');
@@ -4825,8 +5045,19 @@ function cmdProgress(args) {
4825
5045
  const routes = [];
4826
5046
  const statePhases = (state && (state.state?.phases || state.phases)) || [];
4827
5047
 
4828
- // Route A — phases with pending plans (ready to execute)
5048
+ // Route A — phases with pending plans (ready to execute).
5049
+ // Issue #653 — never recommend executing a phase whose state.json status
5050
+ // is already complete/done/verified, even if its on-disk plan_count >
5051
+ // summary_count. Missing second summary file is not the canonical
5052
+ // completion signal; state.json is. Run /rihal-audit phase <N> for
5053
+ // disk-vs-state drift, but stop steering users into re-executing
5054
+ // finished work.
5055
+ const isPhaseDone = (p) => {
5056
+ const s = String((p && p.status) || '').toLowerCase();
5057
+ return s === 'complete' || s === 'completed' || s === 'done' || s === 'verified' || Boolean(p && p.completed);
5058
+ };
4829
5059
  const pendingExec = statePhases.filter(p => {
5060
+ if (isPhaseDone(p)) return false;
4830
5061
  const disk = diskByNum[phaseKey(p)];
4831
5062
  return disk && disk.plan_count > disk.summary_count;
4832
5063
  }).slice(0, 3);
@@ -5250,7 +5481,13 @@ async function main() {
5250
5481
  break;
5251
5482
  case 'plan':
5252
5483
  if (args[0] === 'list') { result = cmdPlanList(); }
5253
- else { console.error('Unknown plan subcommand. Valid: list'); process.exit(1); }
5484
+ else if (args[0] === 'validate-evidence') {
5485
+ result = cmdPlanValidateEvidence(args.slice(1));
5486
+ // Issue #649 — non-zero exit on violations so CI / sprint-checker can gate.
5487
+ console.log(JSON.stringify(result, null, 2));
5488
+ process.exit(result.ok ? 0 : 1);
5489
+ }
5490
+ else { console.error('Unknown plan subcommand. Valid: list, validate-evidence'); process.exit(1); }
5254
5491
  break;
5255
5492
  case 'phase-plan-index':
5256
5493
  result = cmdPhasePlanIndex(args.join(' '));
@@ -5440,7 +5677,8 @@ async function main() {
5440
5677
  console.log(' classify-tech --keywords "<keywords>" → classify tech stack from keywords (frontend/backend/mobile/styling)');
5441
5678
  console.log(' context refresh → refresh .rihal/context/ cache from .rihal/sources.yaml');
5442
5679
  console.log(' module <subcommand> [args] → module system helpers');
5443
- console.log(' plan <subcommand> [args] → phase/plan operations');
5680
+ console.log(' plan <list|validate-evidence> → phase/plan operations');
5681
+ console.log(' plan validate-evidence <N> [--spot-check] → enforce <evidence> blocks in SPRINT.md (#649); exit 1 on violation');
5444
5682
  console.log(' phase-plan-index <N> → JSON inventory of plans under phase N (waves, summary status, task counts)');
5445
5683
  console.log(' phases list [--type X] [--pick path] → directory inventory of .planning/phases (--type: summaries|sprints|directories|all; --pick: e.g. directories[-1])');
5446
5684
  console.log(' find-phase <N> [--raw] → resolve phase number to dir/slug + decimal children');
@@ -5472,7 +5710,7 @@ async function main() {
5472
5710
  console.log(' state add-decision "<summary>" → append to decisions[] + ~/.rihal/decisions.jsonl');
5473
5711
  console.log(' state decisions-global [--limit N] [--project <name>] [--since <ISO>] → query ~/.rihal/decisions.jsonl across all projects');
5474
5712
  console.log(' state add-blocker "<description>" → append to blockers[]');
5475
- console.log(' state resolve-blocker <index> → mark blocker as resolved');
5713
+ console.log(' state resolve-blocker <index>|--all|--phase <N> --issue <N>|--commit <sha>|--noref → mark blocker(s) resolved (#654, #656)');
5476
5714
  console.log(' state record-session → update last_session timestamp');
5477
5715
  console.log(' state record-council --slug <s> --panel <csv> --artifact <path>');
5478
5716
  console.log(' state record-chain --slug <s> --agents <csv> --artifacts <path>');
@@ -5505,10 +5743,40 @@ async function main() {
5505
5743
  return;
5506
5744
  default: {
5507
5745
  const stateSubs = ['read','get','init','set-phase','advance-plan','record-execution','record-council','record-chain','add-decision','decisions-global','add-blocker','resolve-blocker','record-session','set-ids-in-state','migrate-ids','migrate-schema','next-phase-id','next-plan-id','next-task-id','resolve-id','workstream-create','workstream-switch','workstream-list','workstream-status','workstream-complete','workstream-validate','insert-phase','planned-phase','begin-phase','complete-phase','reset'];
5746
+ // Issue #656 — top-level aliases for intuitive guesses.
5747
+ const intuitionAliases = {
5748
+ blocker: 'state resolve-blocker',
5749
+ blockers: 'state resolve-blocker',
5750
+ decision: 'state add-decision',
5751
+ decisions: 'state decisions-global',
5752
+ sync: 'state sync',
5753
+ };
5508
5754
  if (stateSubs.includes(subcommand)) {
5509
5755
  console.error(`Did you mean: state ${subcommand}? Run 'rihal-tools.cjs help' for full usage.`);
5756
+ } else if (intuitionAliases[subcommand]) {
5757
+ console.error(`'${subcommand}' is not a top-level command. Did you mean: ${intuitionAliases[subcommand]}?`);
5510
5758
  } else {
5511
- console.error(`Unknown subcommand: ${subcommand}. Run 'rihal-tools.cjs help' for full usage.`);
5759
+ // Fuzzy hint suggest top 2 closest state subcommands by simple substring/edit-distance.
5760
+ const lev = (a, b) => {
5761
+ const m = Array.from({length: a.length+1}, (_,i) => Array(b.length+1).fill(0));
5762
+ for (let i=0; i<=a.length; i++) m[i][0]=i;
5763
+ for (let j=0; j<=b.length; j++) m[0][j]=j;
5764
+ for (let i=1; i<=a.length; i++) for (let j=1; j<=b.length; j++) {
5765
+ m[i][j] = a[i-1]===b[j-1] ? m[i-1][j-1] : 1 + Math.min(m[i-1][j], m[i][j-1], m[i-1][j-1]);
5766
+ }
5767
+ return m[a.length][b.length];
5768
+ };
5769
+ const candidates = stateSubs.concat(Object.keys(intuitionAliases));
5770
+ const scored = candidates
5771
+ .map(c => ({ c, d: c.includes(subcommand) || subcommand.includes(c) ? 0.5 : lev(c, subcommand) }))
5772
+ .sort((a, b) => a.d - b.d)
5773
+ .slice(0, 2)
5774
+ .filter(x => x.d <= Math.max(2, subcommand.length / 2));
5775
+ if (scored.length > 0) {
5776
+ console.error(`Unknown subcommand: ${subcommand}. Closest matches: ${scored.map(s => s.c).join(', ')}. Run 'rihal-tools.cjs help' for full usage.`);
5777
+ } else {
5778
+ console.error(`Unknown subcommand: ${subcommand}. Run 'rihal-tools.cjs help' for full usage.`);
5779
+ }
5512
5780
  }
5513
5781
  process.exit(1);
5514
5782
  }
@@ -12,7 +12,8 @@ Run smart discuss for the current phase. Proposes grey area answers in batch tab
12
12
 
13
13
  ```bash
14
14
  PHASE_NUM="${PHASE_NUM}" # local alias; other workflows use PHASE_NUMBER from init JSON
15
- PADDED_PHASE=$(printf "%02d" "${PHASE_NUM%.*}")
15
+ # Issue #652 — no leading zeros. Variable name kept for backward compat.
16
+ PADDED_PHASE="${PHASE_NUM%.*}"
16
17
  PHASE_DIR=".planning/phases/${PADDED_PHASE}-${PHASE_SLUG}"
17
18
  ```
18
19
 
@@ -12,8 +12,23 @@ route back to the user.
12
12
  plan count, wave count, autonomy flag per plan, files_modified overlaps
13
13
  3. **Anti-patterns**: check for `.continue-here.md` (paused state), STATE.md
14
14
  error flag, existing VERIFICATION.md with FAIL items without overrides
15
- 4. **Branch check**: confirm current git branch matches milestone's expected
16
- branch (from config or roadmap)
15
+ 4. **Branch check** (issue #659): confirm current git branch is appropriate
16
+ for the work. Two checks, both blocking:
17
+
18
+ a. **Not on main/master without consent**: if `git branch --show-current`
19
+ returns `main` or `master`, refuse to execute. Suggest:
20
+ `git switch -c <phase>-<plan>-<slug>` (e.g. `git switch -c 8-1-aria`).
21
+ User can override only by passing `--allow-main` to /rihal-execute and
22
+ explicitly typing the override on this turn.
23
+
24
+ b. **Working tree clean enough**: if `git status --porcelain` shows
25
+ modified files unrelated to this phase's `files_modified` frontmatter,
26
+ surface them and ask whether to commit, stash, or proceed. Seen in a
27
+ real session: P0 CSS fixes ended up uncommitted in a dirty working
28
+ tree, with no commit boundary.
29
+
30
+ The branch name should align with the phase/plan IDs from state — check
31
+ `workflow.branch_pattern` config (default `<phase>-<plan>-<slug>`).
17
32
  5. **Worktree config**: read `workflow.use_worktrees` — if true + parallelization
18
33
  is true + no file overlaps, plans in a wave run parallel via worktrees
19
34
  </pre_flight>
@@ -2,6 +2,15 @@
2
2
  Sub-step of plan.md — Step 8 Spawn rihal-planner Agent. Spawns rihal-planner with full context to generate SPRINT.md plans. Includes deep-work rules and downstream consumer spec.
3
3
  </purpose>
4
4
 
5
+ <filename_convention>
6
+ Issue #657 — every SPRINT.md, including the first plan in a phase, uses the
7
+ sequence-numbered form `{phase}-{plan}-SPRINT.md` (no leading zeros per #652).
8
+ Examples: `8-1-SPRINT.md`, `8-2-SPRINT.md`. Do NOT emit a bare `{phase}-SPRINT.md`
9
+ or `{phase}-PLAN.md` for the first plan — that creates an inconsistent series
10
+ when a second plan is added later. The plan-number computation in plan.md uses
11
+ `NEXT_PLAN_NUMBER=$((EXISTING_PLAN_COUNT + 1))` and starts at 1 for new phases.
12
+ </filename_convention>
13
+
5
14
  ## 8. Spawn rihal-planner Agent
6
15
 
7
16
  Display banner:
@@ -327,6 +327,30 @@ Always offer exactly three numbered options:
327
327
 
328
328
  Wait for the user's choice before proceeding. Do not auto-select.
329
329
 
330
+ **If user picks option 1 (Add more plans) — issue #650:**
331
+
332
+ This is **NOT** a license to hand-write a new SPRINT.md inline. Continue down the
333
+ normal pipeline exactly as if no plans existed yet:
334
+
335
+ 1. Proceed to Step 7 (context-paths) and Step 7.5 (Nyquist verification) as normal.
336
+ 2. Spawn `rihal-planner` via `@rihal/workflows/plan-spawn-planner.md` (Step 8). The
337
+ planner subagent is mandatory — the orchestrator must NOT write SPRINT.md
338
+ directly via the `Write` tool. Pass the existing plan list to the planner so
339
+ it picks the next plan number and avoids re-covering shipped tasks.
340
+ 3. After the planner returns, run sprint-checker (Step 10) the same as a
341
+ first-time plan. The "PLANNED ✓" banner is gated on a passing CHECK.md.
342
+
343
+ A run that emits a SPRINT.md without a corresponding planner Task() invocation
344
+ in the same turn is a malfunction — see issue #650. Stop and report instead of
345
+ shipping a hand-rolled plan.
346
+
347
+ **If user picks option 3 (Replan from scratch):**
348
+
349
+ Same as option 1, but pass the existing plans to the planner with a `replace:
350
+ true` directive. Existing plan files (SPRINT.md, or legacy PLAN.md) are renamed to `*-SUPERSEDED.md` (do
351
+ not delete) before the planner writes the new ones. Subagent invocation is
352
+ still mandatory.
353
+
330
354
  **If user picks option 2 (View existing plans):**
331
355
 
332
356
  Display a sprint summary table (sprint id → one-line goal).
@@ -822,6 +846,36 @@ Route to `<offer_next>` (existing behavior).
822
846
 
823
847
  </process>
824
848
 
849
+ <banner_emission_gate>
850
+ Issue #655 — the success banner is gated on real verification, not vibes.
851
+ Before emitting `PLANNED ✓`, confirm one of these is true:
852
+
853
+ 1. A passing CHECK.md exists at `${PHASE_DIR}/*-CHECK.md` from rihal-sprint-checker
854
+ in this run AND its overall verdict is `pass` (or `pass-with-cautions`).
855
+ 2. The user has explicitly said "skip verification" / "override" this run AND that
856
+ override is recorded in the offer-next output's `Verification:` field as
857
+ `Passed with override`.
858
+ 3. `plan_checker_enabled` is false in config — recorded as `Verification: Skipped
859
+ (config-disabled)`.
860
+
861
+ If none of the three holds (sprint-checker was never spawned, or it returned a
862
+ fail verdict, or its CHECK.md is missing) — DO NOT emit `PLANNED ✓`. Emit:
863
+
864
+ ```
865
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
866
+ Rihal ► PHASE {X} PLANNED ⚠ (gates skipped)
867
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
868
+
869
+ Plans were written but rihal-sprint-checker did not return a passing
870
+ CHECK.md. Run /rihal-plan {X} --reviews to gate the plans before
871
+ executing, or pass --skip-verify if you accept the risk.
872
+ ```
873
+
874
+ The same rule applies to `VERIFIED ✓` (after /rihal-verify-phase) and
875
+ `DONE ✓` (after /rihal-execute) — the success-tick is reserved for
876
+ gate-passed states.
877
+ </banner_emission_gate>
878
+
825
879
  <offer_next>
826
880
  Output this markdown directly (not as a code block):
827
881
 
@@ -86,7 +86,11 @@ Task tool call:
86
86
 
87
87
  **Audit 6 pillars (pass/fail + findings):**
88
88
 
89
- 1. Color Consistency — All text/backgrounds match color tokens, contrast ratios >= WCAG AA
89
+ 1. Color Consistency — All text/backgrounds match color tokens, contrast ratios >= WCAG AA.
90
+ **Hex literal scan (issue #660):** run `rg -n '#[0-9A-Fa-f]{3,6}\b' <css/tailwind paths>` —
91
+ any hex outside the `:root { ... }` token block in globals.css (or equivalent token
92
+ definition file) is a regression flag. If a token is missing for the stated semantic
93
+ role, the fix is to ADD the token, never to inline the hex. Cite the exact file:line.
90
94
  2. Typography Compliance — Font sizes, weights, line heights match typography scales
91
95
  3. Component Inventory — All specified components present, all variants implemented
92
96
  4. Accessibility — aria-labels, roles, keyboard navigation, focus rings per WCAG 2.1 AA