@haposoft/cafekit 0.8.8 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > Claude Code-first spec-driven workflow and runtime bundle for AI coding assistants.
4
4
 
5
- [![Version](https://img.shields.io/badge/version-0.8.0-blue.svg)](https://github.com/haposoft/cafekit)
5
+ [![Version](https://img.shields.io/badge/version-0.8.10-blue.svg)](https://github.com/haposoft/cafekit)
6
6
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
7
7
  [![Claude%20Code](https://img.shields.io/badge/Claude%20Code-Primary-orange.svg)](https://claude.ai/code)
8
8
 
@@ -49,6 +49,7 @@ Claude Code install targets:
49
49
  ├── skills/
50
50
  ├── agents/
51
51
  ├── hooks/
52
+ ├── cafekit.json
52
53
  ├── status.cjs
53
54
  ├── runtime.json
54
55
  ├── settings.json
@@ -61,6 +62,13 @@ Managed runtime features include:
61
62
  - rule/context injection
62
63
  - spec state awareness
63
64
  - safe settings merge on reinstall
65
+ - installed CafeKit version tracking in `.claude/cafekit.json`
66
+
67
+ To check the installed CafeKit package version:
68
+
69
+ ```bash
70
+ cat .claude/cafekit.json
71
+ ```
64
72
 
65
73
  ## Core Skills
66
74
 
package/bin/install.js CHANGED
@@ -20,6 +20,7 @@ const os = require('os');
20
20
  const readline = require('readline');
21
21
  const { execSync } = require('child_process');
22
22
  const packageJson = require('../package.json');
23
+ const INSTALL_COMMAND = `npx ${packageJson.name}@${packageJson.version}`;
23
24
 
24
25
  function validateManifestV2(manifest) {
25
26
  if (!manifest || manifest.version !== 2) return false;
@@ -339,6 +340,55 @@ function ensureWorkflowDependencies(platformKey, platform, results, options = {}
339
340
  });
340
341
  }
341
342
 
343
/**
 * Read and parse a JSON file, always yielding a plain object.
 *
 * Callers read properties straight off the result (for example `.version`),
 * so anything that is not a plain JSON object is normalised to `{}`:
 * a missing file, an unreadable file, malformed JSON, or valid JSON whose
 * top-level value is `null`, an array, or a primitive.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {object} The parsed object, or `{}` when no usable object exists.
 */
function readJsonFile(filePath) {
  if (!fs.existsSync(filePath)) {
    return {};
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    // JSON.parse can legitimately return null, arrays, or primitives; none of
    // those can be treated as a metadata record by the caller.
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Best effort by design: a corrupt or unreadable file is treated as absent.
    return {};
  }
}
354
+
355
/**
 * Write `cafekit.json` into the platform folder, recording which package
 * version was installed and when.
 *
 * The original `installedAt` timestamp is carried over from an existing file;
 * `previousVersion` is only recorded when the prior version string differs
 * from the one being installed. Increments `results.updated` when the file
 * already existed, otherwise `results.copied`.
 *
 * @param {string} platformKey - Key into `PLATFORMS`.
 * @param {object} results - Installer counters; `copied` / `updated` are incremented.
 */
function writePlatformVersionMetadata(platformKey, results) {
  const platform = PLATFORMS[platformKey];
  const targetPath = path.join(platform.folder, 'cafekit.json');

  // Capture existence before writing so the summary reflects the prior state.
  const targetExists = fs.existsSync(targetPath);
  const existingMetadata = readJsonFile(targetPath);
  const now = new Date().toISOString();

  let previousVersion = null;
  if (typeof existingMetadata.version === 'string') {
    previousVersion = existingMetadata.version;
  }

  const metadata = {
    schemaVersion: 1,
    packageName: packageJson.name,
    version: packageJson.version,
    platform: platform.id,
    platformName: platform.name,
    installedAt: existingMetadata.installedAt || now,
    lastInstalledAt: now,
    installCommand: INSTALL_COMMAND
  };

  const versionChanged = previousVersion && previousVersion !== packageJson.version;
  if (versionChanged) {
    metadata.previousVersion = previousVersion;
  }

  const serialized = JSON.stringify(metadata, null, 2);
  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  fs.writeFileSync(targetPath, `${serialized}\n`, 'utf8');

  if (!targetExists) {
    console.log(` ✓ Version metadata installed: ${targetPath}`);
    results.copied++;
    return;
  }

  console.log(` ↻ Version metadata updated: ${targetPath}`);
  results.updated++;
}
391
+
342
392
  function getPlatformSpecFiles(platformKey) {
343
393
  if (platformKey === 'claude') {
344
394
  const manifestCommands = CLAUDE_MIGRATION_MANIFEST?.commands?.core;
@@ -1065,6 +1115,8 @@ async function main() {
1065
1115
  copyGeminiFile(platformKey, results, installerOptions);
1066
1116
  }
1067
1117
 
1118
+ writePlatformVersionMetadata(platformKey, results);
1119
+
1068
1120
  results.targets.push(platform.commandsDir);
1069
1121
  console.log();
1070
1122
  }
@@ -1085,6 +1137,7 @@ async function main() {
1085
1137
  console.log();
1086
1138
  console.log(` Copied Files: ${results.copied}`);
1087
1139
  console.log(` Updated Files: ${results.updated}`);
1140
+ console.log(` CafeKit Version: ${packageJson.version}`);
1088
1141
  console.log(` Skipped Files: ${results.skipped}`);
1089
1142
  console.log(` Installed Skills: ${results.installedSkills > 0 ? 'Yes ✓' : 'No'}`);
1090
1143
  console.log(` Dependency Checks: ${results.dependencyChecks}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@haposoft/cafekit",
3
- "version": "0.8.8",
3
+ "version": "0.8.10",
4
4
  "description": "Claude Code-first spec-driven workflow for AI coding assistants. Bundles CafeKit hapo: skills, runtime hooks, agents, and installer scaffolding.",
5
5
  "author": "Haposoft <nghialt@haposoft.com>",
6
6
  "license": "MIT",
@@ -37,7 +37,7 @@ These rules reduce common agent coding failures: hidden assumptions, overbuilt s
37
37
  ### 4. Goal-Driven Execution
38
38
 
39
39
  - Convert requests into verifiable success criteria.
40
- - For spec tasks, use `Completion Criteria` and `Task Test Plan & Verification Evidence` as the source of truth.
40
+ - For spec tasks, use `Completion Criteria` and `Evidence` as the source of truth. Existing task files may use `Task Test Plan & Verification Evidence` or legacy `Verification & Evidence`.
41
41
  - For bugs, reproduce with a failing test or concrete evidence when feasible before fixing.
42
42
  - Loop until verification passes or a real blocker is recorded.
43
43
 
@@ -65,7 +65,7 @@ Use this loop for non-trivial work:
65
65
  A task is done only when all apply:
66
66
 
67
67
  - implementation satisfies `Completion Criteria`
68
- - `Task Test Plan & Verification Evidence` is satisfied with concrete proof
68
+ - `Evidence` is satisfied with concrete proof
69
69
  - preflight/build/test outcomes are passing or an explicit blocker is recorded
70
70
  - code review has no critical issues
71
71
  - a verification receipt exists before task state is synced to `done`
@@ -23,7 +23,7 @@ Extract and verify:
23
23
  1. Declared deliverables (files, routes, entrypoints, UI surfaces, schemas, migrations)
24
24
  2. Declared task scope (`Related Files` and direct support files that are clearly justified)
25
25
  3. Completion Criteria
26
- 4. Task Test Plan & Verification Evidence expectations (or legacy Verification & Evidence)
26
+ 4. Task Evidence expectations (or Task Test Plan & Verification Evidence / legacy Verification & Evidence)
27
27
  5. Canonical Contracts & Invariants from the design
28
28
  6. Named technologies and runtime choices that the task/spec explicitly requires
29
29
  7. Runtime entrypoints/callers and reachability obligations from task evidence or the task-aware scout report
@@ -31,6 +31,7 @@ specs/<feature>/
31
31
  - `spec.json` is generated from `.claude/skills/specs/templates/spec-state.json`; never write `init.json` or `spec-state.json` into the spec directory.
32
32
  - Task filenames MUST include the `task-` prefix, requirement number, two-digit sequence, and descriptive slug, for example `tasks/task-R0-01-project-scaffolding.md`.
33
33
  - Do NOT write `hydration.md`; task hydration is session/task-state synchronization only.
34
+ - Before setting `ready_for_implementation = true`, run `node .claude/scripts/validate-spec-output.cjs specs/<feature>` and fix every failure.
34
35
 
35
36
  ## Mental Models (How You Think)
36
37
 
@@ -125,26 +126,24 @@ Before writing `design.md`, select a discovery mode and record the reason:
125
126
  - Reject tasks outside `scope_lock.in_scope`
126
127
  - Requirement coverage format: list numeric IDs only, with no descriptive suffixes
127
128
  - Apply `(P)` parallel markers when applicable (load `.claude/skills/specs/rules/tasks-parallel-analysis.md`)
128
- - Every task MUST include `Task Test Plan & Verification Evidence` with exact commands, artifacts/runtime surfaces, runtime reachability proof, and negative-path checks.
129
+ - Every task MUST use the compact implementation-ready shape: `Context`, `Steps`, `Requirements`, `Related Files`, `Completion Criteria`, `Evidence`, `Risk Assessment`.
130
+ - `Evidence` MUST include exact commands, artifacts/runtime surfaces, runtime reachability proof, and negative-path checks. Existing specs may use `Task Test Plan & Verification Evidence` or legacy `Verification & Evidence`.
129
131
  - Completion criteria MUST be objective enough that a downstream quality gate can prove them without guesswork.
130
132
  - UI/app/runtime workflows MUST include a final integration/reachability task or final integration section that names the real entrypoint and proves all scoped user-facing surfaces are wired.
131
133
  - Do not allow orphan task outputs: components, services, hooks, routes, commands, workers, providers, reducers, data loaders, and generated artifacts must be reachable now or assigned to a named later integration task.
132
- - Validation decisions that affect implementation MUST be written into implementation-facing sections (`Objective`, `Constraints`, `Implementation Steps`, `Completion Criteria`, `Task Test Plan & Verification Evidence`) rather than only `Risk Assessment`.
133
-
134
- ### Sub-Task Detail Requirements (MANDATORY)
135
- Each task file MUST contain granular sub-tasks with the following structure:
136
- 1. **Major steps** (`- [ ] 1. ...`) group related work by cohesion
137
- 2. **Sub-tasks** (`- [ ] 1.1 ...`) describe specific actionable items (1-3 hours each)
138
- 3. **Detail bullets** under each sub-task describe:
139
- - Business logic and behavior to implement
140
- - Edge cases and constraints
141
- - Validation rules
142
- 4. **Requirement mapping** (`_Requirements: X.X_`) at the end of EVERY sub-task — no exceptions
143
- 5. **Test coverage section** as the last major step in every task, with unit + integration sub-tasks
144
- 6. **Completion criteria** must be observable and testable not subjective
145
- 7. **Scope/reachability criteria** must prove the task implements scoped behavior without out-of-scope additions and without unreachable runtime-facing outputs
146
-
147
- **FORBIDDEN**: Task files with only 3-5 top-level checkboxes and no sub-task breakdown. This level of detail is INSUFFICIENT for implementation.
134
+ - Validation decisions that affect implementation MUST be written into implementation-facing sections (`Context`, `Steps`, `Requirements`, `Completion Criteria`, `Evidence`) rather than only `Risk Assessment`.
135
+
136
+ ### Task Detail Requirements (MANDATORY)
137
+ Each task file MUST be compact but implementation-ready:
138
+ 1. `Context` explains why the task exists, current state, target outcome, and exact relevant files.
139
+ 2. `Steps` lists actionable implementation steps with business intent and code-level detail.
140
+ 3. `Requirements` lists the requirement IDs covered by this task.
141
+ 4. `Related Files` names exact paths and action type when known.
142
+ 5. `Completion Criteria` is observable and testable.
143
+ 6. `Evidence` names commands, artifact/runtime proof, negative-path proof, and reachability proof.
144
+ 7. `Risk Assessment` states real risks or `None identified`.
145
+
146
+ **FORBIDDEN**: Vague task files with no exact files, no requirement mapping, or no evidence. Compact is good; vague is invalid.
148
147
 
149
148
  ## Research Phase
150
149
 
@@ -175,12 +174,14 @@ Before marking the spec ready:
175
174
  4. Fail if any path in `task_files` does not exist
176
175
  5. Fail if any on-disk task file is missing from `task_registry` or any registry path does not exist
177
176
  6. Fail if any task file path does not match `tasks/task-R{N}-{SEQ}-<slug>.md` with two-digit `SEQ` (for example `tasks/task-R0-01-project-scaffolding.md`)
178
- 7. Infer `design_context.validation_recommended = true` for auth, privacy, delete-data, migration, schema-change, browser-extension-permission, external-provider, or 5+ task file specs
179
- 8. If the spec scope switched away from Claude/Anthropic, fail if `requirements.md`, `design.md`, or `tasks/*.md` still contain stale provider strings like `Claude API`, `Haiku`, or `haiku_reachable`. `research.md` may mention old providers only as historical comparison.
180
- 9. For delete/privacy specs, fail if requirements/design/tasks mix multiple deletion policies (for example `email_hash` in one place and `deleted-<uuid>` in another) without one canonical design decision.
181
- 10. If `validation_recommended = true` and validation has not completed (or the user did not explicitly accept risk), keep `ready_for_implementation = false`
182
- 11. Reject task files that use legacy non-numeric mappings like `NFR-1`
183
- 12. If validation decisions were accepted, fail unless they are reflected in implementation-facing sections of affected artifacts and `spec.json.updated_at` / review timestamps reflect the reviewed state
177
+ 7. Fail if all task files are `R0` when the spec has more than two tasks
178
+ 8. Run `node .claude/scripts/validate-spec-output.cjs specs/<feature>` and treat non-zero exit as blocking
179
+ 9. Infer `design_context.validation_recommended = true` for auth, privacy, delete-data, migration, schema-change, browser-extension-permission, external-provider, or 5+ task file specs
180
+ 10. If the spec scope switched away from Claude/Anthropic, fail if `requirements.md`, `design.md`, or `tasks/*.md` still contain stale provider strings like `Claude API`, `Haiku`, or `haiku_reachable`. `research.md` may mention old providers only as historical comparison.
181
+ 11. For delete/privacy specs, fail if requirements/design/tasks mix multiple deletion policies (for example `email_hash` in one place and `deleted-<uuid>` in another) without one canonical design decision.
182
+ 12. If `validation_recommended = true` and validation has not completed (or the user did not explicitly accept risk), keep `ready_for_implementation = false`
183
+ 13. Reject task files that use legacy non-numeric mappings like `NFR-1`
184
+ 14. If validation decisions were accepted, fail unless they are reflected in implementation-facing sections of affected artifacts and `spec.json.updated_at` / review timestamps reflect the reviewed state
184
185
 
185
186
  ## Execution Workflow Summary
186
187
 
@@ -11,15 +11,26 @@ You are a battle-hardened QA engineer who has been burned by production incident
11
11
 
12
12
  ## Task-Aware Inputs
13
13
 
14
- If the prompt includes task file paths, Completion Criteria, Task Test Plan & Verification Evidence, or legacy Verification & Evidence instructions, treat them as authoritative.
14
+ If the prompt includes task file paths, Completion Criteria, Evidence, Task Test Plan & Verification Evidence, or legacy Verification & Evidence instructions, treat them as authoritative.
15
15
  Diff-aware test selection does NOT replace task-specific verification.
16
16
  If the task/spec names a specific framework, auth system, transport, or shared-state boundary, keep that contract visible while evaluating evidence.
17
- If the prompt includes a feature name or `specs/<feature>`, load `spec.json`, `requirements.md`, `design.md`, and the active/recent task files. Treat `scope_lock`, Completion Criteria, and Task Test Plan evidence as the test contract.
17
+ If the prompt includes a feature name or `specs/<feature>`, load `spec.json`, `requirements.md`, `design.md`, and the active/recent task files. Treat `scope_lock`, Completion Criteria, and Evidence as the test contract.
18
+
19
+ ## Test Type Expectations
20
+
21
+ Select tests by the task's touched surface:
22
+ - Pure logic/data/parser/sort/filter/validator/regression work requires unit tests with negative-path coverage.
23
+ - Stateful UI, context/store, API/service, persistence, or provider wiring requires component or integration proof.
24
+ - Complete user workflows require E2E/UI-flow proof once the vertical slice exists.
25
+ - Layout/theme/responsive work requires runtime visual checks, viewport checks, or screenshot proof when practical.
26
+ - Interactive UI requires accessibility checks for focus, labels, roles, keyboard behavior, and ARIA when relevant.
27
+ - Scaffold/config/release plumbing can pass with smoke proof when deeper behavior is not in scope.
28
+ - Performance/security checks are required only when requirements, risk, or changed boundaries make them relevant.
18
29
 
19
30
  ## Command Resolution Order
20
31
 
21
32
  When the task file names exact commands, use this order:
22
- 1. Run every exact executable command from `Task Test Plan & Verification Evidence` (or legacy `Verification & Evidence`) in declaration order.
33
+ 1. Run every exact executable command from `Evidence` (or `Task Test Plan & Verification Evidence` / legacy `Verification & Evidence`) in declaration order.
23
34
  2. Run repo-default typecheck/test/build commands only to fill gaps not already covered above.
24
35
  3. Apply diff-aware test selection only after task-mandated commands are satisfied.
25
36
 
@@ -94,7 +94,8 @@ try {
94
94
  lines.push(`> Bạn PHẢI sử dụng công cụ Edit để cập nhật trạng thái vật lý sau khi đã có bằng chứng verify thật (build/test/runtime/artifact), không phải chỉ vì code đã viết xong.`);
95
95
  lines.push(`> 1. Sửa file \`spec.json\` (status, phase/current_phase, timestamps, \`task_files\`, \`task_registry\`, validation state nếu có thay đổi).`);
96
96
  lines.push(`> 2. Chỉ khi verify xong mới sửa file \`tasks/task-*.md\` (status + tick '[x]' các sub-task và completion criteria liên quan).`);
97
- lines.push(`> 3. NẾU VỪA HOÀN THÀNH 1 TASK SỬA SOURCE CODE, BẮT BUỘC cập nhật ngay tài liệu trong \`docs/\` (\`system-architecture.md\` hoặc Changelog) cho đồng bộ.`);
97
+ lines.push(`> 3. Trước khi set \`ready_for_implementation = true\`, PHẢI chạy \`node .claude/scripts/validate-spec-output.cjs specs/${featureName}\` sửa mọi lỗi.`);
98
+ lines.push(`> 4. NẾU VỪA HOÀN THÀNH 1 TASK CÓ SỬA SOURCE CODE, BẮT BUỘC cập nhật ngay tài liệu trong \`docs/\` (\`system-architecture.md\` hoặc Changelog) cho đồng bộ.`);
98
99
  lines.push(`> CẤM VI PHẠM LUẬT TOLLGATE NÀY NHẰM ĐẢM BẢO TÍNH ĐỒNG BỘ CỦA HỆ THỐNG.`);
99
100
  lines.push('');
100
101
 
@@ -56,7 +56,8 @@
56
56
  "scripts": {
57
57
  "required": [
58
58
  "validate-docs.cjs",
59
- "browser-tool.cjs"
59
+ "browser-tool.cjs",
60
+ "validate-spec-output.cjs"
60
61
  ]
61
62
  },
62
63
  "agentReferences": {
@@ -15,11 +15,12 @@ Use the CafeKit loop: **Understand -> Plan -> Execute -> Verify -> Sync**.
15
15
  - For non-trivial features, use `/hapo:specs` to create or validate the spec.
16
16
  - For approved specs, work one task file at a time.
17
17
  - Extract from the active task:
18
- - `Objective`
19
- - `Constraints`
18
+ - `Context`
19
+ - `Steps`
20
+ - `Requirements`
20
21
  - `Related Files`
21
22
  - `Completion Criteria`
22
- - `Task Test Plan & Verification Evidence`
23
+ - `Evidence`
23
24
  - If these are missing or too vague to verify, route back to spec correction.
24
25
 
25
26
  ## 3. Execute
@@ -31,7 +32,7 @@ Use the CafeKit loop: **Understand -> Plan -> Execute -> Verify -> Sync**.
31
32
 
32
33
  ## 4. Verify
33
34
 
34
- - Run exact commands from `Task Test Plan & Verification Evidence` first.
35
+ - Run exact commands from `Evidence` first.
35
36
  - Then run repo-level lint/test/build as needed for confidence.
36
37
  - Use only fresh verification from the current run when claiming completion.
37
38
  - `PRECHECK_FAIL` outranks `NO_TESTS`.
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CafeKit spec artifact validator.
4
+ *
5
+ * This is intentionally deterministic. Prompt rules can drift; this script is
6
+ * the hard backstop before a spec is marked ready for implementation.
7
+ */
8
+
9
+ const fs = require('fs');
10
+ const path = require('path');
11
+
12
// Relative task path: tasks/task-R{N}-{SEQ}-<slug>.md, where SEQ is exactly two
// digits and the slug is lowercase alphanumeric words joined by single hyphens.
const TASK_PATH_RE = /^tasks\/task-R\d+-\d{2}-[a-z0-9]+(?:-[a-z0-9]+)*\.md$/;

// Keys every spec.json `task_registry` entry must define.
const REQUIRED_REGISTRY_KEYS = [
  'id', 'title', 'status', 'dependencies',
  'blocker', 'started_at', 'completed_at', 'last_updated_at',
];
23
+
24
/** Print the command-line usage hint to stderr. */
function usage() {
  const hint = 'Usage: node .claude/scripts/validate-spec-output.cjs specs/<feature>';
  console.error(hint);
}
27
+
28
/**
 * Turn the CLI argument into an absolute spec directory path.
 *
 * Tries the argument relative to the current working directory first, then
 * under `specs/`. When neither exists, the first candidate is returned so the
 * caller can report it as missing.
 *
 * @param {string|undefined} input - Raw CLI argument.
 * @returns {string|null} Absolute path, or `null` when no argument was given.
 */
function resolveSpecDir(input) {
  if (!input) return null;

  const base = process.cwd();
  const asGiven = path.resolve(base, input);
  const underSpecs = path.resolve(base, 'specs', input);

  const existing = [asGiven, underSpecs].find((candidate) => fs.existsSync(candidate));
  return existing ?? asGiven;
}
40
+
41
/**
 * Read and parse a JSON file, recording any failure instead of throwing.
 *
 * @param {string} filePath - File to read.
 * @param {string[]} errors - Collector; receives one message when reading or parsing fails.
 * @returns {*} The parsed value, or `null` on failure.
 */
function readJson(filePath, errors) {
  let parsed = null;

  try {
    const raw = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(raw);
  } catch (failure) {
    errors.push(`${filePath}: invalid JSON (${failure.message})`);
    parsed = null;
  }

  return parsed;
}
49
+
50
/**
 * List the Markdown task files directly under `<specDir>/tasks`.
 *
 * @param {string} specDir - Spec directory.
 * @returns {string[]} Sorted relative paths of the form `tasks/<name>.md`;
 *   empty when the `tasks` directory does not exist.
 */
function listTaskFiles(specDir) {
  const tasksDir = path.join(specDir, 'tasks');
  if (!fs.existsSync(tasksDir)) return [];

  const found = [];
  for (const entry of fs.readdirSync(tasksDir, { withFileTypes: true })) {
    // Only regular files count; directories ending in .md are skipped.
    if (entry.isFile() && entry.name.endsWith('.md')) {
      found.push(`tasks/${entry.name}`);
    }
  }

  found.sort();
  return found;
}
60
+
61
/**
 * Report whether `content` contains a level-2 Markdown heading (`## ...`)
 * whose text is exactly `heading`, ignoring trailing whitespace.
 *
 * @param {string} content - Markdown text to search.
 * @param {string} heading - Literal heading text; regex metacharacters are escaped.
 * @returns {boolean} True when such a heading line exists.
 */
function hasHeading(content, heading) {
  const literal = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const source = '^##\\s+' + literal + '\\s*$';
  const headingLine = new RegExp(source, 'm');
  return headingLine.test(content);
}
65
+
66
/**
 * Collect the requirement IDs declared in a requirements document.
 *
 * Three notations are recognised:
 *  - level 2-4 headings naming `REQ-<n>` or `R<n>`, optionally preceded by "Requirement";
 *  - level 2-4 headings starting with a bare number (recorded as `R<n>`);
 *  - bracketed references such as `[R4]` or `[REQ-4]` anywhere in the text.
 *
 * @param {string} requirementsText - Contents of requirements.md.
 * @returns {string[]} Upper-cased, de-duplicated, sorted IDs, with `R0` excluded.
 */
function extractRequirementIds(requirementsText) {
  const text = String(requirementsText);
  const collected = new Set();

  const labelledHeadings = /^#{2,4}\s+(?:(?:Requirement)\s+)?((?:REQ-\d+)|(?:R\d+))\b/gim;
  for (const [, id] of text.matchAll(labelledHeadings)) {
    collected.add(id.toUpperCase());
  }

  const numberedHeadings = /^#{2,4}\s+(?:Requirement\s+)?(\d+)(?=[:.\s-])/gim;
  for (const [, digits] of text.matchAll(numberedHeadings)) {
    collected.add(`R${digits}`);
  }

  const bracketedRefs = /\[((?:REQ-\d+)|(?:R\d+))\]/gi;
  for (const [, id] of text.matchAll(bracketedRefs)) {
    collected.add(id.toUpperCase());
  }

  return Array.from(collected)
    .filter((id) => id !== 'R0')
    .sort();
}
87
+
88
/**
 * Check that a task file carries the required sections, accepting both the
 * current and the older heading names for each.
 *
 * @param {string} taskPath - Relative task path, used as the error prefix.
 * @param {string} content - Task file contents.
 * @param {string[]} errors - Collector; one message is appended per missing section.
 */
function validateTaskSections(taskPath, content, errors) {
  const hasAnyHeading = (...names) => names.some((name) => hasHeading(content, name));

  // Ordered [present, message] pairs; the order fixes the order of reported errors.
  const checks = [
    [
      hasAnyHeading('Context', 'Objective', 'Goal'),
      'missing Context/Objective/Goal',
    ],
    [
      hasAnyHeading('Steps', 'Implementation Steps'),
      'missing Steps/Implementation Steps',
    ],
    [
      // An inline `_Requirements: ..._` mapping counts as well as a heading.
      hasAnyHeading('Requirements') || /_Requirements:\s*[^_\n]+_/i.test(content),
      'missing Requirements mapping',
    ],
    [
      hasAnyHeading(
        'Evidence',
        'Task Test Plan & Verification Evidence',
        'Verification & Evidence'
      ),
      'missing Evidence or task test plan',
    ],
  ];

  for (const [present, message] of checks) {
    if (!present) errors.push(`${taskPath}: ${message}`);
  }
}
107
+
108
/**
 * Validate the artifacts of one spec directory.
 *
 * Checks, in order: forbidden generated files, `spec.json` shape
 * (`scope_lock`, `task_files`, `task_registry`), task file naming, the
 * "not everything is R0" rule, presence of requirements/design/research
 * documents, required task sections, requirement coverage by tasks, and
 * finally that `ready_for_implementation` is not `true` while errors exist.
 *
 * Malformed `spec.json` content is reported as a validation error rather than
 * crashing the validator or passing silently.
 *
 * @param {string} specDir - Absolute or relative path of the spec directory.
 * @returns {{errors: string[], warnings: string[]}} Collected findings;
 *   an empty `errors` array means the spec passed.
 */
function validateSpec(specDir) {
  const errors = [];
  const warnings = [];
  const specJsonPath = path.join(specDir, 'spec.json');

  if (!fs.existsSync(specDir)) {
    errors.push(`${specDir}: spec directory does not exist`);
    return { errors, warnings };
  }

  for (const forbidden of ['init.json', 'spec-state.json', 'hydration.md']) {
    if (fs.existsSync(path.join(specDir, forbidden))) {
      errors.push(`${forbidden}: forbidden generated artifact`);
    }
  }

  if (!fs.existsSync(specJsonPath)) {
    errors.push('spec.json: missing');
    return { errors, warnings };
  }

  const errorsBeforeParse = errors.length;
  const spec = readJson(specJsonPath, errors);
  if (spec === null || typeof spec !== 'object' || Array.isArray(spec)) {
    // readJson already reported read/parse failures. Valid JSON that is not an
    // object (null, false, 0, "", arrays, ...) must fail too, not pass silently.
    if (errors.length === errorsBeforeParse) {
      errors.push('spec.json: must contain a JSON object');
    }
    return { errors, warnings };
  }

  if (!spec.scope_lock || typeof spec.scope_lock !== 'object' || Array.isArray(spec.scope_lock)) {
    errors.push('spec.json.scope_lock: must be an object, not a boolean or array');
  }

  const taskFiles = listTaskFiles(specDir);
  const taskFileSet = new Set(taskFiles);

  if (!Array.isArray(spec.task_files)) {
    errors.push('spec.json.task_files: missing array');
    if (Array.isArray(spec.tasks)) {
      errors.push('spec.json.tasks: legacy field detected; use task_files');
    }
  } else {
    const declared = [...spec.task_files].sort();
    if (JSON.stringify(declared) !== JSON.stringify(taskFiles)) {
      errors.push('spec.json.task_files: must exactly match files under tasks/');
      warnings.push(`expected task_files=${JSON.stringify(taskFiles)}`);
    }
  }

  if (!spec.task_registry || typeof spec.task_registry !== 'object' || Array.isArray(spec.task_registry)) {
    errors.push('spec.json.task_registry: missing object keyed by task file path');
  } else {
    const registryKeys = Object.keys(spec.task_registry).sort();
    if (JSON.stringify(registryKeys) !== JSON.stringify(taskFiles)) {
      errors.push('spec.json.task_registry: keys must exactly match task file paths');
    }

    for (const [registryPath, entry] of Object.entries(spec.task_registry)) {
      if (!taskFileSet.has(registryPath)) {
        errors.push(`spec.json.task_registry.${registryPath}: no matching task file`);
      }

      // The `in` operator throws on primitives, so treat any non-object entry
      // as an empty record and report every required key as missing.
      const record = entry !== null && typeof entry === 'object' ? entry : {};
      for (const key of REQUIRED_REGISTRY_KEYS) {
        if (!(key in record)) {
          errors.push(`spec.json.task_registry.${registryPath}: missing ${key}`);
        }
      }

      const dependencies = record.dependencies;
      if (Array.isArray(dependencies)) {
        for (const dep of dependencies) {
          if (!taskFileSet.has(dep)) {
            errors.push(`spec.json.task_registry.${registryPath}.dependencies: unknown dependency ${dep}`);
          }
        }
      } else if (entry) {
        // Never iterate a non-array: objects and numbers are not iterable and
        // strings would be walked character by character.
        errors.push(`spec.json.task_registry.${registryPath}.dependencies: must be an array`);
      }
    }
  }

  for (const taskFile of taskFiles) {
    if (!TASK_PATH_RE.test(taskFile)) {
      errors.push(`${taskFile}: must match tasks/task-R{N}-{SEQ}-<slug>.md with two-digit SEQ`);
    }
  }

  if (taskFiles.length > 2 && taskFiles.every((taskFile) => /^tasks\/task-R0-/.test(taskFile))) {
    errors.push('tasks/: feature work cannot be entirely R0; reserve R0 for shared foundation tasks');
  }

  const requirementsPath = path.join(specDir, 'requirements.md');
  const designPath = path.join(specDir, 'design.md');
  const researchPath = path.join(specDir, 'research.md');

  if (!fs.existsSync(requirementsPath)) errors.push('requirements.md: missing');
  if (!fs.existsSync(designPath)) errors.push('design.md: missing');

  // research.md is only required once the spec has at least one task file.
  if (taskFiles.length > 0) {
    if (!fs.existsSync(researchPath)) {
      errors.push('research.md: missing Evidence Summary for non-trivial spec');
    } else {
      const research = fs.readFileSync(researchPath, 'utf8');
      if (!/^##\s+Evidence Summary\s*$/m.test(research)) {
        errors.push('research.md: missing ## Evidence Summary');
      }
    }
  }

  let requirementIds = [];
  if (fs.existsSync(requirementsPath)) {
    requirementIds = extractRequirementIds(fs.readFileSync(requirementsPath, 'utf8'));
  }

  const coveredRequirementIds = new Set();
  for (const taskFile of taskFiles) {
    const fullPath = path.join(specDir, taskFile);
    const content = fs.readFileSync(fullPath, 'utf8');
    validateTaskSections(taskFile, content, errors);

    // Any R<n> / REQ-<n> token in the task body counts as coverage (R0 excluded).
    const idRe = /\b((?:REQ-\d+)|(?:R\d+))\b/gi;
    let match;
    while ((match = idRe.exec(content)) !== null) {
      const id = match[1].toUpperCase();
      if (id !== 'R0') coveredRequirementIds.add(id);
    }

    // `_Requirements: 1.2, 3_` style mappings cover R<major> for each numeric token.
    const numericMappingRe = /_Requirements:\s*([^_\n]+)_/gi;
    while ((match = numericMappingRe.exec(content)) !== null) {
      for (const token of match[1].split(',')) {
        const number = token.trim().match(/^(\d+)(?:\.\d+)?$/);
        if (number) coveredRequirementIds.add(`R${number[1]}`);
      }
    }
  }

  for (const requirementId of requirementIds) {
    if (!coveredRequirementIds.has(requirementId)) {
      errors.push(`requirements.md:${requirementId}: not covered by any task`);
    }
  }

  if (spec.ready_for_implementation === true && errors.length > 0) {
    errors.push('spec.json.ready_for_implementation: cannot be true while validator errors exist');
  }

  return { errors, warnings };
}
247
+
248
/**
 * CLI entry point: validate the spec directory named by the first argument.
 *
 * Exits with status 2 when no argument is given and 1 when validation
 * reports errors; otherwise prints a PASS line and returns normally.
 */
function main() {
  const specDir = resolveSpecDir(process.argv[2]);
  if (!specDir) {
    usage();
    process.exit(2);
  }

  const { errors, warnings } = validateSpec(specDir);
  warnings.forEach((warning) => {
    console.warn(`[WARN] ${warning}`);
  });

  // Prefer a path relative to the working directory; fall back to the absolute
  // path when the relative form is empty (spec dir is the cwd itself).
  const label = path.relative(process.cwd(), specDir) || specDir;

  if (errors.length === 0) {
    console.log(`PASS ${label}`);
    return;
  }

  console.error(`FAIL ${label}`);
  errors.forEach((error) => {
    console.error(`- ${error}`);
  });
  process.exit(1);
}
270
+
271
+ main();
@@ -24,7 +24,7 @@ Do not attempt a standard text-based review if the project includes Visual Specs
24
24
  3. If NO (Markdown Spec only): Read the spec directly and extract:
25
25
  - requirement bullets
26
26
  - task `Completion Criteria`
27
- - task `Task Test Plan & Verification Evidence` (or legacy `Verification & Evidence`)
27
+ - task `Evidence` (or `Task Test Plan & Verification Evidence` / legacy `Verification & Evidence`)
28
28
  - canonical contracts/invariants from `design.md`
29
29
  Then verify the changed files against those concrete obligations.
30
30
 
@@ -45,7 +45,7 @@ DO NOT write implementation code until an approved spec exists.
45
45
 
46
46
  <DEFINITION-OF-DONE>
47
47
  A task is NOT done because code compiles or a placeholder renders.
48
- A task is done only when the task file's Completion Criteria AND Task Test Plan & Verification Evidence section are satisfied with real execution proof. Existing specs may use legacy `Verification & Evidence`; treat that as the same contract.
48
+ A task is done only when the task file's Completion Criteria AND Evidence section are satisfied with real execution proof. Existing specs may use `Task Test Plan & Verification Evidence` or legacy `Verification & Evidence`; treat those as the same contract.
49
49
  </DEFINITION-OF-DONE>
50
50
 
51
51
  <CONTRACT-FIDELITY>
@@ -92,7 +92,7 @@ flowchart TD
92
92
  - Objective + Constraints
93
93
  - Related Files
94
94
  - Completion Criteria
95
- - Task Test Plan & Verification Evidence (or legacy Verification & Evidence)
95
+ - Evidence (or `Task Test Plan & Verification Evidence` / legacy `Verification & Evidence`)
96
96
  - Exact executable verification commands named in the task
97
97
  - Requirement IDs referenced by the task
98
98
  - Named technologies, frameworks, protocols, and data stores that the task/spec explicitly requires
@@ -142,7 +142,7 @@ The moment you finish coding, DO NOT proceed further. Switch to `references/qual
142
142
  **Mantra:** Scope/spec compliance first, code quality second. All feedback from code-auditor must be addressed thoroughly: Score >= 9.5 & Zero Critical issues.
143
143
 
144
144
  - Passing Step 4 requires ALL of the following:
145
- 1. Automated verification passes, including preflight compile/typecheck/build health and every exact command named in the task's `Task Test Plan & Verification Evidence` section (or legacy `Verification & Evidence`)
145
+ 1. Automated verification passes, including preflight compile/typecheck/build health and every exact command named in the task's `Evidence` section (or `Task Test Plan & Verification Evidence` / legacy `Verification & Evidence`)
146
146
  2. Spec compliance review passes: every scoped requirement and active task criterion is implemented, with no extras and no omissions
147
147
  3. Code quality review passes
148
148
  4. Task evidence passes (artifacts/runtime surfaces/reachability/negative-path checks from the task file are proven)
@@ -161,7 +161,7 @@ The moment you finish coding, DO NOT proceed further. Switch to `references/qual
161
161
  - `spec.json.task_registry[path].status = "done"`
162
162
  - `completed_at` + `last_updated_at`
163
163
  - synchronized top-level `updated_at`
164
- - a human-readable verification receipt inside the task's `Task Test Plan & Verification Evidence` section showing which commands ran, their outcomes, and what proof was observed
164
+ - a human-readable verification receipt inside the task's `Evidence` section showing which commands ran, their outcomes, and what proof was observed
165
165
  - Verification receipts with `PRECHECK_FAIL`, `FAIL`, `UNVERIFIED`, or an explicit note that the implementation intentionally simplified a named contract MUST NOT be synchronized as `done`.
166
166
  - After syncing the active task, run a **Task Closeout Docs Checkpoint**
167
167
  - Task Closeout Docs Checkpoint:
@@ -10,11 +10,12 @@ Green tests are NOT enough. The gate requires four proofs:
10
10
 
11
11
  ## Automation Semantics
12
12
 
13
- - If the task names exact commands in `Task Test Plan & Verification Evidence` (or legacy `Verification & Evidence`), those exact commands are mandatory and must run before any fallback repo defaults.
13
+ - If the task names exact commands in `Evidence` (or `Task Test Plan & Verification Evidence` / legacy `Verification & Evidence`), those exact commands are mandatory and must run before any fallback repo defaults.
14
14
  - Preflight compile/typecheck/build health is mandatory. If compile/typecheck/build fails before tests are meaningful, the gate result is `PRECHECK_FAIL`, not `NO_TESTS`.
15
15
  - `NO_TESTS` is never an automatic PASS.
16
16
  - `NO_TESTS` is acceptable only when the task does **not** require a dedicated test suite command and every other required automated command/evidence item passes.
17
17
  - If the task explicitly requires tests and the repo has no such test command or suite, the task is FAIL or BLOCKED, not done.
18
+ - If the task kind implies a concrete test type, the gate must enforce it: unit tests for logic/regression, component or integration tests for stateful UI or cross-module wiring, E2E/UI-flow checks for complete user workflows, visual/responsive checks for layout/theme work, accessibility checks for interactive UI, and smoke checks for scaffold/config. Performance/security checks are mandatory only when specified by requirement/risk/boundary.
18
19
  - Named frameworks, auth systems, transports, datastores, and runtime boundaries in the task/spec are contractual. Silent substitutions are review failures, not acceptable implementation trade-offs.
19
20
  - Multi-process or multi-runtime flows must prove shared real state or a real boundary contract. Matching in-memory placeholders on both sides do not count as working integration.
20
21
  - Scope fidelity is mandatory: missing scoped behavior, extra unapproved behavior, or task output that exists only as orphaned/unreachable code is a review failure even when build/tests pass.
@@ -29,7 +30,7 @@ Variable: retry_count = 0
29
30
 
30
31
  Before START_LOOP:
31
32
  - Read the active task file(s)
32
- - Extract Related Files, Completion Criteria, Task Test Plan & Verification Evidence (or legacy Verification & Evidence)
33
+ - Extract Related Files, Completion Criteria, Evidence (or Task Test Plan & Verification Evidence / legacy Verification & Evidence)
33
34
  - Extract the exact executable verification commands in declaration order
34
35
  - Extract relevant design contracts/invariants for the touched area
35
36
  - Extract scope_lock, requirement IDs, runtime entrypoints/callers, and reachability proof obligations
@@ -80,6 +80,9 @@ Forbidden generated artifacts:
80
80
  - Do NOT create shorthand task files such as `tasks/task-R0-1.md`, `tasks/task-R1-1.md`, or `tasks/R0-1-<slug>.md`.
81
81
  - The template file name is never the output file name. `templates/spec-state.json` is only the schema source for generated `spec.json`.
82
82
  - Task hydration is session/task-state synchronization only; it MUST NOT be written as a markdown artifact.
83
+ - Before marking a spec ready, run the deterministic validator:
84
+ - `node .claude/scripts/validate-spec-output.cjs specs/<feature>`
85
+ - Any validator failure blocks `ready_for_implementation = true`.
83
86
 
84
87
  ### Writing Style
85
88
  - Concise, prefer bullet lists
@@ -134,6 +137,7 @@ The system MUST NOT execute Steps 1-8. Instead, load `references/review.md` and
134
137
  5. **MUST NOT create implementation code files** (`.ts`, `.js`, `.py`, etc.). The validate workflow produces ONLY markdown spec documents and reports. If a fix requires a new shared module, describe it in the relevant task file instead of creating the actual code file.
135
138
  6. **MUST NOT over-engineer fixes.** Apply YAGNI — if user says "configure later", add an abstraction note to the task, do NOT generate 4 concrete provider implementations.
136
139
  7. **MUST follow auto-decision table exactly.** Count task files + scan for keywords → pick mode. No self-justification to override the table result.
140
+ 8. **MUST run deterministic validator.** Before reporting validation PASS, run `node .claude/scripts/validate-spec-output.cjs specs/<feature>`. If it exits non-zero, validation is FAIL/BLOCKED, `ready_for_implementation` remains `false`, and output MUST NOT suggest `/hapo:develop`.
137
141
 
138
142
  ## Workflow Diagram
139
143
 
@@ -266,7 +270,8 @@ Load: `references/scope-inquiry.md`
266
270
  - Load `rules/tasks-parallel-analysis.md` for parallel markers (default: enabled)
267
271
  - Each task file follows template `templates/task.md`
268
272
  - `Related Files` and test plans must inherit paths, contracts, and test targets from the codebase scout. If exact files/tests cannot be named for an enhancement, run targeted inspect before generating tasks.
269
- - Each task file MUST include `Completion Criteria` and `Task Test Plan & Verification Evidence` sections detailed enough that a downstream quality gate can prove the task is truly done.
273
+ - Each task file MUST include `Completion Criteria` and `Evidence` sections detailed enough that a downstream quality gate can prove the task is truly done. Existing specs may use `Task Test Plan & Verification Evidence` or legacy `Verification & Evidence`.
274
+ - Each task's `Evidence` MUST choose the right proof type for the touched surface: unit for pure logic, component/integration for UI or state wiring, E2E/UI flow for complete user workflows, visual/responsive checks for style/layout work, accessibility checks for interactive UI, smoke checks for scaffold/config, regression checks for bug fixes, and performance/security checks only when the requirement or risk calls for them.
270
275
  - Every task MUST preserve the approved `scope_lock`: implement all scoped acceptance criteria for its requirement, avoid out-of-scope features, and record any intentional deferral as a named later task rather than implicit omission.
271
276
  - For UI/app/runtime features, generate a final integration/reachability task or final section that names the real runtime entrypoint and proves prior task outputs are imported, mounted, registered, invoked, or otherwise reachable.
272
277
  - Build `spec.json.task_registry` alongside `task_files`. For each task file, register at minimum:
@@ -280,11 +285,11 @@ Load: `references/scope-inquiry.md`
280
285
  - `last_updated_at`
281
286
  - Update `spec.json` phase + task metadata
282
287
 
283
- #### Requirement-Driven Task Grouping (MANDATORY)
284
- Tasks MUST be organized **by requirement**, NOT by technical concern. Each requirement from `requirements.md` gets its own cluster of task files.
288
+ #### Requirement-Covered Task Grouping (MANDATORY)
289
+ Tasks MUST be organized by implementation flow while preserving explicit requirement coverage. Foundation work uses `R0`; feature work uses `R1+`.
285
290
 
286
291
  **Naming convention:** `tasks/task-R{N}-{SEQ}-<slug>.md`
287
- - `R{N}` = requirement number (e.g., R1, R2, R3...)
292
+ - `R{N}` = foundation or implementation cluster (R0 foundation, R1+ feature work)
288
293
  - `{SEQ}` = sequential number within that cluster (01, 02, 03...)
289
294
  - `<slug>` = descriptive kebab-case name
290
295
 
@@ -303,11 +308,11 @@ tasks/
303
308
  ```
304
309
 
305
310
  **Splitting rules:**
306
- - Each requirement → 1 or more task files (split by sub-scope within the requirement)
307
- - A task file MUST serve exactly 1 primary requirement (cross-cutting references allowed as secondary)
308
- - If a requirement has only 1 natural task, create 1 file (no forced splitting)
309
- - If a requirement has many acceptance criteria spanning different concerns → split into multiple task files
310
- - After generating all tasks: verify **every requirement ID** appears as primary in at least one task file — gaps = failure
311
+ - Split by real implementation dependency chain first: model/schema -> service -> API -> UI -> integration.
312
+ - A task file MAY cover multiple requirement IDs when one code change naturally satisfies them.
313
+ - A requirement MAY be covered by multiple task files when it spans layers.
314
+ - Do not create all tasks under `R0`; `R0` is only shared foundation/setup.
315
+ - After generating all tasks: verify **every requirement ID** appears in at least one task file's `## Requirements` section — gaps = failure.
311
316
  - **Legacy Protection:** If the `research.md` identified existing codebase files or tests that will be broken (Blast Radius), you MUST generate explicitly tasked files (e.g., `task-R5-01-update-legacy-tests.md`) to fix those breakages. Do not leave broken tests out of scope.
312
317
 
313
318
  **Dependency ordering:** Tasks within the same requirement are ordered by natural implementation flow. Cross-requirement dependencies use `Dependencies:` field referencing other task file names.
@@ -316,24 +321,17 @@ tasks/
316
321
  Each task file MUST be **self-contained and implementation-ready** — detailed enough for a junior developer or AI coding agent to execute without guessing.
317
322
 
318
323
  **Structure per task file:**
319
- 1. **Objective** — 1-2 sentence objective (WHAT, not HOW)
320
- 2. **Implementation Steps** — Hierarchical breakdown:
321
- - Major steps (`- [ ] 1. ...`) group by cohesion
322
- - Sub-tasks (`- [ ] 1.1 ...`) are specific actionable items (1-3 hours each)
323
- - Detail bullets under each sub-task describe:
324
- - Business logic and behavior to implement
325
- - Edge cases and constraints
326
- - Validation rules
327
- - `_Requirements: X.X_` at the END of every sub-task — **no exceptions**
328
- 3. **Test coverage** — Last major step in every task must cover unit + integration tests
329
- 4. **Related Files** — Table with exact paths, action type, and descriptions
330
- 5. **Completion Criteria** — Observable, testable criteria (checkbox format)
331
- 6. **Risk Assessment** — Table with risk, severity, mitigation
332
- 7. **Runtime reachability** — For any created component, service, route, command, worker, provider, or data loader, state where it is reached from or which named later task wires it
324
+ 1. **Context** — why this task exists, current state, target outcome, relevant exact files.
325
+ 2. **Steps** — concise implementation checklist with business intent and code-level detail.
326
+ 3. **Requirements** — list requirement IDs and acceptance criteria covered by this task.
327
+ 4. **Related Files** — table with exact paths, action type, and descriptions when paths are known; otherwise run scout first.
328
+ 5. **Completion Criteria** — observable, testable criteria.
329
+ 6. **Evidence** — automated command(s), artifact/runtime proof, negative-path proof, and runtime reachability proof.
330
+ 7. **Risk Assessment** — table with risk, severity, mitigation.
333
331
 
334
332
  **Parallel markers:** Append `(P)` to tasks that can run concurrently (no data dependency, no shared files, no prerequisite approval from another task). Tasks serving DIFFERENT requirements are often parallelizable.
335
333
 
336
- **FORBIDDEN:** Task files with only 3-5 top-level checkboxes and no sub-task breakdown. This level of detail is INSUFFICIENT for implementation.
334
+ **FORBIDDEN:** Task files with only vague checkboxes and no exact files, requirements, or evidence. Compact is good; vague is invalid.
337
335
 
338
336
  ### Step 8: Task Hydration
339
337
  Load: `references/task-hydration.md`
@@ -353,10 +351,12 @@ Load: `references/review.md` + `rules/design-review.md`
353
351
  - **PROHIBITION:** The system MUST NOT skip Red Team because of a prior code-auditor review. Code review ≠ Spec review.
354
352
  - **PROHIBITION:** The system MUST NOT create `.ts`, `.js`, `.py` or any implementation files during validation. Spec-only outputs.
355
353
  - **Reconciliation Rule:** `validation.status = "completed"` is forbidden until all accepted findings and validation decisions are physically propagated into `requirements.md`, `design.md`, `tasks/*.md`, and `spec.json` where applicable.
354
+ - **Deterministic Gate:** Run `node .claude/scripts/validate-spec-output.cjs specs/<feature>` after all fixes and before final output. Script failure overrides any LLM checklist result and blocks `ready_for_implementation = true`.
356
355
 
357
356
  ### Step 9.5: Finalization Audit (MANDATORY)
358
357
  - Re-scan the `tasks/` directory and rebuild `spec.json.task_files` from the real filesystem (sorted, relative paths)
359
358
  - Rebuild `spec.json.task_registry` from the real filesystem if it is missing, stale, or missing keys. Preserve task status fields when the path still matches.
359
+ - Run `node .claude/scripts/validate-spec-output.cjs specs/<feature>` and treat any non-zero exit as a blocking failure.
360
360
  - FAIL if any task file exists on disk but is missing from `task_files`
361
361
  - FAIL if any path in `task_files` does not exist on disk
362
362
  - FAIL if any task file exists on disk but is missing from `task_registry`
@@ -364,10 +364,10 @@ Load: `references/review.md` + `rules/design-review.md`
364
364
  - FAIL if any task file path does not match `tasks/task-R{N}-{SEQ}-<slug>.md` with two-digit `SEQ` (for example `tasks/task-R0-01-project-scaffolding.md`)
365
365
  - FAIL if a newly generated non-trivial spec lacks a `research.md` Evidence Summary with codebase scout result, external research result or skip rationale, selected decision, rejected alternatives, and downstream task/test implications.
366
366
  - FAIL if any requirement or NFR mapping uses non-numeric labels (`NFR-1`, `SEC-1`, etc.)
367
- - FAIL if a task lacks `Completion Criteria` or `Task Test Plan & Verification Evidence` (legacy `Verification & Evidence` is accepted only for pre-existing task files)
367
+ - FAIL if a task lacks `Completion Criteria` or `Evidence` (existing `Task Test Plan & Verification Evidence` or legacy `Verification & Evidence` is accepted)
368
368
  - FAIL if a task creates runtime-facing artifacts but neither proves reachability from an entrypoint/caller nor names a later integration task responsible for wiring them.
369
369
  - FAIL if a UI/app/runtime spec has multiple user-facing task outputs but no final integration/reachability task or final integration section.
370
- - FAIL if accepted validation decisions exist in reports but are not reflected in the implementation-facing sections of affected artifacts (`Objective`, `Constraints`, `Implementation Steps`, `Completion Criteria`, `Task Test Plan & Verification Evidence`, canonical contracts, or requirements text).
370
+ - FAIL if accepted validation decisions exist in reports but are not reflected in the implementation-facing sections of affected artifacts (`Context`, `Steps`, `Requirements`, `Completion Criteria`, `Evidence`, canonical contracts, or requirements text).
371
371
  - FAIL if the spec scope/provider was switched away from Anthropic/Claude but `requirements.md`, `design.md`, or `tasks/*.md` still contain stale provider-specific strings such as `Claude API`, `Haiku`, or `haiku_reachable`. `research.md` is the only allowed place for historical cost comparisons.
372
372
  - FAIL if privacy/delete-data work lacks a single canonical deletion policy. The design MUST explicitly choose either:
373
373
  1. hard-delete with no re-registration lock, or
@@ -450,7 +450,7 @@ specs/
450
450
  ├── requirements.md # Technical requirements (EARS format)
451
451
  ├── research.md # Research notes
452
452
  ├── design.md # Architectural design
453
- ├── tasks/ # Grouped by requirement (R1, R2, R3...)
453
+ ├── tasks/ # Foundation + implementation clusters (R0, R1, R2...)
454
454
  │ ├── task-R0-01-foundation.md
455
455
  │ ├── task-R1-01-<slug>.md
456
456
  │ ├── task-R1-02-<slug>.md
@@ -505,13 +505,14 @@ Before finalizing any specification, assert all the following:
505
505
  - [ ] **Requirements traceability** matrix present in design.md
506
506
  - [ ] **Canonical Contracts & Invariants** filled for auth/transport/persistence/artifact-sensitive work
507
507
  - [ ] **Every task file** maps to at least 1 valid in-scope requirement ID
508
- - [ ] **Every task file** includes `Task Test Plan & Verification Evidence` with executable or inspectable proof
508
+ - [ ] **Every task file** includes `Evidence` with executable or inspectable proof
509
509
  - [ ] **State Machine Blueprint:** design.md contains Mermaid diagrams for non-trivial flows
510
510
  - [ ] **Dependency graph complete**: no task can start before its blockers are listed
511
511
  - [ ] **Risk matrix filled**: likelihood × impact, with mitigation for High items
512
512
  - [ ] **Test strategy defined**: what gets unit tested, integration tested, e2e validated
513
513
  - [ ] **task_files inventory synced**: no missing or orphaned task references
514
514
  - [ ] **task_registry synced**: every task file has exactly one machine-state entry with valid status + dependencies
515
+ - [ ] **deterministic validator passed**: `node .claude/scripts/validate-spec-output.cjs specs/<feature>`
515
516
  - [ ] **Validation gate consistent**: validation_recommended and validation.status agree with spec risk
516
517
  - [ ] **Provider wording clean**: no stale vendor/provider strings outside allowed research context
517
518
  - [ ] **spec.json fully updated**: phase, current_phase, progress, timestamps, approvals, design_context
@@ -538,6 +539,7 @@ Before finalizing any specification, assert all the following:
538
539
  - `design.md` — Design document template
539
540
  - `research.md` — Research log template
540
541
  - `task.md` — Template for individual task file
542
+ - `.claude/scripts/validate-spec-output.cjs` — Deterministic validator for generated spec artifacts
541
543
 
542
544
  ### Rules (`rules/`)
543
545
  - `ears-format.md` — EARS requirements standard
@@ -10,6 +10,24 @@ Review a spec before implementation. The system auto-decides the review depth ba
10
10
  2. If not → check active spec (spec with `in_progress` status; accept legacy `in-progress` when reading existing files)
11
11
  3. If nothing found → ask user to specify path
12
12
 
13
+ ## Deterministic Validator Gate (MANDATORY)
14
+
15
+ This gate is the hard source of truth for `hapo:specs --validate`. LLM red-team tables and markdown validation reports are advisory until this script passes.
16
+
17
+ After resolving the spec path, run:
18
+
19
+ ```bash
20
+ node .claude/scripts/validate-spec-output.cjs specs/<feature>
21
+ ```
22
+
23
+ Required behavior:
24
+ 1. Run the validator at least once before the final PASS decision. If it fails, copy the exact failing categories into the validation findings/blockers and fix the physical spec artifacts.
25
+ 2. Red Team and Validate may continue while fixing issues, but they cannot approve the spec while validator errors remain.
26
+ 3. Run the validator again after every accepted Red Team / Validate fix set and before any final verdict.
27
+ 4. The final report MUST include the validator command and the final PASS/FAIL result.
28
+ 5. If the validator exits non-zero, final verdict is **FAIL / BLOCKED**, `validation.status` MUST NOT become `completed`, `ready_for_implementation` MUST remain `false`, and the output MUST NOT suggest `/hapo:develop`.
29
+ 6. A markdown checklist, manual QA table, or "all required sections present" claim cannot override validator failure.
30
+
13
31
  ## Auto-Decision: When to Red Team vs Validate
14
32
 
15
33
  The system evaluates the spec and picks the appropriate review mode:
@@ -42,9 +60,10 @@ These rules override any self-reasoning or optimization the system may attempt:
42
60
  4. **Apply YAGNI to fixes.** When user says "configure later" or "decide later", add a single note to the task file. Do NOT generate multiple concrete implementations (e.g., 4 provider files when user only asked for abstraction).
43
61
  5. **No false completion.** You MUST NOT set `validation.status = "completed"` or `ready_for_implementation = true` until a reconciliation audit proves the accepted findings and validation decisions are reflected in the physical spec artifacts.
44
62
  6. **Provider drift is a real defect.** If the scope changed away from Claude/Anthropic, stale strings like `Claude API`, `Haiku`, or `haiku_reachable` in `requirements.md`, `design.md`, or `tasks/*.md` are validation failures. `research.md` may mention them only as historical comparison.
45
- 7. **Implementation-facing propagation is mandatory.** A decision that affects implementation is NOT considered applied if it only appears in `Risk Assessment`, `validate-log.md`, or `red-team-report.md`. It must update at least one of: `requirements.md`, `Canonical Contracts & Invariants`, `Objective`, `Constraints`, `Implementation Steps`, `Completion Criteria`, or `Task Test Plan & Verification Evidence`.
63
+ 7. **Implementation-facing propagation is mandatory.** A decision that affects implementation is NOT considered applied if it only appears in `Risk Assessment`, `validate-log.md`, or `red-team-report.md`. It must update at least one of: `requirements.md`, `Canonical Contracts & Invariants`, `Context`, `Steps`, `Requirements`, `Completion Criteria`, or `Evidence`.
46
64
  8. **CafeKit command dialect only.** Validation output MUST use `/hapo:develop <feature>` as the implementation handoff. Never mention `/sdd:execute-spec`, `/sdd:*`, `/work`, `/code`, `/specs <feature> --approve`, `/hapo:specs <feature> --approve`, or non-CafeKit aliases.
47
65
  9. **CafeKit task filename convention only.** Task files MUST use `tasks/task-R{N}-{SEQ}-<slug>.md` with two-digit `SEQ` (for example `tasks/task-R0-01-project-scaffolding.md`). Files like `tasks/R0-1-project-scaffolding.md` are legacy/foreign format; rename them and update `spec.json.task_files`, `spec.json.task_registry`, and dependency references before passing validation.
66
+ 10. **Deterministic validator is mandatory.** The final validation verdict MUST be derived from `node .claude/scripts/validate-spec-output.cjs specs/<feature>`. If that command fails, report FAIL/BLOCKED and list the script output. Do NOT report PASS.
48
67
 
49
68
  ---
50
69
 
@@ -226,13 +245,15 @@ Save to `reports/validate-log.md`:
226
245
  Before declaring validation complete:
227
246
  1. Re-read `spec.json`, `requirements.md`, `design.md`, and all `tasks/task-*.md`
228
247
  2. Verify every accepted red-team finding and every validation action item is reflected in the correct physical file(s)
229
- 3. Fail the audit if:
248
+ 3. Run `node .claude/scripts/validate-spec-output.cjs specs/<feature>` and keep the raw result visible
249
+ 4. Fail the audit if:
230
250
  - a report says "applied" but the file still contains the old text
231
251
  - stale provider strings remain after a provider change
232
252
  - delete-data/privacy artifacts mix multiple canonical policies
233
253
  - any task path fails the CafeKit `tasks/task-R{N}-{SEQ}-<slug>.md` naming convention
234
254
  - `spec.json.updated_at`, `timestamps.review_done`, or `timestamps.validation_done` do not reflect the final reviewed state
235
- 4. Only after the audit passes may you:
255
+ - deterministic validator exits non-zero
256
+ 5. Only after the audit passes may you:
236
257
  - set `spec.json.validation.status = "completed"`
237
258
  - set `spec.json.timestamps.validation_done`
238
259
  - set `spec.json.timestamps.review_done`
@@ -241,7 +262,7 @@ Before declaring validation complete:
241
262
  #### Step 8: Final Status Write-Back
242
263
  - Update `spec.json.updated_at` to the reconciliation time
243
264
  - Ensure `red-team-report.md` and `validate-log.md` do not contradict `spec.json`
244
- - If reconciliation fails, keep `validation.status` as `not-run` or `in_progress` and list blockers explicitly
265
+ - If reconciliation or deterministic validation fails, keep `validation.status` as `not-run` or `in_progress`, keep `ready_for_implementation = false`, list blockers explicitly, and do not provide an implementation handoff.
245
266
 
246
267
  ---
247
268
 
@@ -256,6 +277,7 @@ Red Team: {N} findings ({A} accepted, {R} rejected)
256
277
  Validate: {Q} questions asked, {D} decisions confirmed
257
278
 
258
279
  Files modified: {list}
280
+ Deterministic validator: PASS via `node .claude/scripts/validate-spec-output.cjs specs/<feature>`
259
281
 
260
282
  📌 Next step: /hapo:develop <feature> (ONLY if reconciliation audit passed)
261
283
  ```
@@ -40,6 +40,8 @@ Detail bullets must include:
40
40
  **End with integration tasks** to wire everything together.
41
41
  - For UI/app/runtime workflows, the final integration task MUST name the real entrypoint (`App.tsx`, route, command, worker, extension manifest, API route, etc.) and verify every user-visible surface from the requirements is reachable from that entrypoint.
42
42
  - Components, services, routes, commands, workers, providers, and data loaders created by earlier tasks MUST be consumed by a later integration task or explicitly marked as internal support in `design.md`; orphaned deliverables are invalid.
43
+ - Prefer compact, implementation-ready task prose over large boilerplate. The golden shape is: `Context` -> `Steps` -> `Requirements` -> `Related Files` -> `Completion Criteria` -> `Evidence` -> `Risk Assessment`.
44
+ - A compact task is valid when it names exact files/contracts, maps requirements, and gives executable evidence. Do not expand it into nested filler just to satisfy a template.
43
45
 
44
46
  ### 3. Flexible Task Sizing
45
47
 
@@ -140,11 +142,11 @@ Every task file MUST contain the Risk Assessment table, even if no risks are ide
140
142
  - Never mark implementation work or integration-critical verification as optional—reserve `*` for auxiliary/deferrable test coverage that can be revisited post-MVP.
141
143
  - Never mark auth, permissions, privacy, data deletion, migration, schema, or contract verification work as optional.
142
144
 
143
- ### Mandatory Task Test Plan & Verification Evidence
145
+ ### Mandatory Evidence Section
144
146
 
145
- Every new task file MUST include a `## Task Test Plan & Verification Evidence` section. Existing specs may still use the legacy `## Verification & Evidence` heading; readers and sync tools must support both.
147
+ Every new task file MUST include a `## Evidence` section. Existing specs may still use the v0.8 heading `## Task Test Plan & Verification Evidence` or the legacy `## Verification & Evidence` heading; readers and sync tools must support all three.
146
148
 
147
- That section is the task-level test plan and MUST contain:
149
+ That section is the task-level test plan and proof checklist. It MUST contain:
148
150
  1. **Automated proof** — exact command(s) for typecheck, tests, build, or explicit `N/A`
149
151
  2. **Artifact/runtime proof** — exact files, routes, UI surfaces, generated outputs, or persisted state to inspect
150
152
  3. **Contract/negative-path proof** — at least one contract-preserving check for unauthorized, invalid, missing-permission, rollback, or failure-path behavior when relevant
@@ -159,14 +161,32 @@ Rules:
159
161
  - For provider-sensitive work, use provider-neutral wording unless the scope lock explicitly names a vendor.
160
162
  - For delete-data/privacy work, task text MUST match the single deletion/retention policy chosen in `design.md`. Mixed policies are invalid.
161
163
 
164
+ ### Test Type Selection
165
+
166
+ Choose verification by task risk and touched surface. Do not force every task to include every test type, but do not omit the test type that proves the task's actual behavior.
167
+
168
+ | Task kind | Required / expected proof |
169
+ |---|---|
170
+ | Pure logic, data transform, parser, sorting, filtering, validator | Unit test plus negative-path case |
171
+ | Stateful UI component or user interaction | Component test or integration test; add runtime UI check if the component must be mounted |
172
+ | Cross-module state, API, persistence, provider, or service boundary | Integration test that proves real contract/state handoff |
173
+ | User-facing workflow across screens/components | E2E or UI flow verification after the vertical slice exists |
174
+ | Layout, theme, responsive, visual style | Runtime/visual viewport checks; screenshot proof when practical |
175
+ | Keyboard/focus/form/modal/table interaction | Accessibility check for focus, labels, roles, and keyboard behavior |
176
+ | Scaffolding/config/release plumbing | Smoke checks: typecheck/build/test/dev-server or equivalent |
177
+ | Bug fix/regression | Regression test reproducing the old failure, then passing |
178
+ | Performance/security-sensitive requirement or touched surface | Performance/security check only when specified by requirements, design risk, or changed boundary |
179
+
180
+ `hapo:specs` writes the expected proof into each task. `hapo:develop` executes the task-local proof before marking the task done. `hapo:test` runs the broader system pass after implementation or for a requested feature scope.
181
+
162
182
  ## Task Hierarchy Rules
163
183
 
164
184
  ### Maximum 2 Levels
165
- - **Level 1**: Major tasks (1, 2, 3, 4...)
166
- - **Level 2**: Sub-tasks (1.1, 1.2, 2.1, 2.2...)
167
- - **No deeper nesting** (no 1.1.1)
168
- - If a major task would contain only a single actionable item, collapse the structure and promote the sub-task to the major level (e.g., replace `1.1` with `1.`).
169
- - When a major task exists purely as a container, keep the checkbox description concise and avoid duplicating detailed bullets—reserve specifics for its sub-tasks.
185
+ - Prefer one actionable checkbox per real implementation step.
186
+ - Use sub-tasks (`1.1`, `1.2`) only when a step has multiple separately verifiable units.
187
+ - **No deeper nesting** (no `1.1.1`).
188
+ - If a major task would contain only a single actionable item, collapse the structure and promote the sub-task to the major level.
189
+ - When a major task exists purely as a container, keep the checkbox description concise and avoid duplicating detailed bullets.
170
190
 
171
191
  ### Sequential Numbering
172
192
  - Major tasks MUST increment: 1, 2, 3, 4, 5...
@@ -216,6 +236,6 @@ Rules:
216
236
  - If gaps found: Return to requirements or design phase
217
237
  - No requirement should be left without corresponding tasks
218
238
 
219
- Use `N.M`-style numeric requirement IDs where `N` is the top-level requirement number from requirements.md (for example, Requirement 1 → 1.1, 1.2; Requirement 2 → 2.1, 2.2), and `M` is a local index within that requirement group.
239
+ Use the requirement ID style already present in `requirements.md` (`R1`, `REQ-01`, or `N.M`). The task filename cluster (`task-R1-01-*`) does not have to mirror every requirement ID exactly, but every requirement MUST be listed in at least one task's `## Requirements` section.
220
240
 
221
241
  Document any intentionally deferred requirements with rationale.
@@ -7,9 +7,11 @@
7
7
  **Dependencies:** {{DEPENDENCIES}}
8
8
  **Spec:** specs/{{FEATURE_NAME}}/
9
9
 
10
- ## Objective
10
+ ## Context
11
11
 
12
- {{Brief 1-2 sentence objective detailing WHAT to accomplish, not HOW. Must relate directly to requirement R{{REQ_NUMBER}}.}}
12
+ - **Why**: {{Business/user reason this task exists}}
13
+ - **Current state**: {{Relevant existing files, route, model, API, screen, or "greenfield"}}
14
+ - **Target outcome**: {{Observable behavior after this task is done}}
13
15
 
14
16
  ## Constraints
15
17
 
@@ -18,33 +20,26 @@
18
20
  - **MUST NOT**: {{Explicitly forbidden action or approach}}
19
21
  - **SCOPE**: Implement only the behavior mapped to R{{REQ_NUMBER}} and the approved `scope_lock`; do not add out-of-scope features or leave scoped acceptance criteria unwired.
20
22
 
21
- ## Implementation Steps
22
-
23
- - [ ] 1. {{MAJOR_STEP_1}}
24
- - [ ] 1.1 {{Sub-task describing specific behavior/action}}
25
- - {{Detail: business logic, behavior, target validation}}
26
- - {{Detail: edge case or constraint}}
27
- - _Requirements: {{REQ_NUMBER}}.{{X}}_
28
- - [ ] 1.2 {{Next sub-task}}
29
- - {{Detail items}}
30
- - _Requirements: {{REQ_NUMBER}}.{{Y}}_
31
-
32
- - [ ] 2. {{MAJOR_STEP_2}}
33
- - [ ] 2.1 {{Sub-task}}
34
- - {{Details}}
35
- - _Requirements: {{REQ_NUMBER}}.{{Z}}_
36
- - [ ] 2.2 {{Sub-task}}
37
- - {{Details}}
38
- - _Requirements: {{REQ_NUMBER}}.{{W}}_
39
-
40
- - [ ] 3. Test coverage for R{{REQ_NUMBER}}
41
- - [ ] 3.1 Unit tests
42
- - {{Test case 1: target behavior to verify}}
43
- - {{Test case 2: edge case / error case}}
44
- - _Requirements: {{REQ_NUMBER}}_
45
- - [ ]* 3.2 Integration tests (optional for MVP)
46
- - {{Describe end-to-end flow to verify}}
47
- - _Requirements: {{REQ_NUMBER}}_
23
+ ## Steps
24
+
25
+ - [ ] 1. {{Actionable step with exact file/path/contract}}
26
+ - {{Business intent: what user/system behavior this enables}}
27
+ - {{Code detail: schema/API/component/function/route and validation rules}}
28
+ - _Requirements: {{REQ_NUMBER}}.{{X}}_
29
+
30
+ - [ ] 2. {{Next actionable step}}
31
+ - {{Business intent}}
32
+ - {{Code detail, edge case, or integration contract}}
33
+ - _Requirements: {{REQ_NUMBER}}.{{Y}}_
34
+
35
+ - [ ] 3. Verification implementation
36
+ - {{Unit/integration/e2e test or explicit manual verification hook}}
37
+ - _Requirements: {{REQ_NUMBER}}_
38
+
39
+ ## Requirements
40
+
41
+ - {{REQ_NUMBER}}.{{X}} — {{Acceptance criterion or requirement covered}}
42
+ - {{REQ_NUMBER}}.{{Y}} — {{Acceptance criterion or requirement covered}}
48
43
 
49
44
  ## Related Files
50
45
 
@@ -60,11 +55,21 @@
60
55
  - [ ] {{Criteria 3 — maps directly to acceptance criteria from requirements.md and can be proven below}}
61
56
  - [ ] {{Criteria 4 — no orphaned component/service/route/command; created runtime-facing work is reachable from the declared entrypoint or explicitly deferred to a named integration task}}
62
57
 
63
- ## Task Test Plan & Verification Evidence
58
+ ## Evidence
59
+
60
+ This section is both the task-level test plan and the proof checklist. Keep it short, exact, and executable.
61
+ Select the proof by task risk; do not run every test type for every task.
64
62
 
65
- This section is the task-level test plan. It names the exact commands, observable runtime/artifact proof, and negative-path checks required before this task can be marked done.
63
+ - Logic/data/validator task: include unit tests.
64
+ - Stateful UI/component task: include component or integration tests.
65
+ - Cross-module/API/state flow task: include integration tests.
66
+ - User-facing end-to-end workflow: include E2E/UI flow verification.
67
+ - Layout/theme/responsive task: include visual/runtime viewport checks.
68
+ - Interactive UI task: include accessibility checks when keyboard, focus, labels, or ARIA can regress.
69
+ - Scaffold/release task: include smoke build/test/dev-server checks.
70
+ - Performance/security checks are required only when the requirement, risk, or touched surface calls for them.
66
71
 
67
- - [ ] Automated verification
72
+ - [ ] Automated verification (unit/component/integration/E2E as applicable)
68
73
  - Command(s): `{{TYPECHECK / TEST / BUILD COMMANDS OR N/A}}`
69
74
  - Expected proof: {{What output, exit code, or report proves success}}
70
75
  - [ ] Artifact / runtime verification
@@ -89,4 +94,4 @@ This section is the task-level test plan. It names the exact commands, observabl
89
94
  > **Parallel marker**: Append `(P)` to the title if this task can run concurrently with another (usually when serving different requirements).
90
95
  > **Test note**: If a test coverage sub-task can be deferred post-MVP, mark it with `- [ ]*`.
91
96
  > **Requirement mapping**: Every sub-task MUST end with `_Requirements: X.X_`. No mapping = invalid task file.
92
- > **Verification rule**: No `## Task Test Plan & Verification Evidence` section = invalid task file. Existing specs may use legacy `## Verification & Evidence`; agents must support both headings.
97
+ > **Evidence rule**: No `## Evidence` section = invalid task file. Existing specs may use `## Task Test Plan & Verification Evidence` or legacy `## Verification & Evidence`; agents must support all three headings.
@@ -34,8 +34,8 @@ Scans the `spec.json` against all physical `task-R*.md` files to detect mismatch
34
34
 
35
35
  1. **Precision Edits:** Never overwrite the entire `spec.json` string blindly. Update only the required keys, while keeping JSON valid.
36
36
  2. **Machine + Human Sync:** Every task status update MUST modify both `spec.json.task_registry[...]` and the matching markdown task file header/status section.
37
- 3. **Markdown Integrity:** When marking a task `done`, only then turn `[ ]` into `[x]` inside `## Implementation Steps` and relevant `Completion Criteria` / `Task Test Plan & Verification Evidence` checkboxes that have actual proof. Legacy `Verification & Evidence` sections are supported.
38
- 4. **Verification Receipt Rule:** `done` is illegal without a human-readable verification receipt already present in `## Task Test Plan & Verification Evidence` or legacy `## Verification & Evidence` (commands executed, artifact/runtime proof, or equivalent concrete evidence). If proof is missing, keep the task `in_progress` or `blocked`.
37
+ 3. **Markdown Integrity:** When marking a task `done`, only then turn `[ ]` into `[x]` inside `## Steps` / `## Implementation Steps` and relevant `Completion Criteria` / `Evidence` checkboxes that have actual proof. `Task Test Plan & Verification Evidence` and legacy `Verification & Evidence` sections are supported.
38
+ 4. **Verification Receipt Rule:** `done` is illegal without a human-readable verification receipt already present in `## Evidence`, `## Task Test Plan & Verification Evidence`, or legacy `## Verification & Evidence` (commands executed, artifact/runtime proof, or equivalent concrete evidence). If proof is missing, keep the task `in_progress` or `blocked`.
39
39
  5. **Task Docs Hook:** Every time `hapo:sync` marks a task as `done`, it must flag that a task-level docs checkpoint is now due for that verified task.
40
40
  6. **Phase Prompt Rule:** When `hapo:sync` marks the final pending task in the whole feature as `done`, it should automatically prompt the user if they'd like to advance the phase, but only after the docs checkpoint for that last completed task has been considered.
41
41
 
@@ -15,7 +15,7 @@ When requested to update a phase or change task configuration, `spec.json` must
15
15
  - full relative path like `tasks/task-R0-02-extension-shell.md`
16
16
  * **Status Update:** If a task changes to `blocked`, the matching `task_registry[path].status` must become `"blocked"`, `task_registry[path].blocker` must record the reason, and `spec.json.status` / `spec.json.blocker` must reflect the top-level block if work is globally blocked.
17
17
  * **Timestamp Rule:** Update `task_registry[path].started_at`, `completed_at`, and `last_updated_at` consistently with the new state. Also refresh `spec.json.updated_at`.
18
- * **Done-State Rule:** Never set `task_registry[path].status = "done"` unless the matching markdown task file already contains a verification receipt in `## Task Test Plan & Verification Evidence` or legacy `## Verification & Evidence`, or the caller explicitly provides proof that can be written there first.
18
+ * **Done-State Rule:** Never set `task_registry[path].status = "done"` unless the matching markdown task file already contains a verification receipt in `## Evidence`, `## Task Test Plan & Verification Evidence`, or legacy `## Verification & Evidence`, or the caller explicitly provides proof that can be written there first.
19
19
  * **Receipt Integrity Rule:** A valid verification receipt must include the exact commands run, their outcomes, and artifact/runtime proof. Receipts containing `PRECHECK_FAIL`, `FAIL`, `UNVERIFIED`, or explicit "placeholder / simplified for MVP / production later" contract deviations are not eligible for `done`.
20
20
  * **Contract Fidelity Rule:** If the task file notes or evidence show that a named framework/auth/runtime choice from the spec was silently replaced, sync MUST refuse `done` until the spec is amended or the implementation is corrected.
21
21
  * **Task Docs Rule:** After a task is moved to `done`, emit a short alert that a task-level docs checkpoint is due for this verified task.
@@ -27,12 +27,12 @@ The structure of `tasks/task.md` relies heavily on exact keyword markers. Follow
27
27
  ### A. Completing a Task
28
28
  When `/hapo:sync <feature> <task-id> done`:
29
29
  1. Find: `**Status:** pending` (or `in_progress` / `blocked`).
30
- 2. Inspect `## Task Test Plan & Verification Evidence` first. If the task uses legacy `## Verification & Evidence`, inspect that section instead. If it has no explicit proof lines (commands run, artifact proof, runtime proof, or blockers cleared), STOP and refuse to mark the task done.
30
+ 2. Inspect `## Evidence` first. If the task uses `## Task Test Plan & Verification Evidence` or legacy `## Verification & Evidence`, inspect that section instead. If it has no explicit proof lines (commands run, artifact proof, runtime proof, or blockers cleared), STOP and refuse to mark the task done.
31
31
  3. Refuse completion if the receipt contains any non-passing marker such as `PRECHECK_FAIL`, `FAIL`, `UNVERIFIED`, or an explicit note that the implementation substituted a named contract with a placeholder/custom simplification.
32
32
  4. Replace with: `**Status:** done`.
33
- 5. Locate block: `## Implementation Steps`.
33
+ 5. Locate block: `## Steps` or `## Implementation Steps`.
34
34
  6. Convert `- [ ]` into `- [x]` strictly within that section.
35
- 7. Update relevant checkboxes in `## Completion Criteria` and `## Task Test Plan & Verification Evidence` only when the caller provides or the file already contains real proof. For legacy task files, update `## Verification & Evidence` instead.
35
+ 7. Update relevant checkboxes in `## Completion Criteria` and `## Evidence` only when the caller provides or the file already contains real proof. For v0.8 or legacy task files, update `## Task Test Plan & Verification Evidence` or `## Verification & Evidence` instead.
36
36
  8. Surface a note such as: `Docs checkpoint due: task Rn-mm just completed`.
37
37
 
38
38
  ### B. Blocking a Task
@@ -59,7 +59,7 @@ When `/hapo:sync audit <feature>` is activated:
59
59
  - Missing disk file referenced in registry → remove or flag it
60
60
  - Markdown says `done` but registry not done → registry wins only if evidence already exists; otherwise downgrade markdown or flag conflict
61
61
  - Registry says `done` but markdown still pending → update markdown only if evidence exists
62
- - Either side says `done` but `## Task Test Plan & Verification Evidence` / legacy `## Verification & Evidence` has no concrete proof → downgrade to `in_progress` or flag conflict instead of preserving fake completion
62
+ - Either side says `done` but `## Evidence` / `## Task Test Plan & Verification Evidence` / legacy `## Verification & Evidence` has no concrete proof → downgrade to `in_progress` or flag conflict instead of preserving fake completion
63
63
  - Either side says `done` but the receipt contains `PRECHECK_FAIL`, `FAIL`, `UNVERIFIED`, or explicit contract-substitution notes → downgrade to `in_progress` or flag conflict
64
64
  5. **Correction Alert:** Output a brief markdown alert detailing mismatches fixed and any unresolved conflicts requiring manual review.
65
65
  6. **Task Docs Alert:** If audit reveals tasks newly marked `done`, include whether task-level docs sync appears still due or already accounted for in the current run summary.
@@ -35,7 +35,7 @@ If tests fail, list every failure explicitly — do not summarize failures away.
35
35
 
36
36
  <SCOPE-GATE>
37
37
  When a feature name or `specs/<feature>` path is supplied, testing is spec-aware.
38
- Load `spec.json`, `requirements.md`, `design.md`, active/recent task files, and Task Test Plan evidence.
38
+ Load `spec.json`, `requirements.md`, `design.md`, active/recent task files, and task `Evidence` / test-plan proof.
39
39
  The verdict MUST compare executed/reachable behavior against `scope_lock`, requirements, design contracts, task Completion Criteria, and runtime reachability obligations.
40
40
  Build/typecheck success without scoped runtime proof is not PASS.
41
41
  </SCOPE-GATE>
@@ -85,6 +85,15 @@ If the argument resolves to `specs/<feature>` or a feature directory under `spec
85
85
  4. Treat 0 executed tests as `NO_TESTS`, even if the command exits 0
86
86
  5. In Spec-Aware Mode, inspect runtime reachability from declared entrypoints/callers and fail if scoped surfaces are missing or orphaned
87
87
 
88
+ **Spec-aware test type escalation:**
89
+ - Unit tests are mandatory when task evidence covers pure logic, transforms, validators, sorting/filtering, or regressions.
90
+ - Component/integration tests are expected when task evidence covers stateful UI, context/store wiring, API/service boundaries, or persistence.
91
+ - E2E/UI flow tests are expected once a complete user-facing workflow exists, not for isolated foundation tasks.
92
+ - Visual/responsive checks are expected for layout, theme, dashboard, and style tasks.
93
+ - Accessibility checks are expected for interactive UI surfaces where focus, roles, labels, keyboard navigation, or ARIA can regress.
94
+ - Smoke checks are enough for scaffold/config tasks unless the task requires deeper proof.
95
+ - Performance/security checks are only mandatory when the requirement, design risk, or touched runtime boundary calls for them.
96
+
88
97
  **UI verification (`--ui` / `--ui-auth` / `--ui-flow`):**
89
98
  Execute multi-page discovery, then spawn **Parallel UI Subagents** (test-runner instances) to handle Smoke, Core-Vitals, Accessibility, SEO, Security, and User Flows simultaneously.
90
99
  See `references/execution-strategy.md` Phase C for full phase breakdown.