@haposoft/cafekit 0.7.23 → 0.7.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +81 -862
  2. package/bin/install.js +4 -3
  3. package/package.json +2 -3
  4. package/src/claude/agents/code-auditor.md +25 -1
  5. package/src/claude/agents/spec-maker.md +17 -2
  6. package/src/claude/agents/test-runner.md +22 -3
  7. package/src/claude/hooks/spec-state.cjs +4 -4
  8. package/src/claude/migration-manifest.json +1 -1
  9. package/src/claude/rules/state-sync.md +7 -5
  10. package/src/claude/skills/code-review/references/spec-compliance-review.md +8 -1
  11. package/src/claude/skills/develop/SKILL.md +25 -4
  12. package/src/claude/skills/develop/references/quality-gate.md +23 -13
  13. package/src/claude/skills/generate-graph/LICENSE +21 -0
  14. package/src/claude/skills/generate-graph/README.md +523 -0
  15. package/src/claude/skills/generate-graph/SKILL.md +427 -0
  16. package/src/claude/skills/generate-graph/agentloop-core.svg +101 -0
  17. package/src/claude/skills/generate-graph/agents/openai.yaml +4 -0
  18. package/src/claude/skills/generate-graph/assets/samples/sample-style1-flat.png +0 -0
  19. package/src/claude/skills/generate-graph/assets/samples/sample-style2-dark.png +0 -0
  20. package/src/claude/skills/generate-graph/assets/samples/sample-style3-blueprint.png +0 -0
  21. package/src/claude/skills/generate-graph/assets/samples/sample-style4-notion.png +0 -0
  22. package/src/claude/skills/generate-graph/assets/samples/sample-style5-glass.png +0 -0
  23. package/src/claude/skills/generate-graph/assets/samples/sample-style6-claude.png +0 -0
  24. package/src/claude/skills/generate-graph/assets/samples/sample-style7-openai.png +0 -0
  25. package/src/claude/skills/generate-graph/fixtures/agent-memory-types-style4.json +181 -0
  26. package/src/claude/skills/generate-graph/fixtures/api-flow-style7.json +40 -0
  27. package/src/claude/skills/generate-graph/fixtures/mem0-style1.json +297 -0
  28. package/src/claude/skills/generate-graph/fixtures/microservices-style3.json +64 -0
  29. package/src/claude/skills/generate-graph/fixtures/multi-agent-style5.json +45 -0
  30. package/src/claude/skills/generate-graph/fixtures/system-architecture-style6.json +48 -0
  31. package/src/claude/skills/generate-graph/fixtures/tool-call-style2.json +182 -0
  32. package/src/claude/skills/generate-graph/package.json +42 -0
  33. package/src/claude/skills/generate-graph/references/icons.md +281 -0
  34. package/src/claude/skills/generate-graph/references/style-1-flat-icon.md +108 -0
  35. package/src/claude/skills/generate-graph/references/style-2-dark-terminal.md +107 -0
  36. package/src/claude/skills/generate-graph/references/style-3-blueprint.md +113 -0
  37. package/src/claude/skills/generate-graph/references/style-4-notion-clean.md +94 -0
  38. package/src/claude/skills/generate-graph/references/style-5-glassmorphism.md +125 -0
  39. package/src/claude/skills/generate-graph/references/style-6-claude-official.md +209 -0
  40. package/src/claude/skills/generate-graph/references/style-7-openai.md +215 -0
  41. package/src/claude/skills/generate-graph/references/style-diagram-matrix.md +135 -0
  42. package/src/claude/skills/generate-graph/references/svg-layout-best-practices.md +100 -0
  43. package/src/claude/skills/generate-graph/scripts/generate-diagram.sh +157 -0
  44. package/src/claude/skills/generate-graph/scripts/generate-from-template.py +1556 -0
  45. package/src/claude/skills/generate-graph/scripts/test-all-styles.sh +135 -0
  46. package/src/claude/skills/generate-graph/scripts/validate-svg.sh +292 -0
  47. package/src/claude/skills/generate-graph/templates/agent-architecture.svg +28 -0
  48. package/src/claude/skills/generate-graph/templates/architecture.svg +23 -0
  49. package/src/claude/skills/generate-graph/templates/comparison-matrix.svg +14 -0
  50. package/src/claude/skills/generate-graph/templates/data-flow.svg +28 -0
  51. package/src/claude/skills/generate-graph/templates/er-diagram.svg +21 -0
  52. package/src/claude/skills/generate-graph/templates/flowchart.svg +21 -0
  53. package/src/claude/skills/generate-graph/templates/sequence.svg +20 -0
  54. package/src/claude/skills/generate-graph/templates/state-machine.svg +20 -0
  55. package/src/claude/skills/generate-graph/templates/timeline.svg +19 -0
  56. package/src/claude/skills/generate-graph/templates/use-case.svg +21 -0
  57. package/src/claude/skills/specs/SKILL.md +35 -5
  58. package/src/claude/skills/specs/references/review.md +1 -1
  59. package/src/claude/skills/specs/rules/tasks-generation.md +17 -0
  60. package/src/claude/skills/specs/templates/design.md +13 -0
  61. package/src/claude/skills/specs/templates/init.json +4 -1
  62. package/src/claude/skills/specs/templates/requirements.md +21 -8
  63. package/src/claude/skills/specs/templates/task.md +16 -3
package/bin/install.js CHANGED
@@ -430,7 +430,7 @@ function copyPlatformFiles(platformKey, results, options = {}) {
430
430
  'requirements.md',
431
431
  'design.md',
432
432
  'research.md',
433
- 'tasks.md'
433
+ 'task.md'
434
434
  ];
435
435
 
436
436
  specTemplates.forEach((fileName) => {
@@ -472,7 +472,7 @@ function copyPlatformFiles(platformKey, results, options = {}) {
472
472
  requiredSkills = CLAUDE_MIGRATION_MANIFEST?.skills?.required || [];
473
473
  } else if (platformKey === 'antigravity') {
474
474
  // Antigravity also needs shared investigation and impact-analysis skills
475
- requiredSkills = ['impact-analysis', 'debug', 'ai-multimodal'];
475
+ requiredSkills = ['impact-analysis', 'debug', 'ai-multimodal', 'generate-graph'];
476
476
  }
477
477
 
478
478
  requiredSkills
@@ -1089,7 +1089,8 @@ async function main() {
1089
1089
  }
1090
1090
  console.log();
1091
1091
  console.log('Next steps:');
1092
- console.log(' 1. Start your AI editor (Claude Code or Antigravity)');
1092
+ const nextEditorLabel = platforms.length === 1 ? PLATFORMS[platforms[0]].name : 'your AI editor';
1093
+ console.log(` 1. Start ${nextEditorLabel}`);
1093
1094
 
1094
1095
  // Show platform-specific hints
1095
1096
  for (const platformKey of platforms) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@haposoft/cafekit",
3
- "version": "0.7.23",
4
- "description": "Spec-Driven Development workflow for AI coding assistants. Supports Claude Code and Antigravity with spec-first workflows plus Claude Code hapo: skills.",
3
+ "version": "0.7.24",
4
+ "description": "Claude Code-first spec-driven workflow for AI coding assistants. Bundles CafeKit hapo: skills, runtime hooks, agents, and installer scaffolding.",
5
5
  "author": "Haposoft <nghialt@haposoft.com>",
6
6
  "license": "MIT",
7
7
  "private": false,
@@ -28,7 +28,6 @@
28
28
  "spec-driven",
29
29
  "workflow",
30
30
  "claude-code",
31
- "antigravity",
32
31
  "ai-coding",
33
32
  "specification",
34
33
  "requirements",
package/src/claude/agents/code-auditor.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: code-auditor
3
3
  tools: Glob, Grep, Read, Bash, WebFetch, WebSearch
4
- description: "Source Code Auditor. Scores code quality on a 10-point scale across 5 pillars (Security, Logic, Architecture, Principles, Convention). Returns a verdict: PASS, NEEDS FIXES, or USER INTERVENTION."
4
+ description: "Source Code Auditor. Scores code quality on a 10-point scale across 5 pillars (Security, Logic, Architecture, Principles, Convention) and checks task/spec completion drift. Returns a verdict: PASS, NEEDS FIXES, or USER INTERVENTION."
5
5
  ---
6
6
 
7
7
  # Code Auditor — Source Code Inspector
@@ -13,6 +13,18 @@ You DO NOT fix code. You only READ, SCORE, and REPORT.
13
13
 
14
14
 
15
15
 
16
+ ## Pre-Review: Task / Spec Compliance (MANDATORY)
17
+
18
+ If the prompt includes task file paths, requirement IDs, completion criteria, or design contracts, you MUST read them before reviewing code.
19
+
20
+ Extract and verify:
21
+ 1. Declared deliverables (files, routes, entrypoints, UI surfaces, schemas, migrations)
22
+ 2. Completion Criteria
23
+ 3. Verification & Evidence expectations
24
+ 4. Canonical Contracts & Invariants from the design
25
+
26
+ Any missing declared deliverable, placeholder-only wiring, or contract drift is a **Critical** issue even if tests/build pass.
27
+
16
28
  ## Pre-Review: Blast Radius Check (MANDATORY)
17
29
 
18
30
  Before reading any specific logic, you MUST run a Dependency Scope Check (Blast Radius):
@@ -37,6 +49,7 @@ Before reading any specific logic, you MUST run a Dependency Scope Check (Blast
37
49
 
38
50
  - Identify the list of newly created/modified files (received from prompt or via `git diff --name-only`).
39
51
  - Read the contents of each changed file.
52
+ - If task/spec files were provided, read them too and keep their completion criteria visible during the review.
40
53
 
41
54
  ### Step 2: Systematic Scan — 2 Passes
42
55
 
@@ -44,6 +57,7 @@ Before reading any specific logic, you MUST run a Dependency Scope Check (Blast
44
57
  - Hunt security vulnerabilities (injection, auth bypass, data leaks).
45
58
  - Hunt serious logic bugs (crashes, data loss, infinite loops).
46
59
  - Hunt severe architecture violations (circular imports, cross-layer coupling).
60
+ - Hunt missing required artifacts/runtime entrypoints and spec contract mismatches.
47
61
 
48
62
  **Pass 2 — Quality Scan (Non-Blocking Issues):**
49
63
  - Project conventions (`docs/code-standards.md` if available).
@@ -78,6 +92,11 @@ Classify each issue:
78
92
  - **Scope:** [N files, ~N lines of code]
79
93
  - **Verdict:** [PASS ≥ 9.5 | NEEDS FIXES | USER INTERVENTION REQUIRED]
80
94
 
95
+ ### Task / Spec Compliance
96
+ - [OK or issue] Required deliverables present?
97
+ - [OK or issue] Completion criteria actually satisfied?
98
+ - [OK or issue] Any contract drift vs design/task?
99
+
81
100
  ### 🔴 Critical Issues
82
101
  1. `file.ts:L42` — [Issue description] → [Suggested fix]
83
102
 
@@ -103,6 +122,11 @@ When called from `hapo:develop` Step 4 (Quality Gate Auto-Fix):
103
122
  | Score ≥ 9.5 AND Critical = 0 | ✅ **PASS** — Proceed to completion |
104
123
  | Score < 9.5 OR Critical > 0 | ❌ **FAIL** — Return issue list for AI to self-fix |
105
124
 
125
+ **Automatic Criticals:**
126
+ - Missing required entrypoint/artifact/runtime output named in the task/spec
127
+ - Placeholder scaffolding marked as complete when the task demanded real wiring
128
+ - Auth/session/transport/persistence behavior that contradicts the design contracts
129
+
106
130
  ## Operating Guidelines
107
131
 
108
132
  - Deliver actionable feedback — point out issues with specific fix examples.
package/src/claude/agents/spec-maker.md CHANGED
@@ -33,10 +33,10 @@ Init → Requirements → Design → Tasks
33
33
  ```
34
34
 
35
35
  ### Phase Gate Rules
36
- 1. **Init → Requirements**: `spec.json` must exist with `phase: "initialized"` and valid `scope_lock`
36
+ 1. **Init → Requirements**: `spec.json` must exist with `phase: "initialized"`, `status: "in_progress"`, `current_phase: "init"`, and valid `scope_lock`
37
37
  2. **Requirements → Design**: `requirements.md` must exist with EARS-format acceptance criteria and numeric requirement IDs. `spec.json.approvals.requirements.generated` must be `true`
38
38
  3. **Design → Tasks**: `design.md` must exist. `spec.json.approvals.design.generated` must be `true`
39
- 4. **After each phase**: Update `spec.json` with correct `phase`, `progress`, `timestamps`, and approval fields
39
+ 4. **After each phase**: Update `spec.json` with correct `phase`, `current_phase`, `progress`, `timestamps`, and approval fields
40
40
 
41
41
  ### Auto-Approval Behavior
42
42
  - When running the full pipeline end-to-end, follow the auto-approval rules defined in `SKILL.md`.
@@ -62,6 +62,7 @@ All acceptance criteria MUST follow EARS syntax. Load `{{SKILLS_DIR}}/specs/rule
62
62
  ### Requirement ID Rules
63
63
  - Every requirement MUST have a unique **numeric** ID (e.g., "1", "1.1", "2")
64
64
  - NEVER use alphabetic IDs (e.g., "Requirement A")
65
+ - Non-functional requirements MUST continue the same numeric sequence. NEVER emit labels like `NFR-1`, `SEC-1`, `PERF-1`.
65
66
  - Requirement IDs are referenced downstream in design traceability and task mapping
66
67
 
67
68
  ## Design Protocol
@@ -83,6 +84,7 @@ Before writing `design.md`, select a discovery mode and record the reason:
83
84
  - For full mode: Load `{{SKILLS_DIR}}/specs/rules/design-discovery-full.md`
84
85
  - For light mode: Load `{{SKILLS_DIR}}/specs/rules/design-discovery-light.md`
85
86
  - Include Mermaid diagrams for multi-step or cross-boundary flows
87
+ - For auth/session, transport/entrypoint, persistence/schema, generated-artifact, or runtime-sensitive work: fill the `Canonical Contracts & Invariants` section and keep those decisions stable across all task files.
86
88
  - Record `discovery_mode` and `discovery_reason` in `spec.json.design_context`
87
89
 
88
90
  ### Requirements Traceability (MANDATORY)
@@ -103,6 +105,8 @@ Before writing `design.md`, select a discovery mode and record the reason:
103
105
  - Reject tasks outside `scope_lock.in_scope`
104
106
  - Requirement coverage format: list numeric IDs only, with no descriptive suffixes
105
107
  - Apply `(P)` parallel markers when applicable (load `{{SKILLS_DIR}}/specs/rules/tasks-parallel-analysis.md`)
108
+ - Every task MUST include `Verification & Evidence` with exact commands, artifacts/runtime surfaces, and negative-path checks.
109
+ - Completion criteria MUST be objective enough that a downstream quality gate can prove them without guesswork.
106
110
 
107
111
  ### Sub-Task Detail Requirements (MANDATORY)
108
112
  Each task file MUST contain granular sub-tasks with the following structure:
@@ -135,6 +139,16 @@ Task(subagent_type="researcher", prompt="Research [feature topic]")
135
139
 
136
140
  Before finalizing any specification, assert all 11 points in the `Pre-Finalization Checklist` defined in `SKILL.md`. Do not exit or declare completion until verifiable.
137
141
 
142
+ ### Finalization Audit (MANDATORY)
143
+
144
+ Before marking the spec ready:
145
+ 1. Re-scan `tasks/` and write `spec.json.task_files` from the real filesystem (sorted, relative paths)
146
+ 2. Fail if any on-disk task file is missing from `task_files`
147
+ 3. Fail if any path in `task_files` does not exist
148
+ 4. Infer `design_context.validation_recommended = true` for specs that involve auth, privacy, delete-data, migration, schema-change, browser-extension-permission, or external-provider work, or that contain 5 or more task files
149
+ 5. If `validation_recommended = true` and validation has not completed (or the user did not explicitly accept risk), keep `ready_for_implementation = false`
150
+ 6. Reject task files that use legacy non-numeric mappings like `NFR-1`
151
+
138
152
  ## Execution Workflow Summary
139
153
 
140
154
  ### 1. Scope Assessment
@@ -161,6 +175,7 @@ specs/<feature>/
161
175
 
162
176
  ### 4. Handoff
163
177
  - Update `spec.json` with `"status": "in_progress"` and `"current_phase": "develop"`
178
+ - Ensure `task_files` is synchronized and `ready_for_implementation` reflects the finalization audit outcome
164
179
  - Report the spec directory path to the orchestrator
165
180
  - DO NOT begin implementation yourself
166
181
 
package/src/claude/agents/test-runner.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: test-runner
3
- description: "QA execution engine. Runs unit/integration/e2e test suites, generates coverage reports, and validates build integrity. Operates in Diff-Aware mode by default — only testing files affected by recent changes."
3
+ description: "QA execution engine. Runs unit/integration/e2e test suites, generates coverage reports, validates build integrity, and checks task-level verification evidence. Operates in Diff-Aware mode by default — only testing files affected by recent changes."
4
4
  model: haiku
5
5
  ---
6
6
 
@@ -8,6 +8,11 @@ model: haiku
8
8
 
9
9
  You are a battle-hardened QA engineer who has been burned by production incidents. You hunt for untested paths, coverage holes, and silent failures with zero tolerance. You DO NOT write code. You run tests, analyze results, and report findings.
10
10
 
11
+ ## Task-Aware Inputs
12
+
13
+ If the prompt includes task file paths, Completion Criteria, or Verification & Evidence instructions, treat them as authoritative.
14
+ Diff-aware test selection does NOT replace task-specific verification.
15
+
11
16
  ## Operating Modes
12
17
 
13
18
  ### Mode 1: Diff-Aware (Default)
@@ -36,8 +41,10 @@ Run the entire test suite without diff filtering. Use when: first run, major ref
36
41
  1. **Detect Project Type:** Scan for `package.json`, `pytest.ini`, `Cargo.toml`, `pubspec.yaml` to identify the test runner.
37
42
  2. **Pre-flight Check:** Run typecheck/lint (`npx tsc --noEmit` or equivalent) to catch syntax errors before wasting time on tests.
38
43
  3. **Execute Tests:** Run the appropriate test command for the detected project. Deploy `hapo:web-testing` and `hapo:chrome-devtools` skills for rigorous UI/E2E browser test automation when testing frontends.
39
- 4. **Coverage Analysis:** Generate coverage report. Flag any module below 80% line coverage.
40
- 5. **Verdict:** Output structured report.
44
+ 4. **Build Verification:** Run the relevant build command when available (or the exact command requested by the task evidence section).
45
+ 5. **Task Evidence Audit:** Execute or inspect every verification item provided by the task. If a check cannot run, mark it `UNVERIFIED` with the exact blocker.
46
+ 6. **Coverage Analysis:** Generate coverage report. Flag any module below 80% line coverage.
47
+ 7. **Verdict:** Output structured report.
41
48
 
42
49
  ## Supported Ecosystems
43
50
 
@@ -62,6 +69,10 @@ Run the entire test suite without diff filtering. Use when: first run, major ref
62
69
  - Total: [N] | Passed: [N] | Failed: [N] | Skipped: [N]
63
70
  - Duration: [Xs]
64
71
 
72
+ ### Pre-flight & Build
73
+ - Typecheck/Lint: PASS | FAIL | N/A
74
+ - Build: PASS | FAIL | N/A
75
+
65
76
  ### Coverage
66
77
  - Lines: [X%] | Branches: [X%] | Functions: [X%]
67
78
  - ⚠️ Below threshold: [list modules < 80%]
@@ -69,6 +80,12 @@ Run the entire test suite without diff filtering. Use when: first run, major ref
69
80
  ### Failed Tests
70
81
  1. `test/file.test.ts:L42` — [Error message] → [Root cause hint]
71
82
 
83
+ ### Task Evidence
84
+ - [PASS|FAIL|UNVERIFIED] [verification item] → [proof or blocker]
85
+
86
+ ### Unverified Items
87
+ - [list anything that could not be executed or inspected]
88
+
72
89
  ### Unmapped Files (No Tests Found)
73
90
  - `src/new-module.ts` — Consider adding tests for [function/class]
74
91
 
@@ -81,4 +98,6 @@ Run the entire test suite without diff filtering. Use when: first run, major ref
81
98
  - **Zero Tolerance for Green Lies:** You have the absolute authority to assign a **FAIL Verdict** if you detect the developer wrote "fake tests" to appease the system.
82
99
  - **No Coverage Ignorance:** Any file below 80% line/branch coverage must be flagged explicitly.
83
100
  - **Flaky Tests:** If a test is flaky (passes/fails intermittently), flag it explicitly — do not retry silently.
101
+ - **No Evidence, No PASS:** If required artifact/runtime verification is missing, omitted, or blocked, you MUST NOT return PASS.
102
+ - **Placeholder Trap:** If build succeeds but the task-required entrypoint/artifact/runtime surface is missing (for example popup, content script, route, migration, auth flow), return FAIL or NEEDS_ATTENTION with evidence.
84
103
  - Report honestly. A failing test suite with a clear diagnosis is worth more than a green lie.
package/src/claude/hooks/spec-state.cjs CHANGED
@@ -48,7 +48,7 @@ try {
48
48
  if (fs.existsSync(specFile)) {
49
49
  try {
50
50
  const specData = JSON.parse(fs.readFileSync(specFile, 'utf8'));
51
- if (specData.status === 'in_progress') {
51
+ if (specData.status === 'in_progress' || specData.status === 'in-progress') {
52
52
  activeSpec = specData;
53
53
  featureName = entry.name;
54
54
  break; // take the first active one
@@ -72,9 +72,9 @@ try {
72
72
  lines.push(`- **Current Phase:** \`${phase}\``);
73
73
  lines.push('');
74
74
  lines.push(`> BẮT BUỘC (MANDATORY): Nếu bạn vừa hoàn thành một bước, bạn KHÔNG ĐƯỢC báo cáo "Đã xong" ngay.`);
75
- lines.push(`> Bạn PHẢI sử dụng công cụ Edit để cập nhật 2 tầng trạng thái dưới đây trước khi kết thúc lượt chat:`);
76
- lines.push(`> 1. Sửa file \`spec.json\` (chuyển đổi status, phase tương ứng).`);
77
- lines.push(`> 2. Sửa file \`tasks/task-*.md\` (chuyển 'pending' thành 'completed' và tick '[x]' các sub-task).`);
75
+ lines.push(`> Bạn PHẢI sử dụng công cụ Edit để cập nhật trạng thái vật sau khi đã bằng chứng verify thật (build/test/runtime/artifact), không phải chỉ vì code đã viết xong.`);
76
+ lines.push(`> 1. Sửa file \`spec.json\` (status, phase/current_phase, timestamps, \`task_files\`, validation state nếu có thay đổi).`);
77
+ lines.push(`> 2. Chỉ khi verify xong mới sửa file \`tasks/task-*.md\` (status + tick '[x]' các sub-task và completion criteria liên quan).`);
78
78
  lines.push(`> 3. NẾU VỪA HOÀN THÀNH 1 TASK CÓ SỬA SOURCE CODE, BẮT BUỘC cập nhật ngay tài liệu trong \`docs/\` (\`system-architecture.md\` hoặc Changelog) cho đồng bộ.`);
79
79
  lines.push(`> CẤM VI PHẠM LUẬT TOLLGATE NÀY NHẰM ĐẢM BẢO TÍNH ĐỒNG BỘ CỦA HỆ THỐNG.`);
80
80
  lines.push('');
package/src/claude/migration-manifest.json CHANGED
@@ -11,13 +11,13 @@
11
11
  "backend-development",
12
12
  "brainstorm",
13
13
  "chrome-devtools",
14
- "code",
15
14
  "code-review",
16
15
  "develop",
17
16
  "devops",
18
17
  "docx",
19
18
  "frontend-design",
20
19
  "frontend-development",
20
+ "generate-graph",
21
21
  "git",
22
22
  "hotfix",
23
23
  "impact-analysis",
package/src/claude/rules/state-sync.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  In any Spec-driven workflow (`hapo:specs`), the state of the project is physically persisted in **two layers**:
6
6
  1. **Machine Layer (`spec.json`)**: Tracks phase, status, and overall completion.
7
- 2. **Human Layer (`tasks/task-0*.md`)**: Checkboxes indicating granular execution progress.
7
+ 2. **Human Layer (`tasks/task-*.md`)**: Checkboxes indicating granular execution progress.
8
8
 
9
9
  ## The Sync-back Rule (Mandatory)
10
10
 
@@ -12,13 +12,15 @@ Whenever an agent finishes a task or blocks due to an issue, it **MUST NOT** sim
12
12
  Before returning control to the user or orchestrator, the agent **MUST**:
13
13
 
14
14
  ### On Success:
15
- 1. Update `spec.json`: Modify `current_phase` if moving forward, and ensure `status` accurately reflects progress.
16
- 2. Edit `task-XX.md`: Change `Trạng thái: pending` to `Trạng thái: completed` and check `[x]` the sub-task boxes.
17
- 3. Call `TaskUpdate` if Claude Tasks are active, setting the status to "completed" to unblock downstream agents.
15
+ 1. Update `spec.json`: Modify `current_phase` if moving forward, ensure `status` accurately reflects progress, and keep `task_files` synchronized with the real files on disk.
16
+ 2. Edit `task-XX.md`: Change `Status` only after real verification has passed (build/test/runtime/artifact). Then check `[x]` the sub-task boxes and relevant completion criteria.
17
+ 3. Call `TaskUpdate` if Claude Tasks are active, setting the status to "completed" only after the physical files were updated.
18
18
 
19
19
  ### On Block/Failure (>3 retries):
20
20
  1. Update `spec.json`: Set `"status": "blocked"` and fill out the `"blocker"` string with the root cause.
21
21
  2. Edit `task-XX.md`: Change `Status` from `pending` (or `in_progress`) to `blocked` and add a note describing the blocker.
22
22
  3. Alert the orchestrator or user via `AskUserQuestion` or explicit warning.
23
23
 
24
- **Golden Rule:** If the current phase changes, or a task completes, the agent must update the physical files. The context is intentionally NOT persisted in the chat to save tokens. An injected Hook (`spec-state.cjs`) constantly enforces and validates this state.
24
+ **Canonical state values:** New specs MUST use `status: "in_progress"` for active work. Legacy `in-progress` may be read for compatibility, but must not be emitted in new files.
25
+
26
+ **Golden Rule:** If the current phase changes, or a task completes, the agent must update the physical files. Never mark a task completed before there is execution proof. The context is intentionally NOT persisted in the chat to save tokens. An injected Hook (`spec-state.cjs`) constantly enforces and validates this state.
package/src/claude/skills/code-review/references/spec-compliance-review.md CHANGED
@@ -11,6 +11,7 @@ Code that runs smoothly, follows Clean Code principles, and has high performance
11
11
  - Prevent "feature creep": Developers arbitrarily adding unrequested features.
12
12
  - Prevent "dropped requirements": Developers forgetting core business logic requirements.
13
13
  - Ensure the User Interface perfectly matches the Design mockups.
14
+ - Prevent "fake done": Developers claiming completion while required runtime outputs, entrypoints, or artifacts are still missing.
14
15
 
15
16
  ## 2. Multimodal Invocation Process
16
17
 
@@ -20,7 +21,12 @@ Do not attempt a standard text-based review if the project includes Visual Specs
20
21
  1. Check if the `.specs/` directory, user instructions, or Jira tickets contain attached Image files (`.png`, `.jpg`, `.svg`) or Documents (`.pdf`).
21
22
  2. If YES: IMMEDIATELY halt static code analysis. Delegate the generated Frontend code / Logic code along with the Image/PDF to the **`hapo:ai-multimodal` analysis gateway**.
22
23
  - *Prompt:* "Hey `hapo:ai-multimodal`, please look at this design mockup/document and compare it with the layout/logic described in this Code. Are there any discrepancies?"
23
- 3. If NO (Markdown Spec only): Read the Spec directly and extract the requirement bullets to verify against the changed files.
24
+ 3. If NO (Markdown Spec only): Read the spec directly and extract:
25
+ - requirement bullets
26
+ - task `Completion Criteria`
27
+ - task `Verification & Evidence`
28
+ - canonical contracts/invariants from `design.md`
29
+ Then verify the changed files against those concrete obligations.
24
30
 
25
31
  ## 3. Verdict Scale
26
32
 
@@ -29,6 +35,7 @@ Each Requirement in the Spec must return 1 of 3 states:
29
35
  - `[MISSING]` Forgotten feature. Force the Developer to add it immediately (BLOCK MERGE).
30
36
  - `[EXTRA]` The code has become bloated with spontaneous features that are not in the spec card. If unjustified -> FAIL.
31
37
  - `[VISUAL_MISMATCH]` (For UI Design): The report from `ai-multimodal` indicates this screen will break layout or violate the Design System.
38
+ - `[UNPROVEN]` Required artifact/runtime behavior or verification evidence is missing, so completion cannot be trusted.
32
39
 
33
40
  ## 4. Red Flags
34
41
  - Praising "Clean Code" without measuring against Requirements.
package/src/claude/skills/develop/SKILL.md CHANGED
@@ -23,6 +23,11 @@ DO NOT write implementation code until an approved spec exists.
23
23
  - If the directory `specs/<feature-name>` DOES NOT EXIST or `spec.json` is not ready, automatically trigger `/hapo:specs <feature-name>` first to create the specification. Do not improvise.
24
24
  </HARD-GATE>
25
25
 
26
+ <DEFINITION-OF-DONE>
27
+ A task is NOT done because code compiles or a placeholder renders.
28
+ A task is done only when the task file's Completion Criteria AND Verification & Evidence section are satisfied with real execution proof.
29
+ </DEFINITION-OF-DONE>
30
+
26
31
  ## Anti-Rationalization Protocol
27
32
 
28
33
  | Thought (Excuse) | Reality (Rule) |
@@ -38,9 +43,9 @@ flowchart TD
38
43
  B -->|Missing| Z[Stop: Run /hapo:specs]
39
44
  B -->|Ready| C["Step 2: Scout Codebase (inspect)"]
40
45
  C --> D["Step 3: Implement Code (god-developer)"]
41
- D --> E[Step 4: Auto-Fix Code Review / Max 3 rounds]
46
+ D --> E[Step 4: Quality Gate: Test + Review + Evidence]
42
47
  E -->|"Fail (code-auditor)"| D
43
- E -->|Pass| F[Step 5: Incremental Docs Sync]
48
+ E -->|Pass| F[Step 5: State Sync + Incremental Docs Sync]
44
49
  F --> G[Report Completion]
45
50
  ```
46
51
 
@@ -50,6 +55,14 @@ flowchart TD
50
55
  - **Task Scoping (CRITICAL):**
51
56
  - If the user specifies a particular task file (e.g., `task-R0-02...md`), load **ONLY** that specific file into working memory.
52
57
  - If no specific task is mentioned, list and load all Markdown files in `specs/<feature-name>/tasks/*.md`.
58
+ - **Task Packet Extraction (MANDATORY):** Before coding, extract from the active task file(s):
59
+ - Objective + Constraints
60
+ - Related Files
61
+ - Completion Criteria
62
+ - Verification & Evidence
63
+ - Requirement IDs referenced by the task
64
+ - Relevant `Canonical Contracts & Invariants` from `design.md`
65
+ - If the task file is missing actionable completion or verification detail, STOP and route back to spec correction. Do not guess.
53
66
 
54
67
  ### Step 2: Scout (Codebase Inspection)
55
68
  - **Mandatory:** Call agent `Task(subagent_type="inspect", ...)` to scan the overall codebase structure (e.g., where components live, where utils are). Avoid wandering into forbidden zones.
@@ -57,17 +70,25 @@ flowchart TD
57
70
  ### Step 3: Implement Code
58
71
  - Act as `god-developer` OR directly write code, executing tasks specified in the loaded Markdown file(s) sequentially.
59
72
  - **Important:** You may create and modify files directly, but must faithfully follow the design from the Spec.
60
- - Progress tracking: Temporarily change `[ ]` to `[/]` in Spec files while coding is in progress.
73
+ - Progress tracking: Temporarily change `[ ]` to `[/]` in Spec files while coding is in progress. Do NOT mark `[x]` before Step 4 passes.
61
74
  - **Hard Stop Protocol:** If you were asked to implement a specific task file, you MUST STOP completely after that task is verified. DO NOT auto-chain or jump to "Next Task" simply because you see it in the spec. Wait for the user's next command.
62
75
  - **Test Integrity Protocol:** You MUST NOT delete, replace, or reduce the scope of existing test cases to make tests pass. If a test fails, you must fix the **implementation code** or fix the **test setup/mock**, NOT remove the assertion. Reducing test count or weakening assertions (e.g., removing `toHaveBeenCalledWith` and replacing with `toEqual(expect.any(...))`) is a Critical violation.
76
+ - **Contract Integrity Protocol:** If implementation appears to require changing auth/session, transport, persistence, entrypoint wiring, or generated artifact behavior beyond what `design.md` states, STOP and route back to spec correction instead of inventing a new contract in code.
63
77
 
64
78
  ### Step 4: Self-Healing (Quality Gate Auto-Fix)
65
79
  The moment you finish coding, DO NOT proceed further. Switch to `references/quality-gate.md` and run the automatic review loop.
66
80
  **Mantra:** All feedback from code-auditor must be addressed thoroughly: Score >= 9.5 & Zero Critical issues.
67
81
 
82
+ - Passing Step 4 requires ALL of the following:
83
+ 1. Automated verification passes (typecheck/test/build as applicable)
84
+ 2. Code review passes
85
+ 3. Task evidence passes (artifacts/runtime surfaces/negative-path checks from the task file are proven)
86
+ - If build/test passes but task evidence is missing, the task is still FAIL.
68
87
  - Only escalate to the user after 3 consecutive failed review rounds.
69
88
 
70
- ### Step 5: Incremental Docs Sync
89
+ ### Step 5: State Sync + Incremental Docs Sync
90
+ - Only after Step 4 passes may you mark task checkboxes completed and sync `spec.json` progress/timestamps.
91
+ - If verification is partial or blocked by environment, keep the task in `pending` or `in_progress` and record the blocker instead of pretending completion.
71
92
  - After passing the Quality Gate, evaluate if any actual codebase modifications occurred (e.g., check pending files via git status).
72
93
  - If files were created or modified: Trigger `docs-keeper` automatically to execute `repomix` and update the global `/docs/` and project logs.
73
94
  - **CWD Protocol (CRITICAL):** When spawning `docs-keeper`, you MUST ensure the agent's Current Working Directory (CWD context) is explicitly set to the **Workspace Root**, NOT the inner package directory you were just coding in. Otherwise, `docs-keeper` will search for the root `docs/` folder in the wrong place and crash.
@@ -2,6 +2,10 @@
2
2
 
3
3
  This is the critical checkpoint protecting codebase quality at Step 4 of `hapo:develop`.
4
4
  Runs AUTOMATICALLY. Only escalates to user after 3 consecutive failures or a critical block.
5
+ Green tests are NOT enough. The gate requires three proofs:
6
+ 1. Automated verification (typecheck/test/build)
7
+ 2. Code/spec review
8
+ 3. Task evidence (completion criteria + runtime/artifact proof from the task file)
5
9
 
6
10
  ## Parallel Quality Cycle
7
11
 
@@ -10,17 +14,22 @@ Maximum retry counter: **3 attempts**. Exceeding 3 triggers a collapse warning.
10
14
  ```text
11
15
  Variable: retry_count = 0
12
16
 
17
+ Before START_LOOP:
18
+ - Read the active task file(s)
19
+ - Extract Related Files, Completion Criteria, Verification & Evidence
20
+ - Extract relevant design contracts/invariants for the touched area
21
+ - If any of these are missing or too vague to verify, FAIL immediately and route back to spec correction
22
+
13
23
  START_LOOP:
14
24
  ---------------------------------------------------------------
15
25
  PARALLEL GATE: Spawn BOTH agents simultaneously
16
26
  ---------------------------------------------------------------
17
27
  → Task(subagent_type="test-runner",
18
- prompt="Run tests for recently implemented code. Blast-radius mode.",
28
+ prompt="Run task-aware verification for the recently implemented code. Read the active task file(s) and execute: pre-flight typecheck/lint, relevant tests, build commands, and every Verification & Evidence item that is executable. Inspect named artifacts/runtime outputs. Return PASS only if automated checks and task evidence both pass. Mark anything unexecuted as UNVERIFIED.",
19
29
  description="Test [feature]")
20
30
 
21
31
  → Task(subagent_type="code-auditor",
22
- prompt="Review all recently written code. Check security, performance,
23
- YAGNI/KISS/DRY. Return score (X/10), critical count, warning list.",
32
+ prompt="Review all recently written code against the active task file(s), referenced requirements, and design contracts. Missing deliverables, placeholder-only wiring, missing runtime entrypoints, or contract drift are Critical even if build/tests pass. Check security, logic, architecture, YAGNI/KISS/DRY. Return score (X/10), critical count, warning list, and evidence gaps.",
24
33
  description="Review [feature]")
25
34
 
26
35
  Wait for BOTH to return results.
@@ -29,29 +38,29 @@ START_LOOP:
29
38
  COMBINE RESULTS
30
39
  ---------------------------------------------------------------
31
40
 
32
- CASE 1 — Test FAIL:
41
+ CASE 1 — Test FAIL OR Evidence FAIL / UNVERIFIED:
33
42
  - Increment retry_count++
34
43
  - If retry_count >= 3:
35
- → COLLAPSE! AskUserQuestion: "Tests critically failing! User intervention required!"
44
+ → COLLAPSE! AskUserQuestion: "Quality gate cannot prove this task is complete! User intervention required!"
36
45
  - If retry_count < 3:
37
- → Return to Step 3 (god-developer). Fix the failing tests first.
46
+ → Return to Step 3 (god-developer). Fix the failing checks or missing evidence first.
38
47
  → GOTO START_LOOP (re-run BOTH test + review)
39
48
 
40
- CASE 2 — Test PASS + Review FAIL (Score < 9.5 OR Critical > 0):
49
+ CASE 2 — Test PASS + Evidence PASS + Review FAIL (Score < 9.5 OR Critical > 0):
41
50
  - Increment retry_count++
42
51
  - If retry_count >= 3:
43
52
  → COLLAPSE! AskUserQuestion: "Code does not meet minimum standards! User intervention required!"
44
53
  - If retry_count < 3:
45
54
  → Fix each review issue from warning log.
46
- → GOTO REVIEW_ONLY (skip re-test tests already passed)
55
+ → GOTO REVIEW_ONLY (skip the re-test only if the fixes cannot affect automated evidence; otherwise GOTO START_LOOP to rerun the full loop)
47
56
 
48
- CASE 3 — Test PASS + Review PASS (Score >= 9.5 AND Critical = 0):
57
+ CASE 3 — Test PASS + Evidence PASS + Review PASS (Score >= 9.5 AND Critical = 0):
49
58
  → PASS! Auto-approved.
50
59
  → PROCEED to completion report.
51
60
 
52
61
  REVIEW_ONLY:
53
62
  ---------------------------------------------------------------
54
- Re-run ONLY code-auditor (tests already passed no re-test)
63
+ Re-run ONLY code-auditor (tests already passed and no new evidence-producing code changed)
55
64
  ---------------------------------------------------------------
56
65
  → Task(subagent_type="code-auditor", ...)
57
66
 
@@ -67,12 +76,13 @@ REVIEW_ONLY:
67
76
  - **Performance:** Bottlenecks, O(n³) algorithms, unbounded loops over DB calls.
68
77
  - **Architecture:** Breaking MVC boundaries, cross-module coupling, convention violations.
69
78
  - **Principles:** YAGNI violations, KISS violations, DRY violations (excessive code duplication).
79
+ - **Evidence / Done-Criteria Drift:** Missing required artifacts, placeholder-only wiring, missing entrypoints, unproven completion criteria, or runtime contract mismatches.
70
80
 
71
81
  ## Terminal Log Format
72
82
 
73
83
  Must log the Quality Gate result to the terminal for user visibility:
74
84
 
75
- - **Quick Pass:** `✓ Step 4 Quality Gate: Test PASS + Review 9.5/10 - Auto-Approved`
76
- - **Hard-Won Pass:** `✓ Step 4 Quality Gate: Failed 2 rounds → Test PASS + Review 9.6/10`
77
- - **Test Fix Needed:** `[~] Step 4 Quality Gate: Tests failed → returned to god-developer`
85
+ - **Quick Pass:** `✓ Step 4 Quality Gate: Test PASS + Evidence PASS + Review 9.5/10 - Auto-Approved`
86
+ - **Hard-Won Pass:** `✓ Step 4 Quality Gate: Failed 2 rounds → Test PASS + Evidence PASS + Review 9.6/10`
87
+ - **Fix Needed:** `[~] Step 4 Quality Gate: Tests/evidence failed → returned to god-developer`
78
88
  - **Awaiting Rescue:** `[!] Step 4 Quality Gate: Failed 3 rounds! Awaiting user intervention...`
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 fireworks-tech-graph contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.