@curdx/flow 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +25 -0
- package/.claude-plugin/plugin.json +43 -0
- package/CHANGELOG.md +279 -0
- package/agent-preamble/preamble.md +214 -0
- package/agents/flow-adversary.md +216 -0
- package/agents/flow-architect.md +190 -0
- package/agents/flow-debugger.md +325 -0
- package/agents/flow-edge-hunter.md +273 -0
- package/agents/flow-executor.md +246 -0
- package/agents/flow-planner.md +204 -0
- package/agents/flow-product-designer.md +146 -0
- package/agents/flow-qa-engineer.md +276 -0
- package/agents/flow-researcher.md +155 -0
- package/agents/flow-reviewer.md +280 -0
- package/agents/flow-security-auditor.md +398 -0
- package/agents/flow-triage-analyst.md +290 -0
- package/agents/flow-ui-researcher.md +227 -0
- package/agents/flow-ux-designer.md +247 -0
- package/agents/flow-verifier.md +283 -0
- package/agents/persona-amelia.md +128 -0
- package/agents/persona-david.md +141 -0
- package/agents/persona-emma.md +179 -0
- package/agents/persona-john.md +105 -0
- package/agents/persona-mary.md +95 -0
- package/agents/persona-oliver.md +136 -0
- package/agents/persona-rachel.md +126 -0
- package/agents/persona-serena.md +175 -0
- package/agents/persona-winston.md +117 -0
- package/bin/curdx-flow.js +5 -2
- package/cli/install.js +44 -5
- package/commands/audit.md +170 -0
- package/commands/autoplan.md +184 -0
- package/commands/debug.md +199 -0
- package/commands/design.md +155 -0
- package/commands/discuss.md +162 -0
- package/commands/doctor.md +124 -0
- package/commands/fast.md +128 -0
- package/commands/help.md +119 -0
- package/commands/implement.md +381 -0
- package/commands/index.md +261 -0
- package/commands/init.md +105 -0
- package/commands/install-deps.md +128 -0
- package/commands/party.md +241 -0
- package/commands/plan-ceo.md +117 -0
- package/commands/plan-design.md +107 -0
- package/commands/plan-dx.md +104 -0
- package/commands/plan-eng.md +108 -0
- package/commands/qa.md +118 -0
- package/commands/requirements.md +146 -0
- package/commands/research.md +141 -0
- package/commands/review.md +168 -0
- package/commands/security.md +109 -0
- package/commands/sketch.md +118 -0
- package/commands/spec.md +135 -0
- package/commands/spike.md +181 -0
- package/commands/start.md +189 -0
- package/commands/status.md +139 -0
- package/commands/switch.md +95 -0
- package/commands/tasks.md +189 -0
- package/commands/triage.md +160 -0
- package/commands/verify.md +124 -0
- package/gates/adversarial-review-gate.md +219 -0
- package/gates/coverage-audit-gate.md +184 -0
- package/gates/devex-gate.md +255 -0
- package/gates/edge-case-gate.md +194 -0
- package/gates/karpathy-gate.md +130 -0
- package/gates/security-gate.md +218 -0
- package/gates/tdd-gate.md +188 -0
- package/gates/verification-gate.md +183 -0
- package/hooks/hooks.json +56 -0
- package/hooks/scripts/fail-tracker.sh +31 -0
- package/hooks/scripts/inject-karpathy.sh +52 -0
- package/hooks/scripts/quick-mode-guard.sh +64 -0
- package/hooks/scripts/session-start.sh +76 -0
- package/hooks/scripts/stop-watcher.sh +166 -0
- package/knowledge/atomic-commits.md +262 -0
- package/knowledge/epic-decomposition.md +307 -0
- package/knowledge/execution-strategies.md +278 -0
- package/knowledge/karpathy-guidelines.md +219 -0
- package/knowledge/planning-reviews.md +211 -0
- package/knowledge/poc-first-workflow.md +227 -0
- package/knowledge/spec-driven-development.md +183 -0
- package/knowledge/systematic-debugging.md +384 -0
- package/knowledge/two-stage-review.md +233 -0
- package/knowledge/wave-execution.md +387 -0
- package/package.json +14 -3
- package/schemas/config.schema.json +100 -0
- package/schemas/spec-frontmatter.schema.json +42 -0
- package/schemas/spec-state.schema.json +117 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
---
|
|
2
|
+
gate: security-gate
|
|
3
|
+
category: enterprise-mode
|
|
4
|
+
severity: blocking
|
|
5
|
+
depends_on: []
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Security Gate — Security Baseline Enforcement
|
|
9
|
+
|
|
10
|
+
> Enabled by default in Enterprise mode. Violating "high-risk" items blocks release.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Trigger Timing
|
|
15
|
+
|
|
16
|
+
- When `/curdx-flow:security` runs
|
|
17
|
+
- Before `/curdx-flow:ship` (auto-triggered, Phase 6+)
|
|
18
|
+
- When committing specs involving auth / payments / PII
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Core Red Lines (high-risk, blocking)
|
|
23
|
+
|
|
24
|
+
### SR-01: Hardcoded Credentials
|
|
25
|
+
|
|
26
|
+
Scan:
|
|
27
|
+
```bash
|
|
28
|
+
grep -rnE "(api[_-]?key|secret|password|token)[[:space:]]*[:=][[:space:]]*['\"][^'\"]{12,}" src/
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Hit → block release + force rotate credential.
|
|
32
|
+
|
|
33
|
+
### SR-02: SQL/Command Injection Points
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# String-concatenated SQL
|
|
37
|
+
grep -rn "db.query.*\${.*req\." src/
|
|
38
|
+
grep -rn "execute.*\${.*user" src/
|
|
39
|
+
|
|
40
|
+
# Command injection
|
|
41
|
+
grep -rn "exec.*\${\|spawn.*\${" src/
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Hit → block, must switch to parameterized queries or shell escape.
|
|
45
|
+
|
|
46
|
+
### SR-03: XSS Injection Points
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
grep -rn "innerHTML\|dangerouslySetInnerHTML" src/
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Hit → must review data source. If it comes from user input without sanitization → block.
|
|
53
|
+
|
|
54
|
+
### SR-04: Sensitive Data in Logs
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
grep -rnE "(console|logger)\.(log|info|warn|error).*(password|token|secret|creditCard|ssn)" src/
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Hit → block, switch to a redact wrapper.
|
|
61
|
+
|
|
62
|
+
### SR-05: Secret Management
|
|
63
|
+
|
|
64
|
+
- JWT secret / DB password must be env variables
|
|
65
|
+
- Validate at startup (fail fast)
|
|
66
|
+
- Must not fall back to default values
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Warning Items (non-blocking, must fix)
|
|
71
|
+
|
|
72
|
+
### SW-01: Error Message Leaks Existence
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
"User not found" vs "Wrong password"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Different messages let an attacker enumerate which accounts exist. Recommend unifying to "Invalid credentials".
|
|
79
|
+
|
|
80
|
+
### SW-02: Timing Attack
|
|
81
|
+
|
|
82
|
+
Response-time differences leak whether an account exists. Run bcrypt even for unknown users (against a fake hash) so both paths take comparable time.
|
|
83
|
+
|
|
84
|
+
### SW-03: CORS Too Permissive
|
|
85
|
+
|
|
86
|
+
`Access-Control-Allow-Origin: *` must be fixed before release.
|
|
87
|
+
|
|
88
|
+
### SW-04: Rate Limiting Missing
|
|
89
|
+
|
|
90
|
+
Login, registration, and password-reset paths without rate limit → can be brute-forced.
|
|
91
|
+
|
|
92
|
+
### SW-05: Dependency CVE
|
|
93
|
+
|
|
94
|
+
`npm audit` reports high/critical → must upgrade or exempt.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Mandatory Items (Enterprise default requirements)
|
|
99
|
+
|
|
100
|
+
### SM-01: All APIs Have Authorization
|
|
101
|
+
|
|
102
|
+
Not "public" by default. New endpoints default to `requireAuth`.
|
|
103
|
+
|
|
104
|
+
### SM-02: User Data Isolation
|
|
105
|
+
|
|
106
|
+
`WHERE user_id = ?` must use the current session's user_id; cannot trust the frontend parameter.
|
|
107
|
+
|
|
108
|
+
### SM-03: HTTPS Enforced
|
|
109
|
+
|
|
110
|
+
Production environment only accepts HTTPS. HTTP requests → 301 to HTTPS.
|
|
111
|
+
|
|
112
|
+
### SM-04: Cookie Security Flags
|
|
113
|
+
|
|
114
|
+
- HttpOnly (prevent XSS reads)
|
|
115
|
+
- Secure (HTTPS only)
|
|
116
|
+
- SameSite=Strict/Lax (prevent CSRF)
|
|
117
|
+
|
|
118
|
+
### SM-05: Password Storage
|
|
119
|
+
|
|
120
|
+
- Must be bcrypt/argon2 (not md5/sha)
|
|
121
|
+
- cost factor ≥ 12
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Checking Methods
|
|
126
|
+
|
|
127
|
+
### Automated Scan
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# Run all scans
|
|
131
|
+
bash scripts/security-scan.sh # provided by project (if available)
|
|
132
|
+
|
|
133
|
+
# Or use flow-security-auditor agent
|
|
134
|
+
/curdx-flow:security
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Dependency CVE
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
npm audit --audit-level=high
|
|
141
|
+
# or
|
|
142
|
+
pnpm audit
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Manual Review (design layer)
|
|
146
|
+
|
|
147
|
+
- Check if AD-NN in design.md has security relevance
|
|
148
|
+
- Check NFR-S in requirements.md
|
|
149
|
+
- Threat modeling (STRIDE)
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Violation Handling
|
|
154
|
+
|
|
155
|
+
### Blocking Items
|
|
156
|
+
|
|
157
|
+
- If SR-01 ~ SR-05 are found → block immediately, prohibit `/curdx-flow:ship`
|
|
158
|
+
- Must fix or explicitly exempt (record in STATE.md as tech debt + commitment to fix before release)
|
|
159
|
+
|
|
160
|
+
### Warning Items
|
|
161
|
+
|
|
162
|
+
- If SW-01 ~ SW-05 are found → warning, non-blocking
|
|
163
|
+
- But record in `security-debt.md`
|
|
164
|
+
- Re-check in the next audit
|
|
165
|
+
|
|
166
|
+
### Mandatory Items
|
|
167
|
+
|
|
168
|
+
- Missing SM-01 ~ SM-05 → warning (new features), blocking (production paths)
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Exemption Path
|
|
173
|
+
|
|
174
|
+
If you truly need to skip a security check:
|
|
175
|
+
|
|
176
|
+
1. Record in `.flow/STATE.md`:
|
|
177
|
+
```markdown
|
|
178
|
+
## Security Exemptions
|
|
179
|
+
- D-SEC-01 | 2026-04-19 | temporarily hardcoding JWT_SECRET in dev environment
|
|
180
|
+
- Exemption scope: dev environment only
|
|
181
|
+
- Risk owner: wdx
|
|
182
|
+
- Fix commitment: migrate to env before 2026-04-26
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
2. Explicitly mention in PR description
|
|
186
|
+
|
|
187
|
+
3. Next audit must re-check whether it has been fixed
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Output Format
|
|
192
|
+
|
|
193
|
+
```markdown
|
|
194
|
+
## Security Gate Report
|
|
195
|
+
|
|
196
|
+
Scan: commits abc..xyz + npm audit
|
|
197
|
+
Time: YYYY-MM-DD
|
|
198
|
+
|
|
199
|
+
### Blockers (SR): 1
|
|
200
|
+
- [SR-04] src/auth/login.ts:60 — logger records password field
|
|
201
|
+
|
|
202
|
+
### Warnings (SW): 2
|
|
203
|
+
- [SW-01] Inconsistent login error messages → enumerable
|
|
204
|
+
- [SW-05] axios 1.5.0 has CVE → `npm install axios@^1.6.0`
|
|
205
|
+
|
|
206
|
+
### Mandatory (SM): all satisfied
|
|
207
|
+
|
|
208
|
+
Verdict: BLOCKED (1 SR)
|
|
209
|
+
|
|
210
|
+
Fix list:
|
|
211
|
+
1. SR-04: wrap logger with redactPassword() (blocking, required)
|
|
212
|
+
2. SW-01: unify error messages (recommended)
|
|
213
|
+
3. SW-05: upgrade axios (recommended)
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
_Source: OWASP Top 10 + STRIDE + accumulated project experience._
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
---
|
|
2
|
+
gate: tdd-gate
|
|
3
|
+
category: standard-mode
|
|
4
|
+
severity: blocking
|
|
5
|
+
depends_on: []
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# TDD Gate — Red/Green/Yellow Cycle Enforcement
|
|
9
|
+
|
|
10
|
+
> **Iron rule**: NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.
|
|
11
|
+
>
|
|
12
|
+
> Source: superpowers' test-driven-development skill.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Trigger Timing
|
|
17
|
+
|
|
18
|
+
- All tasks in Phase 3 (Testing)
|
|
19
|
+
- Production code changes outside the POC phase
|
|
20
|
+
- Tasks explicitly marked `[RED]` / `[GREEN]` / `[YELLOW]`
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Applicability
|
|
25
|
+
|
|
26
|
+
✓ **TDD enforced**:
|
|
27
|
+
- Adding production logic (business rules, algorithms, data transformations)
|
|
28
|
+
- Modifying existing logic (even one line)
|
|
29
|
+
- Bug fixes (must have a failing test that reproduces the bug)
|
|
30
|
+
|
|
31
|
+
⊘ **Exemptible from TDD**:
|
|
32
|
+
- POC phase (Phase 1 of POC-First)
|
|
33
|
+
- Pure configuration changes (`.json` / `.yaml`)
|
|
34
|
+
- Documentation (`.md`)
|
|
35
|
+
- Formatting (does not change behavior)
|
|
36
|
+
- Dependency upgrades (only modify package.json, unless major version)
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## RED → GREEN → YELLOW Enforcement Rules
|
|
41
|
+
|
|
42
|
+
### RED (failing test)
|
|
43
|
+
|
|
44
|
+
**Rules**:
|
|
45
|
+
- The test **must actually fail** before continuing
|
|
46
|
+
- "I wrote the test but haven't run it" is not allowed
|
|
47
|
+
- "The test passes as soon as written" is not allowed (this means it doesn't actually test anything)
|
|
48
|
+
|
|
49
|
+
**Check**:
|
|
50
|
+
```bash
|
|
51
|
+
# Expect a non-zero exit code
|
|
52
|
+
npm test -- <test-file>
|
|
53
|
+
echo "Exit: $?" # must be non-0
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Commit format**: `test(scope): red - <what the test verifies>`
|
|
57
|
+
|
|
58
|
+
**Violations**:
|
|
59
|
+
- ✗ Wrote the test but didn't run it (no fresh evidence)
|
|
60
|
+
- ✗ Test passes the first time it runs
|
|
61
|
+
- ✗ Started writing implementation before writing the test
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
### GREEN (minimal implementation)
|
|
66
|
+
|
|
67
|
+
**Rules**:
|
|
68
|
+
- Write the **least code** needed to pass the RED test
|
|
69
|
+
- Don't think about elegance, abstraction, or extensibility
|
|
70
|
+
- Focus on making the test pass
|
|
71
|
+
|
|
72
|
+
**Check**:
|
|
73
|
+
```bash
|
|
74
|
+
npm test -- <test-file>
|
|
75
|
+
echo "Exit: $?" # must be 0
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Commit format**: `feat(scope): green - <what was implemented>`
|
|
79
|
+
|
|
80
|
+
**Violations**:
|
|
81
|
+
- ✗ Added functionality beyond what makes the test pass
|
|
82
|
+
- ✗ Did abstraction and implementation at the same time
|
|
83
|
+
- ✗ Other tests also pass but those tests were written outside this cycle
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
### YELLOW (refactor)
|
|
88
|
+
|
|
89
|
+
**Rules**:
|
|
90
|
+
- Clean up GREEN-phase code; **tests must still pass**
|
|
91
|
+
- No new behavior
|
|
92
|
+
- No new tests
|
|
93
|
+
|
|
94
|
+
**Check**:
|
|
95
|
+
```bash
|
|
96
|
+
# Tests run before and after, both exit 0
|
|
97
|
+
npm test -- <test-file>
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Commit format**: `refactor(scope): yellow - <what was cleaned up>`
|
|
101
|
+
|
|
102
|
+
**Violations**:
|
|
103
|
+
- ✗ Test fails during YELLOW (means behavior changed)
|
|
104
|
+
- ✗ YELLOW added new functionality
|
|
105
|
+
- ✗ Deleted hard-to-change code (violates surgical changes)
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Reasons to Refuse (agent must reject these excuses)
|
|
110
|
+
|
|
111
|
+
| Excuse | Agent Response |
|
|
112
|
+
|------|---------|
|
|
113
|
+
| "This is too simple to test" | Simple code can break too. Write a minimal test. |
|
|
114
|
+
| "Write code first, test later" | After-the-fact tests miss edge cases. Go back to RED. |
|
|
115
|
+
| "I already tested manually" | No automation record means no regression. Write an automated test. |
|
|
116
|
+
| "Existing code has no tests, I won't either" | Coverage may only go up, never down. New code must at least cover the new logic. |
|
|
117
|
+
| "Time pressure" | Use fast mode or `/curdx-flow:fast` to get an exemption; otherwise go through full TDD. |
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Exemption Path (explicit)
|
|
122
|
+
|
|
123
|
+
If you really need to skip TDD, you must:
|
|
124
|
+
|
|
125
|
+
1. Mark the exemption in `.flow/CONTEXT.md` or `.state.json`
|
|
126
|
+
2. Provide an exemption reason
|
|
127
|
+
3. Add a "tech debt" entry to `.flow/STATE.md`, with a commitment to fix later
|
|
128
|
+
|
|
129
|
+
```markdown
|
|
130
|
+
# STATE.md
|
|
131
|
+
## Tech Debt
|
|
132
|
+
- D-TDD-01 | 2026-04-19 | auth-system's refresh-token module skipped TDD
|
|
133
|
+
- Reason: urgent hotfix, CI takes 1 hour to complete
|
|
134
|
+
- Commitment: add tests next sprint, deadline 2026-04-26
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
No exemption record → TDD enforced.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Checking Methods
|
|
142
|
+
|
|
143
|
+
### flow-reviewer's Stage 2 invokes this gate
|
|
144
|
+
|
|
145
|
+
Scan git log; for each `feat(xxx):` commit:
|
|
146
|
+
1. Find the preceding `test(xxx): red -` commit
|
|
147
|
+
2. If none, that feat violates TDD
|
|
148
|
+
3. Exception: commit message contains `[skip-tdd]` and a corresponding record exists in STATE.md
|
|
149
|
+
|
|
150
|
+
### Coverage Check (auxiliary)
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
npm test -- --coverage
|
|
154
|
+
# Line coverage for new code must be ≥ 80%
|
|
155
|
+
# Uncovered lines must be explained in STATE.md
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Output Format
|
|
161
|
+
|
|
162
|
+
```markdown
|
|
163
|
+
## TDD Gate Check Result
|
|
164
|
+
|
|
165
|
+
Scan range: commits abc123..def456
|
|
166
|
+
Feat commits: 5
|
|
167
|
+
Test commits (red): 4
|
|
168
|
+
|
|
169
|
+
[T1] commit abc123 "feat(auth): add login endpoint"
|
|
170
|
+
Preceding RED: ✓ commit 789xyz "test(auth): red - login endpoint tests"
|
|
171
|
+
Verdict: compliant
|
|
172
|
+
|
|
173
|
+
[T2] commit def456 "feat(auth): add password hashing"
|
|
174
|
+
Preceding RED: ✗ no preceding test commit
|
|
175
|
+
Verdict: violation
|
|
176
|
+
Exception check: no [skip-tdd] marker, no STATE.md exemption
|
|
177
|
+
Block: yes
|
|
178
|
+
|
|
179
|
+
Violations: 1
|
|
180
|
+
Compliant: 4
|
|
181
|
+
|
|
182
|
+
Fix recommendations:
|
|
183
|
+
T2: add test test(auth): red - password hashing, verify it fails, then redo GREEN
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
_Source: superpowers' test-driven-development skill._
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
---
|
|
2
|
+
gate: verification-gate
|
|
3
|
+
category: always-on
|
|
4
|
+
severity: blocking
|
|
5
|
+
depends_on: []
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Verification Gate — Verification Required Before Completion
|
|
9
|
+
|
|
10
|
+
> **Always enabled**. No evidence = no completion. This is the Superpowers "Verification Before Completion" iron rule.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Core Rule
|
|
15
|
+
|
|
16
|
+
**Do not declare `done`, `fixed`, `passed`, `working`, `okay` unless there is fresh execution evidence.**
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Trigger Timing
|
|
21
|
+
|
|
22
|
+
- Before any agent outputs a "done/fixed/passed" conclusion
|
|
23
|
+
- Before writing a commit message that contains forbidden words
|
|
24
|
+
- Before Phase transitions (research → requirements, requirements → design, etc.)
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Forbidden Word List (English + Chinese)
|
|
29
|
+
|
|
30
|
+
Fresh evidence required to use:
|
|
31
|
+
|
|
32
|
+
**English**:
|
|
33
|
+
- `done`, `fixed`, `working`, `passed`, `resolved`, `completed`
|
|
34
|
+
- `should work`, `probably`, `likely`, `might work`
|
|
35
|
+
- `seems to`, `appears to`, `looks good`, `looks right`
|
|
36
|
+
- `great!`, `perfect!`, `all set`
|
|
37
|
+
|
|
38
|
+
**Chinese**:
|
|
39
|
+
- `完成`, `搞定`, `好了`, `可以了`, `修好了`
|
|
40
|
+
- `应该`, `可能`, `大概`, `似乎`, `好像`
|
|
41
|
+
- `看起来没问题`, `应该能工作`
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Allowed Conclusions Must Carry Evidence
|
|
46
|
+
|
|
47
|
+
✗ **Violation**:
|
|
48
|
+
> I fixed the login bug
|
|
49
|
+
|
|
50
|
+
✓ **Compliant**:
|
|
51
|
+
> I fixed the login bug. Re-running `npm test -- auth/login` produced:
|
|
52
|
+
> ```
|
|
53
|
+
> ✓ login endpoint rejects empty email
|
|
54
|
+
> ✓ login endpoint accepts valid credentials
|
|
55
|
+
> Test Suites: 1 passed, Tests: 2 passed
|
|
56
|
+
> ```
|
|
57
|
+
|
|
58
|
+
✗ **Violation**:
|
|
59
|
+
> The code looks fine
|
|
60
|
+
|
|
61
|
+
✓ **Compliant**:
|
|
62
|
+
> Ran `npx tsc --noEmit` → 0 errors
|
|
63
|
+
> Ran `npx eslint src/auth/` → 0 errors, 0 warnings
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Evidence Types
|
|
68
|
+
|
|
69
|
+
| Claim | Required Evidence |
|
|
70
|
+
|------|---------|
|
|
71
|
+
| "Fixed X" | command reproducing X + execution output |
|
|
72
|
+
| "Tests pass" | full `npm test` output including pass count |
|
|
73
|
+
| "Code is valid" | `tsc --noEmit` exit code + output |
|
|
74
|
+
| "API works" | `curl` response + status code |
|
|
75
|
+
| "Deployment succeeded" | deployment log + health check response |
|
|
76
|
+
| "User can log in" | browser / chrome-devtools test screenshot or log |
|
|
77
|
+
| "Performance meets target" | benchmark command + numbers |
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Checking Methods
|
|
82
|
+
|
|
83
|
+
### Agent Built-in (self-check)
|
|
84
|
+
|
|
85
|
+
Each agent runs an internal check before outputting:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
For each conclusion sentence, ask yourself:
|
|
89
|
+
1. Does this sentence contain a forbidden word?
|
|
90
|
+
2. If so, do I have fresh evidence supporting it?
|
|
91
|
+
3. Is the evidence from a just-executed command, or older / assumed?
|
|
92
|
+
4. If the evidence is not fresh or does not exist, re-run or delete this sentence.
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### flow-reviewer Agent (external check)
|
|
96
|
+
|
|
97
|
+
Scan:
|
|
98
|
+
- `commit messages` for forbidden words
|
|
99
|
+
- declarative sentences in `.progress.md`
|
|
100
|
+
- the conclusion section of agent output
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Violation Handling
|
|
105
|
+
|
|
106
|
+
### Severe (block)
|
|
107
|
+
|
|
108
|
+
- commit message contains forbidden word without evidence
|
|
109
|
+
- `.progress.md` says "done" but has no verify output
|
|
110
|
+
|
|
111
|
+
**Actions**:
|
|
112
|
+
- Block the commit (pre-commit hook, Phase 4+)
|
|
113
|
+
- Or dispatch flow-executor to re-run Verify and update the record
|
|
114
|
+
- Rewrite the commit message
|
|
115
|
+
|
|
116
|
+
### Medium (warning)
|
|
117
|
+
|
|
118
|
+
- Verbally saying "looks good" (not in commit or file)
|
|
119
|
+
- Using "should" as a hypothetical statement (acceptable)
|
|
120
|
+
|
|
121
|
+
**Action**: mark, non-blocking
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Special Cases
|
|
126
|
+
|
|
127
|
+
### "I already checked"
|
|
128
|
+
|
|
129
|
+
Not enough. "Checked" is a subjective claim. You need **execution output**.
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
✗ "I checked that all tests pass"
|
|
133
|
+
✓ "Ran npm test: Test Suites: 5 passed, Tests: 47 passed, Snapshots: 0 total"
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Partial Completion
|
|
137
|
+
|
|
138
|
+
Do not describe partial completion as complete. Clearly categorize:
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
✓ Completed:
|
|
142
|
+
- FR-01 login endpoint: passed (test output)
|
|
143
|
+
- FR-02 password encryption: passed (test output)
|
|
144
|
+
|
|
145
|
+
⚠ Partially completed:
|
|
146
|
+
- FR-03 Token refresh: code written but tests not yet run
|
|
147
|
+
|
|
148
|
+
✗ Not started:
|
|
149
|
+
- FR-04 Logout
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Output Format
|
|
155
|
+
|
|
156
|
+
```markdown
|
|
157
|
+
## Verification Gate Check Result
|
|
158
|
+
|
|
159
|
+
Scan range: commit abc123..def456
|
|
160
|
+
Statements containing forbidden words: 3
|
|
161
|
+
|
|
162
|
+
[V1] "Login bug fixed" (commit abc123)
|
|
163
|
+
Evidence: ✗ none (no corresponding test output or verify record found)
|
|
164
|
+
Verdict: block
|
|
165
|
+
|
|
166
|
+
[V2] "All tests pass" (.progress.md line 12)
|
|
167
|
+
Evidence: ✓ "npm test: 47/47 passed" (same file, line 11)
|
|
168
|
+
Verdict: compliant
|
|
169
|
+
|
|
170
|
+
[V3] "Should handle concurrency" (design.md AD-05)
|
|
171
|
+
Evidence: ⚠ "should" is hypothetical tone, acceptable
|
|
172
|
+
Verdict: warning (recommend adding a spike to verify)
|
|
173
|
+
|
|
174
|
+
Blockers: 1
|
|
175
|
+
Warnings: 1
|
|
176
|
+
|
|
177
|
+
Fix recommendations:
|
|
178
|
+
V1: dispatch flow-executor to run tests, add evidence to the commit message body
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
_Source: superpowers' verification-before-completion skill. CurDX-Flow turns it into a gate._
|
package/hooks/hooks.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"hooks": {
|
|
3
|
+
"SessionStart": [
|
|
4
|
+
{
|
|
5
|
+
"hooks": [
|
|
6
|
+
{
|
|
7
|
+
"type": "command",
|
|
8
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/session-start.sh"
|
|
9
|
+
}
|
|
10
|
+
]
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"InstructionsLoaded": [
|
|
14
|
+
{
|
|
15
|
+
"hooks": [
|
|
16
|
+
{
|
|
17
|
+
"type": "command",
|
|
18
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/inject-karpathy.sh"
|
|
19
|
+
}
|
|
20
|
+
]
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"PostToolUseFailure": [
|
|
24
|
+
{
|
|
25
|
+
"matcher": "Bash|Edit|Write",
|
|
26
|
+
"hooks": [
|
|
27
|
+
{
|
|
28
|
+
"type": "command",
|
|
29
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/fail-tracker.sh"
|
|
30
|
+
}
|
|
31
|
+
]
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"Stop": [
|
|
35
|
+
{
|
|
36
|
+
"hooks": [
|
|
37
|
+
{
|
|
38
|
+
"type": "command",
|
|
39
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/stop-watcher.sh"
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
],
|
|
44
|
+
"PreToolUse": [
|
|
45
|
+
{
|
|
46
|
+
"matcher": "AskUserQuestion",
|
|
47
|
+
"hooks": [
|
|
48
|
+
{
|
|
49
|
+
"type": "command",
|
|
50
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/quick-mode-guard.sh"
|
|
51
|
+
}
|
|
52
|
+
]
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# CurDX-Flow PostToolUseFailure Hook
|
|
3
|
+
# Tracks consecutive tool failures to enable pua integration (Phase 4+).
|
|
4
|
+
# For now, just maintains a counter in plugin data directory.
|
|
5
|
+
#
|
|
6
|
+
# Future: when pua is installed and fail_count >= threshold, auto-invoke /pua:pua.
|
|
7
|
+
|
|
8
|
+
set -u

# Counter location: CLAUDE_PLUGIN_DATA when set and non-empty, otherwise a
# per-user default under $HOME.
DATA_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.claude/plugins/data/curdx-flow}"
COUNTER="$DATA_DIR/fail-count"

# Best effort: a failure to create the directory is ignored.
mkdir -p "$DATA_DIR" 2>/dev/null || true

#######################################
# Print the failure count stored in a counter file.
# Arguments: $1 - path to the counter file
# Outputs:   the stored count on stdout; 0 when the file is missing,
#            unreadable, empty, or anything other than decimal digits
# Returns:   0 always
#######################################
read_fail_count() {
  local file=$1
  local value=""
  if [ -f "$file" ]; then
    value="$(cat "$file" 2>/dev/null || true)"
  fi
  # The value is later used inside $(( )), which evaluates variable names
  # and array subscripts (including command substitutions) found in its
  # operand. Accept digits only so a corrupted or tampered file can neither
  # abort the hook nor execute anything.
  case "$value" in
    '' | *[!0-9]*) value=0 ;;
  esac
  printf '%s\n' "$value"
}

# Read current count
CURRENT="$(read_fail_count "$COUNTER")"

# Increment. 10# forces base 10 so a stored value with leading zeros
# (e.g. "08") is not rejected or misread as octal.
NEXT=$((10#$CURRENT + 1))

# Persist, best effort. Redirections are applied left to right, so the
# group is needed for 2>/dev/null to also silence a failure to open $COUNTER.
{ echo "$NEXT" > "$COUNTER"; } 2>/dev/null || true
|
|
22
|
+
|
|
23
|
+
# Placeholder for future pua escalation (Phase 4+):
|
|
24
|
+
# if [ "$NEXT" -ge 2 ] && command -v claude >/dev/null 2>&1; then
|
|
25
|
+
# if claude plugin list 2>/dev/null | grep -q 'pua'; then
|
|
26
|
+
# # Inject escalation suggestion via hook output
|
|
27
|
+
# ...
|
|
28
|
+
# fi
|
|
29
|
+
# fi
|
|
30
|
+
|
|
31
|
+
# Exit successfully unconditionally; errors from the counter update above are ignored.
exit 0
|