@kodevibe/harness 0.11.3 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +4 -2
- package/README.md +3 -2
- package/harness/agents/lead.md +1 -0
- package/harness/agents/pm.md +5 -7
- package/harness/agents/reviewer.md +15 -14
- package/harness/skills/setup.md +2 -4
- package/harness/skills/state-check.md +17 -1
- package/harness/skills/wrap-up.md +27 -2
- package/package.json +1 -1
- package/src/guard.js +196 -0
package/README.ko.md
CHANGED
|
@@ -409,7 +409,7 @@ Bootstrap이 `docs/crew/`, `docs/PM/`, `docs/Analyst/`, `docs/ARB/`에서 crew
|
|
|
409
409
|
|
|
410
410
|
## 로드맵
|
|
411
411
|
|
|
412
|
-
kode:harness는 현재 **v0.11.
|
|
412
|
+
kode:harness는 현재 **v0.11.4** — v0.11 proof-first 기반 위에 R16 recovery hardening(거짓 clean state-check claim, surface-specific Story Contract, reviewer dependency evidence, dirty wrap-up guard)을 추가했습니다.
|
|
413
413
|
|
|
414
414
|
| 단계 | 버전 | 상태 | 초점 |
|
|
415
415
|
|------|------|------|------|
|
|
@@ -425,7 +425,9 @@ kode:harness는 현재 **v0.11.2** — v0.11의 proof-first와 uninstall safety
|
|
|
425
425
|
| **Confidence Loop** | v0.10.0 | ✅ 완료 | Goal Card, Quiet Navigator, Evidence-Gated Progress Board, Proof Ledger, QA/content 회귀 테스트 |
|
|
426
426
|
| **Proof-First Enforcement** | v0.11.0 | ✅ 완료 | Mandatory Proof Plan, lead proof blocker, reviewer proof blocker, state-check Proof Ledger coverage |
|
|
427
427
|
| **Uninstall Safety** | v0.11.1 | ✅ 완료 | Manifest 기반 uninstall, state 기본 보존, shared owner 복원, purge cleanup |
|
|
428
|
-
| **Deterministic Release Guard** | v0.11.2 | ✅
|
|
428
|
+
| **Deterministic Release Guard** | v0.11.2 | ✅ 완료 | R1-R10 guard scripts, package-boundary scan, dependency-map scan, R10 manifest-sealed bench workflow |
|
|
429
|
+
| **Experiment Hardening** | v0.11.3 | ✅ 완료 | R15 Recent Changes integrity, Wave Scope boundary drift checks, enum/filter coverage honesty |
|
|
430
|
+
| **Recovery Hardening** | v0.11.4 | ✅ 현재 | R16 false PASS claim guard, surface-specific Story Contract checks, reviewer dependency evidence, dirty wrap-up guard |
|
|
429
431
|
| **Docs Bridge** | v0.11.1 | 🧪 Experimental | Project Docs Hub Index, docs-bridge 스킬, visibility 경계를 가진 로컬 docs hub 인덱스 |
|
|
430
432
|
| **Safety & Branding** | v0.9.6 | ✅ 완료 | init overwrite 백업, 배포 파일 pm 네이밍 정리, LICENSE 브랜딩 정리 |
|
|
431
433
|
| **Validation** | v1.0 | 🔜 다음 | 실사용 검증, 사용자 피드백 수집 |
|
package/README.md
CHANGED
|
@@ -389,7 +389,7 @@ It adds a Project Docs Hub Index to `project-brief.md` with each local source, r
|
|
|
389
389
|
|
|
390
390
|
## Roadmap
|
|
391
391
|
|
|
392
|
-
kode:harness is at **v0.11.
|
|
392
|
+
kode:harness is at **v0.11.4** — adds R16 recovery hardening for false clean state-check claims, surface-specific Story Contracts, reviewer dependency evidence, and dirty wrap-up truthfulness on top of the v0.11 proof-first and deterministic release guard foundation.
|
|
393
393
|
|
|
394
394
|
| Phase | Version | Status | Focus |
|
|
395
395
|
|---|---|---|---|
|
|
@@ -406,7 +406,8 @@ kode:harness is at **v0.11.3** — adds R15 experiment hardening for section int
|
|
|
406
406
|
| **Proof-First Enforcement** | v0.11.0 | ✅ Complete | Mandatory Proof Plan, lead proof blockers, reviewer proof blockers, state-check Proof Ledger coverage |
|
|
407
407
|
| **Uninstall Safety** | v0.11.1 | ✅ Complete | Manifest-based uninstall, default state preservation, shared owner restore, purge cleanup |
|
|
408
408
|
| **Deterministic Release Guard** | v0.11.2 | ✅ Complete | R1-R10 guard scripts, package-boundary scan, dependency-map scan, R10 manifest-sealed bench workflow |
|
|
409
|
-
| **Experiment Hardening** | v0.11.3 | ✅
|
|
409
|
+
| **Experiment Hardening** | v0.11.3 | ✅ Complete | R15 Recent Changes integrity, Wave Scope boundary drift checks, enum/filter coverage honesty, R15 bench scenarios |
|
|
410
|
+
| **Recovery Hardening** | v0.11.4 | ✅ Current | R16 false PASS claim guard, surface-specific Story Contract checks, reviewer dependency evidence, dirty wrap-up guard |
|
|
410
411
|
| **Docs Bridge** | v0.11.1 | 🧪 Experimental | Project Docs Hub Index, docs-bridge skill, local docs hub index with visibility boundaries |
|
|
411
412
|
| **Safety & Branding** | v0.9.6 | ✅ Done | init overwrite backups, shipped pm naming cleanup, LICENSE branding cleanup |
|
|
412
413
|
| **Validation** | v1.0 | 🔜 Next | Real-world project adoption, user feedback collection |
|
package/harness/agents/lead.md
CHANGED
|
@@ -157,6 +157,7 @@ When a Story contains multiple Tasks/Waves (from breakdown):
|
|
|
157
157
|
- Only allowed files changed → continue.
|
|
158
158
|
- Extra files changed → output `[SCOPE-DRIFT: WAVE_BOUNDARY]`, record the extra files, and ask whether the Wave should be collapsed/approved before proceeding.
|
|
159
159
|
- Record a mini Proof Ledger row inline: Evidence, Result, Command / Observation
|
|
160
|
+
- For semantic contracts with "always/every/all/항상", include public surfaces in the Wave proof target (for example: `create/list/get/resolve` return paths). A test that covers only one return path is partial proof.
|
|
160
161
|
- Only after verification passes, prompt: "Wave {N} 완료 (tests pass). Wave {N+1}로 넘어갈까요?"
|
|
161
162
|
- If tests fail → output `[BLOCKER: WAVE_PROOF_FAILING]`, fix within the current Wave, and do NOT advance.
|
|
162
163
|
- This prevents context overload from modifying too many modules simultaneously
|
package/harness/agents/pm.md
CHANGED
|
@@ -42,24 +42,22 @@ One of:
|
|
|
42
42
|
|
|
43
43
|
### Step 0: State File Readiness
|
|
44
44
|
|
|
45
|
-
Before proceeding, verify
|
|
45
|
+
Before proceeding, verify required state files have content:
|
|
46
46
|
- `docs/project-brief.md` — Must have Vision and Goals filled
|
|
47
47
|
- `docs/features.md` — Must have at least one feature row
|
|
48
48
|
- `docs/dependency-map.md` — Must have at least one module row (for existing projects)
|
|
49
49
|
|
|
50
|
-
If ALL files are empty/placeholder-only → **Stop and run
|
|
51
|
-
If `docs/project-brief.md` alone is empty → **Stop.** Without Vision/Goals, pm cannot
|
|
50
|
+
If ALL files are empty/placeholder-only → **Stop and run `setup` first.**
|
|
51
|
+
If `docs/project-brief.md` alone is empty → **Stop.** Without Vision/Goals, pm cannot provide direction guard.
|
|
52
52
|
|
|
53
53
|
> Step 0 runs BEFORE Step 1. If Step 0 stops (empty brief), Step 1 never executes. When Step 0 passes, Step 1 reads the now-confirmed non-empty project-brief.md for detailed content.
|
|
54
54
|
|
|
55
55
|
### Step 0.5: Load Agent Memory
|
|
56
56
|
|
|
57
57
|
Read `docs/agent-memory/pm.md` for past learnings:
|
|
58
|
-
-
|
|
59
|
-
- Architecture patterns that worked or failed in this project
|
|
60
|
-
- Repeated planning mistakes to avoid
|
|
58
|
+
- estimation accuracy, architecture patterns, repeated planning mistakes
|
|
61
59
|
|
|
62
|
-
Apply these
|
|
60
|
+
Apply these when planning. If memory is empty/placeholders only, skip.
|
|
63
61
|
|
|
64
62
|
### Step 0.7: Roadmap Draft
|
|
65
63
|
|
|
@@ -36,17 +36,15 @@ Before reviewing, verify that required state files exist and are not empty:
|
|
|
36
36
|
- `docs/failure-patterns.md` — Must exist (needed for Step 5 cross-check)
|
|
37
37
|
- `docs/project-state.md` — Must have current Sprint info (needed for scope check)
|
|
38
38
|
|
|
39
|
-
If state files are empty/placeholder-only →
|
|
40
|
-
If `docs/failure-patterns.md` is empty, FP
|
|
39
|
+
If state files are empty/placeholder-only → warn that scope and FP checks are limited; suggest `setup`.
|
|
40
|
+
If `docs/failure-patterns.md` is empty, skip FP cross-check.
|
|
41
41
|
|
|
42
42
|
### Step 0.5: Load Agent Memory
|
|
43
43
|
|
|
44
44
|
Read `docs/agent-memory/reviewer.md` for past learnings:
|
|
45
|
-
-
|
|
46
|
-
- Common code patterns that caused issues
|
|
47
|
-
- Review statistics (pass rate, common failure categories)
|
|
45
|
+
- missed review items, risky code patterns, review statistics
|
|
48
46
|
|
|
49
|
-
|
|
47
|
+
If memory is empty/placeholders only, skip.
|
|
50
48
|
|
|
51
49
|
### Input
|
|
52
50
|
|
|
@@ -68,11 +66,11 @@ Changed file list (user-provided or from `git diff --name-only`)
|
|
|
68
66
|
**Step 2.2: Acceptance Contract Gate**
|
|
69
67
|
|
|
70
68
|
If `docs/project-state.md` has `## Story Contracts` rows for the Story:
|
|
71
|
-
1.
|
|
72
|
-
2.
|
|
73
|
-
3.
|
|
74
|
-
4.
|
|
75
|
-
5.
|
|
69
|
+
1. Compare each row against code, tests, API/UI, and proof.
|
|
70
|
+
2. Output **Story Contract Review**: `Contract | Status | Evidence`.
|
|
71
|
+
3. `FAIL`, `NOT_PROVEN`, blank Proof Status, or `needs-user-confirmation` blocks `DONE`.
|
|
72
|
+
4. Wrong-contract tests fail.
|
|
73
|
+
5. **R16 surface rule**: `always/every/all/항상` contracts must name/prove relevant public paths, e.g. `create/list/get/resolve`. Missing surfaces → `[CONTRACT-GAP: SURFACE_UNSPECIFIED]`.
|
|
76
74
|
|
|
77
75
|
<!-- CREW_MODE_START -->
|
|
78
76
|
**Step 2.5: CI Standards Compliance (🟣 Pipeline only)**
|
|
@@ -123,10 +121,9 @@ Record the result as a **Proof Ledger** entry. Keep it short:
|
|
|
123
121
|
If state files are in scope, write/request Proof Ledger / Evidence Summary immediately after proof passes.
|
|
124
122
|
|
|
125
123
|
**Step 4: Security Check (secure skill)**
|
|
126
|
-
- [ ] No credentials,
|
|
127
|
-
- [ ] No hardcoded API keys or passwords
|
|
128
|
-
- [ ] No injection vulnerabilities (SQL, XSS)
|
|
124
|
+
- [ ] No credentials, hardcoded secrets, injection risks, or temp files
|
|
129
125
|
- [ ] Evaluator artifacts require approval (`harness-owner: evaluator` → `harness-edit-approved`)
|
|
126
|
+
- [ ] **R16 scope/dependency evidence**: For "no external deps/auth/persistence", cite `package.json` and actual `require`/`import` lines. Do not name absent modules; hallucinated deps block `DONE`.
|
|
130
127
|
|
|
131
128
|
**Step 5: Failure Pattern Cross-Check**
|
|
132
129
|
- Compare current changes against all FP-NNN items in docs/failure-patterns.md
|
|
@@ -176,6 +173,8 @@ After running state-check, also verify:
|
|
|
176
173
|
- [ ] **docs/failure-patterns.md**: If a bug was fixed that matched a pattern, was frequency incremented?
|
|
177
174
|
- [ ] **docs/project-brief.md**: If a technology or architectural decision was made, is it in Decision Log?
|
|
178
175
|
- [ ] **docs/agent-memory/*.md**: If an agent (reviewer/pm/lead) was used this session, was its memory updated by the wrap-up skill?
|
|
176
|
+
- [ ] **R16 guard evidence**: Run/request the guard command and include its exact summary. Any guard error forbids `DONE`/`DONE_WITH_CONCERNS`:
|
|
177
|
+
`HARNESS_GUARD_ROOT="$PWD" node /path/to/k-harness/scripts/harness-guard.js docs/project-state.md`
|
|
179
178
|
|
|
180
179
|
For each missing update: flag as `[STATE-AUDIT]` in the output and provide the exact update that should be made.
|
|
181
180
|
**Severity**:
|
|
@@ -205,6 +204,8 @@ When review result is DONE or DONE_WITH_CONCERNS (no blockers):
|
|
|
205
204
|
|
|
206
205
|
If review is BLOCKED → do NOT suggest commit. Fix first.
|
|
207
206
|
|
|
207
|
+
Before commit guidance, run `git status --short`; do not imply a commit exists unless `git log --oneline -1` confirms it.
|
|
208
|
+
|
|
208
209
|
### Output Format
|
|
209
210
|
|
|
210
211
|
```
|
package/harness/skills/setup.md
CHANGED
|
@@ -61,10 +61,8 @@ Use `--overwrite` only to reset corrupted state after backup; then rerun setup t
|
|
|
61
61
|
- `sprint-manager.md` → should be renamed to `lead.md`
|
|
62
62
|
- `navigator.md` → should be renamed to `lead.md`
|
|
63
63
|
- `builder.md` → should be renamed to `pm.md`
|
|
64
|
-
- For each legacy file
|
|
65
|
-
|
|
66
|
-
- If BOTH exist → ask the user which to keep, or merge contents into the new name and delete the legacy
|
|
67
|
-
- Confirm with the user before renaming. Record the migration in `docs/project-state.md` Recent Changes.
|
|
64
|
+
- For each legacy file: offer rename if the new name is absent; if both exist, ask whether to keep or merge.
|
|
65
|
+
- Confirm before renaming and record the migration in Recent Changes.
|
|
68
66
|
|
|
69
67
|
**Do NOT modify any code files in this phase.**
|
|
70
68
|
|
|
@@ -174,6 +174,18 @@ If `docs/project-brief.md` maps one FR/KPI/ARB row to multiple Story IDs, requir
|
|
|
174
174
|
|
|
175
175
|
This catches wrap-up corruption where `## Recent Changes` is inserted in the middle of `FR-008 Durable UI Evidence` and steals the remaining proof content.
|
|
176
176
|
|
|
177
|
+
### Check 14: Self-Verify Claim Integrity (R16)
|
|
178
|
+
|
|
179
|
+
If `docs/project-state.md` or the caller output claims `state-check PASS`, `0 FAIL`, `0 WARN`, or `guard no issues`, the claim must be backed by deterministic evidence:
|
|
180
|
+
|
|
181
|
+
1. Prefer running the installed guard command:
|
|
182
|
+
`HARNESS_GUARD_ROOT="$PWD" node /path/to/k-harness/scripts/harness-guard.js docs/project-state.md`
|
|
183
|
+
2. If CLI execution is unavailable, do not claim `0 FAIL, 0 WARN`; say `manual state-check only`.
|
|
184
|
+
3. FAIL if any markdown/state/contract/handoff/env-seal issue is visible while the file claims clean self-verify.
|
|
185
|
+
4. FAIL if the guard output is summarized but not shown.
|
|
186
|
+
|
|
187
|
+
This catches reports such as "state-check PASS: 0 FAIL, 0 WARN" when a Proof Ledger table is malformed or Environment Seal is missing.
|
|
188
|
+
|
|
177
189
|
## Output Format
|
|
178
190
|
|
|
179
191
|
```
|
|
@@ -213,6 +225,10 @@ This catches wrap-up corruption where `## Recent Changes` is inserted in the mid
|
|
|
213
225
|
### Check 13: Recent Changes Section Integrity
|
|
214
226
|
- Recent Changes contains only changelog entries / {M} misplaced evidence lines
|
|
215
227
|
|
|
228
|
+
### Check 14: Self-Verify Claim Integrity
|
|
229
|
+
- Guard output: shown / missing
|
|
230
|
+
- Clean PASS claim matches deterministic result: yes/no
|
|
231
|
+
|
|
216
232
|
<!-- CREW_MODE_START -->
|
|
217
233
|
### Check 6: Validation Tracker (🟣)
|
|
218
234
|
- {N} FR references checked / {M} drifted
|
|
@@ -252,7 +268,7 @@ When invoked by another agent (pm/reviewer/wrap-up), control returns to the call
|
|
|
252
268
|
|
|
253
269
|
- Do NOT invent data. Read the files and report exactly what you find.
|
|
254
270
|
- Do NOT modify state files in this skill — diagnosis only. Caller decides remediation.
|
|
255
|
-
- Do NOT
|
|
271
|
+
- Do NOT invent deterministic results. If a guard CLI is available, run it; otherwise mark clean PASS claims as manual-only, not `0 FAIL, 0 WARN`.
|
|
256
272
|
- If a check cannot be performed (e.g., `docs/` missing entirely), report it as FAIL and stop — further checks are meaningless.
|
|
257
273
|
|
|
258
274
|
## Anti-patterns
|
|
@@ -27,8 +27,7 @@ This is kode:harness's memory mechanism — without it, the same mistakes repeat
|
|
|
27
27
|
### Step 1: Review Session Activity
|
|
28
28
|
|
|
29
29
|
1. Scan recent git changes: `git log --oneline -10` and `git diff --stat HEAD~3`
|
|
30
|
-
2. Identify
|
|
31
|
-
3. Identify any errors, failures, or unexpected issues that occurred
|
|
30
|
+
2. Identify accomplishments and unexpected issues
|
|
32
31
|
|
|
33
32
|
**Edge Case: Zero-Change Session**
|
|
34
33
|
If `git diff --stat` shows no changes and `git log` shows no new commits this session:
|
|
@@ -141,6 +140,22 @@ For each issue/error that occurred in this session:
|
|
|
141
140
|
|
|
142
141
|
> **Self-check**: New modules are registered in `docs/dependency-map.md`; state-check is PASS/WARN.
|
|
143
142
|
|
|
143
|
+
#### Step 5.5b: Guard Evidence (R16) ⚠️ MANDATORY
|
|
144
|
+
|
|
145
|
+
Before saying `state-check PASS`, `0 FAIL`, `0 WARN`, `STATUS: DONE`, or `Session Learn Complete`, run and quote one guard summary:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
HARNESS_GUARD_ROOT="$PWD" node /path/to/k-harness/scripts/harness-guard.js docs/project-state.md
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
or installed script:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
npm run harness:guard:wrap-up
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Rules: paste the exact guard summary. Errors block `STATUS: DONE`; warnings must be listed. Never write `0 FAIL, 0 WARN` unless guard says no issues.
|
|
158
|
+
|
|
144
159
|
### Step 5.55: Refresh Project Docs Hub Index (if applicable)
|
|
145
160
|
|
|
146
161
|
Run only if user used/requested `docs-bridge`, or Project Docs Hub Index has real rows.
|
|
@@ -182,6 +197,16 @@ State file 변경사항을 커밋합니다. Learn 실행 결과가 커밋되지
|
|
|
182
197
|
|
|
183
198
|
> **Self-check**: `git status`에 docs/ 아래 unstaged 파일이 없어야 합니다.
|
|
184
199
|
|
|
200
|
+
#### Step 5.65b: Dirty Worktree Truth (R16) ⚠️ MANDATORY
|
|
201
|
+
|
|
202
|
+
Run:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
git status --short
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Rules: paste exact `git status --short` or `clean`. Dirty `src/`, `test/`, `public/`, or app files mean work is not fully committed. If they remain by policy, report `Session End: DIRTY WORKTREE`.
|
|
209
|
+
|
|
185
210
|
### Step 5.7: Git Push Check (session end)
|
|
186
211
|
|
|
187
212
|
Before ending the session, check for unpushed commits:
|
package/package.json
CHANGED
package/src/guard.js
CHANGED
|
@@ -21,6 +21,8 @@
|
|
|
21
21
|
// R13 checkSmokeEvidence — browser/manual proof must leave durable evidence
|
|
22
22
|
// R14 checkScopeSplitApproval — FR/KPI/ARB split mappings need approval
|
|
23
23
|
// R15 checkRecentChangesIntegrity — wrap-up must not corrupt state sections
|
|
24
|
+
// R16 checkSelfVerifyClaim — claimed PASS must match deterministic guard
|
|
25
|
+
// R16 checkReviewerAuditEvidence — scope audits must cite real deps/imports
|
|
24
26
|
//
|
|
25
27
|
// Severity: 'error' blocks the commit (exit 1). 'warn' is informational.
|
|
26
28
|
|
|
@@ -285,6 +287,8 @@ function checkStateFile(content) {
|
|
|
285
287
|
|
|
286
288
|
const STORY_CONTRACT_PASS = /✅|pass(?:ed)?|proven|verified|reviewed|done|ok/i;
|
|
287
289
|
const STORY_CONTRACT_NOT_PROVEN = /❌|fail(?:ed)?|not[_ -]?proven|not[_ -]?verified|pending|todo|tbd|blank|needs[_ -]?user[_ -]?confirmation|needs[_ -]?confirmation|⬜|🚫|blocked/i;
|
|
290
|
+
const STORY_CONTRACT_ALWAYS = /\balways\b|\bevery\b|\ball\b|항상|모든|전체/i;
|
|
291
|
+
const STORY_CONTRACT_SURFACE = /\b(create|list|get|resolve|update|delete|api|ui|endpoint|route|public\s+surface|return\s+path)\b|생성|목록|조회|해결|수정|삭제|반환면|공개\s*표면/i;
|
|
288
292
|
|
|
289
293
|
/**
|
|
290
294
|
* Semantic Story Contract gate. This is intentionally project-agnostic:
|
|
@@ -363,12 +367,56 @@ function checkStoryContracts(input = {}) {
|
|
|
363
367
|
message: `Story ${id} is done but Story Contract "${contract}" is not proven (status: ${status || 'blank'}). Prove every contract row before Done (R11).`,
|
|
364
368
|
});
|
|
365
369
|
}
|
|
370
|
+
const assertionText = Object.values(row).filter((v) => typeof v === 'string').join(' ');
|
|
371
|
+
if (STORY_CONTRACT_ALWAYS.test(assertionText)) {
|
|
372
|
+
const surfaceMatches = assertionText.match(new RegExp(STORY_CONTRACT_SURFACE.source, 'gi')) || [];
|
|
373
|
+
const uniqueSurfaces = new Set(surfaceMatches.map((s) => s.toLowerCase()));
|
|
374
|
+
if (uniqueSurfaces.size < 2) {
|
|
375
|
+
violations.push({
|
|
376
|
+
check: 'story-contract',
|
|
377
|
+
severity: 'error',
|
|
378
|
+
line: 0,
|
|
379
|
+
message: `Story ${id} contract "${contract}" uses an always/every assertion but does not enumerate public surfaces (e.g. create/list/get/resolve/API/UI). R16 requires surface-specific proof so one return path cannot drift.`,
|
|
380
|
+
});
|
|
381
|
+
}
|
|
382
|
+
}
|
|
366
383
|
}
|
|
367
384
|
}
|
|
368
385
|
|
|
369
386
|
return violations;
|
|
370
387
|
}
|
|
371
388
|
|
|
389
|
+
// ─── Self-Verify Claim Gate (R16) ───────────────────────────────────
|
|
390
|
+
|
|
391
|
+
// Text that counts as a "clean self-verify" claim:
//   1. state-check / harness-guard / guard followed within 80 chars by PASS
//   2. "0 FAIL" followed by "0 WARN"
//   3. "0 error(s)" followed by "0 warning(s)"
// No `\b` is placed after `\)`: a word boundary there would only match when a
// word character follows, so "0 error(s), 0 warning(s)" would never match.
const CLEAN_SELF_VERIFY_CLAIM = /\b(?:state-check|harness-guard|guard)\b[\s\S]{0,80}\bPASS\b|\b0\s+FAIL\b[\s,;/|]*\b0\s+WARN\b|\b0\s+error\(s\)[\s,;/|]*\b0\s+warning\(s\)/i;

/**
 * Catch the Experiment #6 failure mode: a state file claims that
 * state-check/guard passed cleanly while the deterministic checks run over
 * the same file still produced errors or warnings.
 *
 * @param {string} content project-state.md text
 * @param {Array} deterministicViolations violations already found for the
 *   same file; entries produced by this check itself are ignored
 * @returns {Array} a single 'self-verify-claim' error when the claim is
 *   contradicted, otherwise an empty array
 */
function checkSelfVerifyClaim(content, deterministicViolations = []) {
  // Claims inside HTML comments are not visible in the rendered file.
  const visible = stripHtmlComments(content);
  if (!CLEAN_SELF_VERIFY_CLAIM.test(visible)) return [];

  let errors = 0;
  let warnings = 0;
  for (const { check, severity } of deterministicViolations) {
    if (check === 'self-verify-claim') continue;
    if (severity === 'error') errors += 1;
    else if (severity === 'warn') warnings += 1;
  }
  if (errors + warnings === 0) return [];

  return [{
    check: 'self-verify-claim',
    severity: 'error',
    line: 0,
    message: `State file claims clean self-verify/PASS, but deterministic guard found ${errors} error(s) and ${warnings} warning(s). Paste/fix the real guard output before reporting DONE (R16).`,
  }];
}
|
|
419
|
+
|
|
372
420
|
// ─── Reviewer Handoff Gate (R3) ──────────────────────────────────────
|
|
373
421
|
|
|
374
422
|
/**
|
|
@@ -786,6 +834,109 @@ function checkEvaluatorArtifact(content, filename = '') {
|
|
|
786
834
|
}];
|
|
787
835
|
}
|
|
788
836
|
|
|
837
|
+
// ─── Reviewer Audit Evidence Gate (R16) ─────────────────────────────
|
|
838
|
+
|
|
839
|
+
// Matches a line that opens with an optional bullet and optional bold label
// such as "- **Verified modules**:" (English or Korean variants), i.e. the
// line on which a reviewer lists the modules/dependencies they checked.
const REVIEWER_AUDIT_MODULE_LINE = /^\s*[-*]?\s*(?:\*\*)?(?:Verified modules|Verified imports|Dependencies verified|검증(?:된)?\s*(?:모듈|의존성)|확인(?:된)?\s*(?:모듈|의존성))(?:\*\*)?\s*:/im;

// Tokens on such a line that are skipped (compared lower-cased) when module
// names are extracted from it.
const REVIEWER_AUDIT_IGNORE = new Set([
  'project', 'project-local', 'local', 'internal',
  'built-in', 'builtin', 'node',
  'none', 'n/a',
]);
|
|
851
|
+
|
|
852
|
+
/**
 * Collect every dependency name declared in a package.json document.
 *
 * Best effort by design: anything that cannot be read as a manifest object
 * contributes no names instead of throwing.
 *
 * @param {string} [packageJson=''] raw package.json text
 * @returns {Set<string>} keys of dependencies, devDependencies,
 *   peerDependencies and optionalDependencies; empty when the input is not a
 *   string, is blank, is not valid JSON, or is not a JSON object
 */
function packageNamesFromJson(packageJson = '') {
  if (typeof packageJson !== 'string') return new Set();
  // trim() also drops a leading byte-order mark, which JSON.parse rejects.
  const text = packageJson.trim();
  if (!text) return new Set();

  let pkg;
  try {
    pkg = JSON.parse(text);
  } catch {
    // Unparseable manifest: report no declared dependencies.
    return new Set();
  }
  const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainObject(pkg)) return new Set();

  const names = new Set();
  for (const field of ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies']) {
    // A malformed section (string, array, ...) would otherwise leak index
    // keys such as "0" into the result.
    if (!isPlainObject(pkg[field])) continue;
    for (const name of Object.keys(pkg[field])) names.add(name);
  }
  return names;
}
|
|
866
|
+
|
|
867
|
+
/**
 * Collect the module names that a set of source files defines or references.
 *
 * For each file this adds:
 *   - the file's own base name without its last extension;
 *   - for every relative specifier ("./x/util.js"), the last path segment
 *     without its last extension;
 *   - for every other specifier, the package name: "@scope/name" for scoped
 *     packages, otherwise the first path segment, with any "node:" prefix
 *     removed.
 *
 * Specifiers are read from require('x'), `... from 'x'`, side-effect
 * `import 'x'` and dynamic `import('x')` with a literal string argument.
 *
 * @param {Array<{file?: string, content?: string}>} [sourceFiles=[]]
 * @returns {Set<string>} collected names
 */
function moduleNamesFromSourceFiles(sourceFiles = []) {
  const names = new Set();
  const dropExtension = (name) => name.replace(/\.[^.]+$/, '');
  const specifierPatterns = [
    /\brequire\(\s*['"]([^'"]+)['"]\s*\)/g,
    /\bfrom\s+['"]([^'"]+)['"]/g,
    /\bimport\s*\(\s*['"]([^'"]+)['"]\s*\)/g,
    /\bimport\s+['"]([^'"]+)['"]/g,
  ];

  for (const { file = '', content = '' } of sourceFiles) {
    const base = file.split(/[\\/]/).pop() || '';
    if (base.includes('.')) names.add(dropExtension(base));

    for (const pattern of specifierPatterns) {
      // matchAll works on a clone of the regex, so lastIndex never leaks
      // between files.
      for (const [, rawSpec] of content.matchAll(pattern)) {
        if (rawSpec.startsWith('.')) {
          const local = rawSpec.split('/').pop();
          if (local) names.add(dropExtension(local));
          continue;
        }
        const segments = rawSpec.replace(/^node:/, '').split('/');
        // Scoped packages are identified by "@scope/name", not "@scope".
        const packageName = segments[0].startsWith('@') && segments.length > 1
          ? `${segments[0]}/${segments[1]}`
          : segments[0];
        if (packageName) names.add(packageName);
      }
    }
  }
  return names;
}
|
|
889
|
+
|
|
890
|
+
/**
 * Pull the module names a reviewer claims to have verified out of a document.
 *
 * Only lines matching REVIEWER_AUDIT_MODULE_LINE are read. When such a line
 * contains backtick-quoted spans, only those spans are used; otherwise the
 * text after the first colon is used. Names are separated by commas,
 * whitespace or "/", except that a scoped package ("@scope/name", optionally
 * followed by a sub-path) is kept together and reported as "@scope/name".
 * Tokens listed in REVIEWER_AUDIT_IGNORE are skipped and a leading "node:"
 * is removed.
 *
 * @param {string} content reviewer document text
 * @returns {string[]} unique names in first-seen order
 */
function extractAuditModuleNames(content) {
  const names = new Set();
  const trimToken = (raw) => raw.replace(/^[`"'(*\-\s]+|[`"',.)*\s]+$/g, '');
  const scopedPackage = /^(@[a-z0-9~-][a-z0-9._~-]*\/[a-z0-9~-][a-z0-9._~-]*)(?:\/.*)?$/i;

  for (const line of content.split('\n')) {
    if (!REVIEWER_AUDIT_MODULE_LINE.test(line)) continue;

    const codeNames = [...line.matchAll(/`([^`]+)`/g)].map((m) => m[1]);
    const source = codeNames.length > 0 ? codeNames.join(',') : line.split(':').slice(1).join(':');

    for (const chunk of source.split(/[,\s]+/)) {
      // Splitting "@scope/name" on "/" would cite a bare "name" that matches
      // nothing in package.json, so scoped packages are handled first.
      const scoped = scopedPackage.exec(trimToken(chunk));
      if (scoped) {
        names.add(scoped[1]);
        continue;
      }
      for (const raw of chunk.split('/')) {
        const token = trimToken(raw);
        if (!token || token.length < 2) continue;
        if (!/^[A-Za-z][A-Za-z0-9_.:-]*$/.test(token)) continue;
        if (REVIEWER_AUDIT_IGNORE.has(token.toLowerCase())) continue;
        names.add(token.replace(/^node:/, ''));
      }
    }
  }
  return [...names];
}
|
|
907
|
+
|
|
908
|
+
/**
 * Reviewer scope audits may be judgmental, but any module or dependency they
 * list as verified has to be backed by package.json or by a real
 * require/import line. This flags hallucinated modules, e.g. "express" cited
 * in a project that only uses Node's http.
 *
 * A cited name is accepted when it is a Node built-in module, a key in one of
 * the package.json dependency sections, or a name derived from the supplied
 * source files.
 *
 * @param {string} content reviewer.md
 * @param {{packageJson?: string, sourceFiles?: Array<{file:string, content:string}>, filename?: string}} context
 * @returns {Array} one 'reviewer-audit' error per unsupported name
 */
function checkReviewerAuditEvidence(content, { packageJson = '', sourceFiles = [], filename = '' } = {}) {
  const asserted = extractAuditModuleNames(content);
  if (asserted.length === 0) return [];

  const withoutNodePrefix = (name) => name.replace(/^node:/, '');

  const allowed = new Set(require('module').builtinModules.map(withoutNodePrefix));
  for (const dep of packageNamesFromJson(packageJson)) allowed.add(dep);
  for (const local of moduleNamesFromSourceFiles(sourceFiles)) allowed.add(local);

  return asserted
    .filter((name) => !allowed.has(withoutNodePrefix(name)))
    .map((name) => ({
      check: 'reviewer-audit',
      severity: 'error',
      line: 0,
      message: `${filename ? filename + ': ' : ''}reviewer audit cites "${name}" as a verified module/dependency, but it is not in package.json or actual require/import lines (R16). Cite exact evidence or remove it.`,
    }));
}
|
|
939
|
+
|
|
789
940
|
// ─── Markdown lint (R6 / L3-8) ───────────────────────────────────────
|
|
790
941
|
|
|
791
942
|
/**
|
|
@@ -929,6 +1080,10 @@ function isProjectBriefFile(file) {
|
|
|
929
1080
|
return /(?:^|\/)(?:docs|\.harness)\/project-brief\.md$/.test(file);
|
|
930
1081
|
}
|
|
931
1082
|
|
|
1083
|
+
/**
 * Tell whether a path points at the reviewer agent-memory document, i.e.
 * ends with `docs/agent-memory/reviewer.md` or
 * `.harness/agent-memory/reviewer.md` (forward slashes only).
 *
 * @param {string} file path to test
 * @returns {boolean}
 */
function isReviewerMemoryFile(file) {
  const reviewerMemoryPath = /(?:^|\/)(?:docs|\.harness)\/agent-memory\/reviewer\.md$/;
  return reviewerMemoryPath.test(file);
}
|
|
1086
|
+
|
|
932
1087
|
function isScannableForSecrets(file) {
|
|
933
1088
|
return /\.(js|ts|jsx|tsx|json|jsonc|ya?ml|env|sh|py|java|md|properties|toml)$/i.test(file)
|
|
934
1089
|
&& !/\.lock$/.test(file);
|
|
@@ -939,6 +1094,35 @@ function isPublicPackageFile(file) {
|
|
|
939
1094
|
return PUBLIC_PACKAGE_PATHS.some((re) => re.test(normalized));
|
|
940
1095
|
}
|
|
941
1096
|
|
|
1097
|
+
/**
 * Read the project's JavaScript/TypeScript sources for the reviewer audit.
 *
 * Starts from the fixed roots `src`, `lib`, `app`, `server.js` and `index.js`
 * under `cwd`. Directories are walked recursively, skipping dot-entries and
 * `node_modules`. Only files ending in .js/.mjs/.cjs/.ts/.tsx/.jsx are read.
 *
 * The walk is best effort: entries that cannot be stat'ed, listed or read
 * (for example broken symlinks) are skipped instead of aborting the guard,
 * and each real directory is visited once so symlink cycles terminate.
 *
 * @param {string} cwd project root
 * @returns {Array<{file: string, content: string}>} paths relative to `cwd`
 *   with their UTF-8 contents
 */
function sourceFilesForAudit(cwd) {
  const files = [];
  const roots = ['src', 'lib', 'app', 'server.js', 'index.js'];
  const sourceExtension = /\.(?:js|mjs|cjs|ts|tsx|jsx)$/;
  const visitedDirs = new Set();

  // Run a filesystem call and turn any failure into null.
  const attempt = (fn) => {
    try {
      return fn();
    } catch {
      return null;
    }
  };

  const visit = (rel) => {
    const abs = path.join(cwd, rel);
    const stat = attempt(() => fs.statSync(abs));
    if (stat === null) return; // missing entry or dangling symlink

    if (stat.isDirectory()) {
      const real = attempt(() => fs.realpathSync(abs));
      if (real === null || visitedDirs.has(real)) return;
      visitedDirs.add(real);
      for (const name of attempt(() => fs.readdirSync(abs)) || []) {
        if (name.startsWith('.') || name === 'node_modules') continue;
        visit(path.join(rel, name));
      }
      return;
    }

    if (!stat.isFile() || !sourceExtension.test(rel)) return;
    const content = attempt(() => fs.readFileSync(abs, 'utf8'));
    if (content !== null) files.push({ file: rel, content });
  };

  for (const root of roots) visit(root);
  return files;
}
|
|
1125
|
+
|
|
942
1126
|
/**
|
|
943
1127
|
* Run all guard checks over a set of files.
|
|
944
1128
|
* @param {{files: string[], cwd?: string}} opts
|
|
@@ -954,6 +1138,7 @@ function runGuard({ files, cwd = process.cwd() }) {
|
|
|
954
1138
|
const content = fs.readFileSync(abs, 'utf8');
|
|
955
1139
|
const rel = path.relative(cwd, abs);
|
|
956
1140
|
scanned++;
|
|
1141
|
+
const beforeFile = all.length;
|
|
957
1142
|
|
|
958
1143
|
if (isScannableForSecrets(file)) {
|
|
959
1144
|
all.push(...scanSecrets(content, rel));
|
|
@@ -979,10 +1164,19 @@ function runGuard({ files, cwd = process.cwd() }) {
|
|
|
979
1164
|
if (STATE_LINE_LIMITS[base]) {
|
|
980
1165
|
all.push(...lintLineLimit(content, STATE_LINE_LIMITS[base], rel));
|
|
981
1166
|
}
|
|
1167
|
+
all.push(...checkSelfVerifyClaim(content, all.slice(beforeFile)));
|
|
982
1168
|
}
|
|
983
1169
|
if (isProjectBriefFile(file)) {
|
|
984
1170
|
all.push(...checkScopeSplitApproval({ projectBrief: content }));
|
|
985
1171
|
}
|
|
1172
|
+
if (isReviewerMemoryFile(file)) {
|
|
1173
|
+
const pkgPath = path.join(cwd, 'package.json');
|
|
1174
|
+
all.push(...checkReviewerAuditEvidence(content, {
|
|
1175
|
+
filename: rel,
|
|
1176
|
+
packageJson: fs.existsSync(pkgPath) ? fs.readFileSync(pkgPath, 'utf8') : '',
|
|
1177
|
+
sourceFiles: sourceFilesForAudit(cwd),
|
|
1178
|
+
}));
|
|
1179
|
+
}
|
|
986
1180
|
}
|
|
987
1181
|
|
|
988
1182
|
const errorCount = all.filter((v) => v.severity === 'error').length;
|
|
@@ -999,11 +1193,13 @@ module.exports = {
|
|
|
999
1193
|
checkStateSync,
|
|
1000
1194
|
checkScopeSplitApproval,
|
|
1001
1195
|
checkRecentChangesIntegrity,
|
|
1196
|
+
checkSelfVerifyClaim,
|
|
1002
1197
|
checkIntegrationDoD,
|
|
1003
1198
|
checkSmokeEvidence,
|
|
1004
1199
|
checkEnvSeal,
|
|
1005
1200
|
checkPublicBoundary,
|
|
1006
1201
|
checkEvaluatorArtifact,
|
|
1202
|
+
checkReviewerAuditEvidence,
|
|
1007
1203
|
lintMarkdownTables,
|
|
1008
1204
|
lintLineLimit,
|
|
1009
1205
|
checkInstructionBudget,
|