@kodevibe/harness 0.11.2 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +4 -2
- package/README.md +4 -2
- package/harness/agents/lead.md +13 -4
- package/harness/agents/pm.md +25 -19
- package/harness/agents/reviewer.md +36 -38
- package/harness/project-state.md +11 -0
- package/harness/skills/breakdown.md +2 -1
- package/harness/skills/pr-review.md +16 -0
- package/harness/skills/setup.md +3 -4
- package/harness/skills/state-check.md +72 -1
- package/harness/skills/wrap-up.md +59 -27
- package/package.json +1 -1
- package/src/guard.js +496 -0
package/src/guard.js
CHANGED
|
@@ -16,6 +16,13 @@
|
|
|
16
16
|
// R8 checkPublicBoundary — public package surface does not leak internal refs
|
|
17
17
|
// R9 checkEnvSeal — proof records include reproducible environment seal
|
|
18
18
|
// R10 checkInstructionBudget — instruction files fit model-tier budgets
|
|
19
|
+
// R11 checkStoryContracts — done Stories must prove their semantic contracts
|
|
20
|
+
// R12 checkEvaluatorArtifact — evaluator-owned artifacts are not rewritten silently
|
|
21
|
+
// R13 checkSmokeEvidence — browser/manual proof must leave durable evidence
|
|
22
|
+
// R14 checkScopeSplitApproval — FR/KPI/ARB split mappings need approval
|
|
23
|
+
// R15 checkRecentChangesIntegrity — wrap-up must not corrupt state sections
|
|
24
|
+
// R16 checkSelfVerifyClaim — claimed PASS must match deterministic guard
|
|
25
|
+
// R16 checkReviewerAuditEvidence — scope audits must cite real deps/imports
|
|
19
26
|
//
|
|
20
27
|
// Severity: 'error' blocks the commit (exit 1). 'warn' is informational.
|
|
21
28
|
|
|
@@ -153,6 +160,12 @@ function parseMarkdownTable(section) {
|
|
|
153
160
|
});
|
|
154
161
|
}
|
|
155
162
|
|
|
163
|
+
/**
 * Read the header cell labels of a markdown pipe table inside a section.
 *
 * Only lines that begin with `|` (after trimming) are considered. The first
 * such line is taken as the header row; its outer pipes are dropped and each
 * cell is trimmed. A lone pipe line is not treated as a table.
 *
 * @param {string} section markdown text of one section
 * @returns {string[]} header labels, or an empty array when fewer than two pipe lines exist
 */
function markdownTableHeaders(section) {
  const pipeLines = [];
  for (const rawLine of section.split('\n')) {
    const candidate = rawLine.trim();
    if (candidate.startsWith('|')) pipeLines.push(candidate);
  }

  // A header row needs at least one companion row (normally the separator).
  if (pipeLines.length < 2) return [];

  const [headerRow] = pipeLines;
  const withoutOuterPipes = headerRow.replace(/^\|/, '').replace(/\|$/, '');
  return withoutOuterPipes.split('|').map((cell) => cell.trim());
}
|
|
168
|
+
|
|
156
169
|
function storyIdFromRow(row) {
|
|
157
170
|
return row.ID || row.Id || row.Story || row['Story ID'] || row['Story'] || '';
|
|
158
171
|
}
|
|
@@ -161,6 +174,56 @@ function rowStatus(row) {
|
|
|
161
174
|
return row.Status || row.status || '';
|
|
162
175
|
}
|
|
163
176
|
|
|
177
|
+
/**
 * Pick the proof status cell of a Story Contract row.
 *
 * Columns are tried in priority order and the first non-empty value wins.
 *
 * @param {Object} row parsed table row keyed by column header
 * @returns {string} the status text, or '' when none of the columns is filled
 */
function storyContractStatus(row) {
  const statusColumns = ['Proof Status', 'Status', 'status', 'Result', 'result'];
  for (const column of statusColumns) {
    if (row[column]) return row[column];
  }
  return '';
}
|
|
180
|
+
|
|
181
|
+
// Vocabulary (English and Korean) that signals proof/evidence content rather
// than a changelog entry.
const RECENT_CHANGES_EVIDENCE_TERMS = /\b(?:scenario|expected|result|observer|url|screenshot|playwright|dropdown|column|color coding|ui elements?)\b|시나리오|기대|결과|관찰|드롭다운|컬럼|색상|스크린샷/i;

/**
 * Structural check for the "Recent Changes" section (R15).
 *
 * Recent Changes is the session changelog. When wrap-up inserts it in the
 * middle of an evidence/proof section, the remainder of that section ends up
 * under "Recent Changes". Two symptoms are reported, at most once each:
 *   - a heading of level 3 or deeper inside the section;
 *   - a bullet that uses evidence vocabulary and has no date, Story id, or
 *     session/wrap-up/recent wording.
 *
 * @param {string} content project-state.md
 * @returns {Array} violations (empty when the section is absent or clean)
 */
function checkRecentChangesIntegrity(content) {
  const recentSection = getSection(stripHtmlComments(content), 'Recent Changes');
  if (recentSection === null) return [];

  const found = [];
  const entries = recentSection.split('\n').map((entry) => entry.trim());

  const headingInside = entries.find((text) => /^#{3,}\s+/.test(text));
  if (headingInside) {
    found.push({
      check: 'state-structure',
      severity: 'error',
      line: 0,
      message: `Recent Changes contains nested heading "${headingInside}". It was likely inserted inside an evidence/proof section; move Recent Changes after the completed evidence block (R15).`,
    });
  }

  // Bullets carrying a date, Story id, or session wording read as changelog lines.
  const changelogHints = /\d{4}-\d{2}-\d{2}|\bS\d+-\d+\b|session|wrap-up|recent/i;
  const evidenceBullet = entries.find((text) => {
    const isBullet = /^[-*]\s+/.test(text);
    return isBullet && !changelogHints.test(text) && RECENT_CHANGES_EVIDENCE_TERMS.test(text);
  });
  if (evidenceBullet) {
    found.push({
      check: 'state-structure',
      severity: 'error',
      line: 0,
      message: `Recent Changes contains evidence/checklist content (${evidenceBullet}). Do not place proof details under Recent Changes; keep durable evidence in its own section (R15).`,
    });
  }

  return found;
}
|
|
226
|
+
|
|
164
227
|
/**
|
|
165
228
|
* Validate docs/project-state.md content for handoff + proof-first integrity.
|
|
166
229
|
* @param {string} content
|
|
@@ -215,9 +278,145 @@ function checkStateFile(content) {
|
|
|
215
278
|
}
|
|
216
279
|
}
|
|
217
280
|
|
|
281
|
+
violations.push(...checkRecentChangesIntegrity(content));
|
|
282
|
+
|
|
218
283
|
return violations;
|
|
219
284
|
}
|
|
220
285
|
|
|
286
|
+
// ─── Story Contract Gate (R11 / semantic acceptance) ────────────────
|
|
287
|
+
|
|
288
|
+
// Status text accepted as "proven". Checked only after STORY_CONTRACT_NOT_PROVEN.
const STORY_CONTRACT_PASS = /✅|pass(?:ed)?|proven|verified|reviewed|done|ok/i;
// Status text that always means "not proven", even if a pass word is also present.
const STORY_CONTRACT_NOT_PROVEN = /❌|fail(?:ed)?|not[_ -]?proven|not[_ -]?verified|pending|todo|tbd|blank|needs[_ -]?user[_ -]?confirmation|needs[_ -]?confirmation|⬜|🚫|blocked/i;
// Universal quantifiers that trigger the surface-enumeration requirement.
const STORY_CONTRACT_ALWAYS = /\balways\b|\bevery\b|\ball\b|항상|모든|전체/i;
// Public surface names counted when an always/every contract is checked.
const STORY_CONTRACT_SURFACE = /\b(create|list|get|resolve|update|delete|api|ui|endpoint|route|public\s+surface|return\s+path)\b|생성|목록|조회|해결|수정|삭제|반환면|공개\s*표면/i;

/**
 * Semantic Story Contract gate. This is intentionally project-agnostic:
 * the harness does not parse the app domain itself; it requires the agent to
 * write a compact Story Contract table and prove every row before Done.
 *
 * Backward compatibility: existing projects with done Stories but no contract
 * rows receive WARN, not FAIL. Once a contract row exists, an unproven row is
 * blocking.
 *
 * Contract rows are matched to a Story by whole-id comparison, so a done
 * Story `S1-1` is never paired with the rows of `S1-10`.
 *
 * @param {{projectState?: string}|string} input project-state.md content or object
 * @returns {Array} violations with `check: 'story-contract'`
 */
function checkStoryContracts(input = {}) {
  const projectState = typeof input === 'string' ? input : ((input && input.projectState) || '');
  const violations = [];
  const visible = stripHtmlComments(projectState);
  const doneStories = parseMarkdownTable(getSection(visible, 'Story Status') || '')
    .filter((row) => /✅\s*done/i.test(rowStatus(row)));
  if (doneStories.length === 0) return violations;

  const section = getSection(visible, 'Story Contracts');
  if (section === null) {
    for (const story of doneStories) {
      const id = storyIdFromRow(story);
      violations.push({
        check: 'story-contract',
        severity: 'warn',
        line: 0,
        message: `Story ${id || '(unknown)'} is done but project-state.md has no Story Contracts section (R11). Add semantic contract rows for new work.`,
      });
    }
    return violations;
  }

  const headers = markdownTableHeaders(section);
  const requiredHeaders = ['Story', 'Contract', 'Proof Status'];
  const hasContractTable = headers.length > 0;
  const hasRequiredHeaders = requiredHeaders.every((name) => headers.includes(name));
  if (!hasContractTable || !hasRequiredHeaders) {
    violations.push({
      check: 'story-contract',
      severity: 'error',
      line: 0,
      message: 'Story Contracts table is malformed. Required columns: Story, Contract, Proof Status (R11).',
    });
    return violations;
  }

  // Build a matcher that finds `id` only as a whole token: the id must not be
  // glued to a letter or digit on either side (so "S1-1" does not hit "S1-10").
  const storyIdMatcher = (id) => {
    const escaped = String(id).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<![\\p{L}\\p{N}])${escaped}(?![\\p{L}\\p{N}])`, 'u');
  };

  // String.prototype.match resets lastIndex for global regexes, so one
  // instance can be shared by every row.
  const surfacePattern = new RegExp(STORY_CONTRACT_SURFACE.source, 'gi');

  const rows = parseMarkdownTable(section);
  for (const story of doneStories) {
    const id = storyIdFromRow(story);
    const matcher = id ? storyIdMatcher(id) : null;
    const contracts = matcher === null
      ? []
      : rows.filter((row) => matcher.test(String(row.Story || row['Story ID'] || '')));

    if (contracts.length === 0) {
      violations.push({
        check: 'story-contract',
        severity: 'warn',
        line: 0,
        message: `Story ${id || '(unknown)'} is done but has no Story Contract rows (R11). Add semantic acceptance contracts for new work.`,
      });
      continue;
    }

    for (const row of contracts) {
      const status = storyContractStatus(row);
      const contract = row.Contract || row['Required Assertion'] || '(unnamed contract)';
      // A negative marker wins over any pass word found in the same cell.
      if (!status || STORY_CONTRACT_NOT_PROVEN.test(status) || !STORY_CONTRACT_PASS.test(status)) {
        violations.push({
          check: 'story-contract',
          severity: 'error',
          line: 0,
          message: `Story ${id} is done but Story Contract "${contract}" is not proven (status: ${status || 'blank'}). Prove every contract row before Done (R11).`,
        });
      }

      // Universal claims must name at least two distinct surfaces anywhere in the row.
      const assertionText = Object.values(row).filter((v) => typeof v === 'string').join(' ');
      if (STORY_CONTRACT_ALWAYS.test(assertionText)) {
        const surfaceMatches = assertionText.match(surfacePattern) || [];
        const uniqueSurfaces = new Set(surfaceMatches.map((s) => s.toLowerCase()));
        if (uniqueSurfaces.size < 2) {
          violations.push({
            check: 'story-contract',
            severity: 'error',
            line: 0,
            message: `Story ${id} contract "${contract}" uses an always/every assertion but does not enumerate public surfaces (e.g. create/list/get/resolve/API/UI). R16 requires surface-specific proof so one return path cannot drift.`,
          });
        }
      }
    }
  }

  return violations;
}
|
|
388
|
+
|
|
389
|
+
// ─── Self-Verify Claim Gate (R16) ───────────────────────────────────
|
|
390
|
+
|
|
391
|
+
// Phrases that read as "the guard passed cleanly": a guard/state-check mention
// followed within 80 characters by PASS, or an explicit zero/zero tally.
const CLEAN_SELF_VERIFY_CLAIM = /\b(?:state-check|harness-guard|guard)\b[\s\S]{0,80}\bPASS\b|\b0\s+FAIL\b[\s,;/|]*\b0\s+WARN\b|\b0\s+error\(s\)\b[\s,;/|]*\b0\s+warning\(s\)\b/i;

/**
 * Self-verify claim gate (R16).
 *
 * Targets the Experiment #6 failure mode: the state file says the guard
 * passed cleanly while the deterministic checks for the same file still
 * produced errors or warnings. Earlier `self-verify-claim` findings are not
 * counted, so the gate never reacts to its own output.
 *
 * @param {string} content project-state.md
 * @param {Array} deterministicViolations violations found for the same file
 * @returns {Array} zero or one `self-verify-claim` error
 */
function checkSelfVerifyClaim(content, deterministicViolations = []) {
  const claimsCleanRun = CLEAN_SELF_VERIFY_CLAIM.test(stripHtmlComments(content));
  if (!claimsCleanRun) return [];

  let errors = 0;
  let warnings = 0;
  for (const finding of deterministicViolations) {
    if (finding.check === 'self-verify-claim') continue;
    if (finding.severity === 'error') errors += 1;
    else if (finding.severity === 'warn') warnings += 1;
  }
  if (errors + warnings === 0) return [];

  return [{
    check: 'self-verify-claim',
    severity: 'error',
    line: 0,
    message: `State file claims clean self-verify/PASS, but deterministic guard found ${errors} error(s) and ${warnings} warning(s). Paste/fix the real guard output before reporting DONE (R16).`,
  }];
}
|
|
419
|
+
|
|
221
420
|
// ─── Reviewer Handoff Gate (R3) ──────────────────────────────────────
|
|
222
421
|
|
|
223
422
|
/**
|
|
@@ -385,6 +584,44 @@ function checkStateSync({ projectState = '', features = '', dependencyMap = '' }
|
|
|
385
584
|
return violations;
|
|
386
585
|
}
|
|
387
586
|
|
|
587
|
+
// ─── Scope Split Approval Gate (R14) ────────────────────────────────
|
|
588
|
+
|
|
589
|
+
// Story identifiers such as S1-2 (global: used to collect every id on a line).
const STORY_ID_RE = /\bS\d+-\d+\b/g;
// A line mentioning an FR / KPI / ARB / ARB-FAIL item id is treated as a tracker row.
const TRACKER_ROW_RE = /\b(?:FR|KPI|ARB|ARB-FAIL)[-_]?\d+\b/i;
// Approval markers: English or Korean phrase, or an HTML comment marker.
const SCOPE_SPLIT_APPROVAL_RE = /\bScope split approved\b|범위\s*분할\s*승인|<!--\s*harness-scope-split-approved:/i;

/**
 * Scope split approval gate (R14).
 *
 * Mapping one FR/KPI/ARB item onto several Stories is a product decision, so
 * every tracker line that names more than one distinct Story id is reported
 * unless the brief carries an approval marker. The marker is searched in the
 * raw text because one of its accepted forms is an HTML comment.
 *
 * @param {{projectBrief?: string}|string} input project-brief.md content or object
 * @returns {Array} `scope-split` errors; `line` is 1-based within the comment-stripped text
 */
function checkScopeSplitApproval(input = {}) {
  let projectBrief;
  if (typeof input === 'string') {
    projectBrief = input;
  } else {
    projectBrief = input.projectBrief || '';
  }

  const visible = stripHtmlComments(projectBrief);
  const hasTracker = /Validation Tracker|FR Coverage|KPI Coverage|ARB Fail Resolution/i.test(visible);
  if (!hasTracker) return [];
  if (SCOPE_SPLIT_APPROVAL_RE.test(projectBrief)) return [];

  const violations = [];
  visible.split('\n').forEach((text, index) => {
    if (!TRACKER_ROW_RE.test(text)) return;
    const storyIds = Array.from(new Set(text.match(STORY_ID_RE) || []));
    if (storyIds.length <= 1) return;
    violations.push({
      check: 'scope-split',
      severity: 'error',
      line: index + 1,
      message: `Validation Tracker maps one item to multiple Stories (${storyIds.join(', ')}) without Scope split approved marker (R14). Add "Scope split approved: <reason>" before Done.`,
    });
  });
  return violations;
}
|
|
624
|
+
|
|
388
625
|
// ─── Integration / Persistence DoD (R4) ──────────────────────────────
|
|
389
626
|
|
|
390
627
|
// Evidence terms that prove only in-memory/unit behaviour (insufficient alone).
|
|
@@ -392,6 +629,11 @@ const UNIT_ONLY_TERMS = /\bunit\b|단위\s*테스트/i;
|
|
|
392
629
|
// Evidence terms that prove integration / persistence reached real boundaries.
|
|
393
630
|
const INTEGRATION_TERMS = /\bintegration\b|통합|\bpersist|영속|\brow count|적재|\bcontext test|\be2e\b|database|\bdb\b|repository|commit boundary/i;
|
|
394
631
|
|
|
632
|
+
// Words (English and Korean) that mark a Proof Ledger row as UI/browser/smoke/manual proof.
const SMOKE_EVIDENCE_TERMS = /\b(?:ui|browser|smoke|manual)\b|브라우저|수동|화면/i;
|
|
633
|
+
// Tool or artifact mentions accepted as durable proof: capture/test tool names,
// checklist wording, or an image/video file extension.
const DURABLE_SMOKE_ARTIFACT_TERMS = /\b(?:screenshot|playwright|cypress|selenium|trace|video|manual checklist|checklist)\b|\.(?:png|jpe?g|webp|mp4)\b/i;
|
|
634
|
+
// Detects that a URL is present: an http(s) scheme, or localhost / 127.0.0.1 with optional port.
const SMOKE_URL_TERMS = /https?:\/\/|localhost(?::\d+)?|127\.0\.0\.1(?::\d+)?/i;
|
|
635
|
+
// Words (English and Korean) that describe something observed in the UI.
// NOTE(review): the trailing `\d+` alternative matches ANY digit run, including
// a port number inside a URL — confirm that is intended.
const SMOKE_OBSERVATION_TERMS = /\b(?:observed|rows?|counts?|items?|open|closed|breached|at-risk|normal|columns?|filters?)\b|렌더|확인|컬럼|필터|개\s*open|\d+/i;
|
|
636
|
+
|
|
395
637
|
/**
|
|
396
638
|
* Integration/Persistence DoD: a Story marked "✅ done" must have at least one
|
|
397
639
|
* Proof Ledger row whose evidence indicates integration/persistence — not only
|
|
@@ -429,6 +671,52 @@ function checkIntegrationDoD(content) {
|
|
|
429
671
|
return violations;
|
|
430
672
|
}
|
|
431
673
|
|
|
674
|
+
/**
 * Smoke evidence gate (R13).
 *
 * Browser/manual/smoke proof is too easy to fake as prose. For every passing
 * Proof Ledger row that uses UI/manual/smoke vocabulary and belongs to a done
 * Story (or names no Story at all), one of the following must hold:
 *   - the Artifact cell holds something other than a placeholder;
 *   - the row mentions a capture tool, checklist, or media file;
 *   - the row gives a URL plus observation wording outside that URL.
 *
 * Story ids are compared as whole tokens, so a row for `S1-10` is not treated
 * as belonging to done Story `S1-1`. The observation test ignores URL text so
 * that a port number alone (e.g. `localhost:3000`) cannot satisfy it.
 *
 * @param {string} content project-state.md
 * @returns {Array} `smoke-evidence` errors
 */
function checkSmokeEvidence(content) {
  const violations = [];
  const visible = stripHtmlComments(content);
  const doneIds = parseMarkdownTable(getSection(visible, 'Story Status') || '')
    .filter((row) => /✅\s*done/i.test(rowStatus(row)))
    .map((row) => storyIdFromRow(row))
    .filter(Boolean);
  if (doneIds.length === 0) return violations;

  // One whole-token matcher per done Story id (no letter/digit on either side).
  const doneIdMatchers = doneIds.map((id) => {
    const escaped = String(id).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<![\\p{L}\\p{N}])${escaped}(?![\\p{L}\\p{N}])`, 'u');
  });
  const belongsToDoneStory = (cell) => doneIdMatchers.some((re) => re.test(cell));

  // Cells that stand for "nothing here": runs of dashes, or n/a / na.
  const isPlaceholder = (text) => /^(?:[-—]+|n\/?a)$/i.test(text);
  // Whole URL-like tokens, removed before looking for observation wording.
  const urlToken = /(?:https?:\/\/|localhost\b|127\.0\.0\.1)\S*/gi;

  const ledgerRows = parseMarkdownTable(getSection(visible, 'Proof Ledger') || '');
  for (const row of ledgerRows) {
    const values = Object.values(row).filter((v) => typeof v === 'string');
    const raw = values.join(' ');
    const story = row.Story || row['Story ID'] || '';
    const result = row.Result || row.result || raw;
    if (!/(✅|pass)/i.test(result)) continue;
    if (story && !belongsToDoneStory(story)) continue;
    if (!SMOKE_EVIDENCE_TERMS.test(raw)) continue;

    const artifact = String(row.Artifact || row.artifact || '').trim();
    const command = row['Command / Observation'] || row.Command || row.Observation || '';
    const manualText = command || raw;

    const hasDurableArtifact = artifact !== '' && !isPlaceholder(artifact);
    const hasToolProof = DURABLE_SMOKE_ARTIFACT_TERMS.test(raw);
    const hasConcreteManualProof = SMOKE_URL_TERMS.test(manualText)
      && SMOKE_OBSERVATION_TERMS.test(manualText.replace(urlToken, ' '));

    if (!(hasDurableArtifact || hasToolProof || hasConcreteManualProof)) {
      violations.push({
        check: 'smoke-evidence',
        severity: 'error',
        line: 0,
        message: `Passing UI/manual/smoke proof for Story ${story || '(unknown)'} is not durable. Add screenshot/Playwright artifact, checklist row, or URL plus exact observed UI counts/elements (R13).`,
      });
    }
  }

  return violations;
}
|
|
719
|
+
|
|
432
720
|
// ─── Environment Seal Gate (R9) ──────────────────────────────────────
|
|
433
721
|
|
|
434
722
|
/**
|
|
@@ -501,6 +789,154 @@ function checkPublicBoundary(content, filename = '') {
|
|
|
501
789
|
return violations;
|
|
502
790
|
}
|
|
503
791
|
|
|
792
|
+
// ─── Evaluator Artifact Protection (R12) ────────────────────────────
|
|
793
|
+
|
|
794
|
+
// Repo-relative paths (forward slashes) that look like evaluator-owned artifacts.
const EVALUATOR_ARTIFACT_PATHS = [
  /^experiment\/(?:kode-harness-scorecard|run-card|evaluator-).*\.md$/i,
  /^docs\/experiment\/(?:kode-harness-scorecard|run-card|evaluator-).*\.md$/i,
  /^docs\/evaluator-.*\.md$/i,
];
// In-file marker declaring that the evaluator owns the file.
const EVALUATOR_OWNER_MARKER = /<!--\s*harness-owner:\s*evaluator\s*-->/i;
// In-file marker recording an approval (with a reason) to edit the file.
const EVALUATOR_APPROVAL_MARKER = /<!--\s*harness-edit-approved:\s*[^>]+-->/i;

/**
 * Tell whether a repo-relative path points at an evaluator artifact.
 *
 * Backslashes are converted to forward slashes and any leading `./` segments
 * are dropped before matching, because the patterns are anchored at the start
 * of the relative path.
 *
 * @param {string} [file] repo-relative path
 * @returns {boolean} true when the path matches one of EVALUATOR_ARTIFACT_PATHS
 */
function isEvaluatorArtifact(file = '') {
  const normalized = String(file).replace(/\\/g, '/').replace(/^(?:\.\/)+/, '');
  return EVALUATOR_ARTIFACT_PATHS.some((re) => re.test(normalized));
}
|
|
806
|
+
|
|
807
|
+
/**
 * Evaluator artifact protection (R12).
 *
 * Guards evaluator/run-card/scorecard files against rewrites by the model
 * under test:
 *   - path does not look like an evaluator artifact -> nothing reported;
 *   - file lacks the `harness-owner: evaluator` marker -> warning;
 *   - file has the owner marker but no approval marker -> error;
 *   - file has both markers -> nothing reported.
 *
 * @param {string} content file content
 * @param {string} [filename] repo-relative path of the file
 * @returns {Array} zero or one `evaluator-artifact` violation
 */
function checkEvaluatorArtifact(content, filename = '') {
  if (!isEvaluatorArtifact(filename)) return [];

  const isOwned = EVALUATOR_OWNER_MARKER.test(content);
  if (!isOwned) {
    return [{
      check: 'evaluator-artifact',
      severity: 'warn',
      line: 0,
      message: `${filename}: evaluator/run-card artifact changed. Confirm the user explicitly requested this edit (R12).`,
    }];
  }

  const isApproved = EVALUATOR_APPROVAL_MARKER.test(content);
  if (isApproved) return [];

  return [{
    check: 'evaluator-artifact',
    severity: 'error',
    line: 0,
    message: `${filename}: evaluator-owned artifact changed without explicit approval marker (R12). Add <!-- harness-edit-approved: <reason> --> only when the user explicitly requested this edit.`,
  }];
}
|
|
836
|
+
|
|
837
|
+
// ─── Reviewer Audit Evidence Gate (R16) ─────────────────────────────
|
|
838
|
+
|
|
839
|
+
// A line that lists verified modules/dependencies: optional bullet, optional
// bold markers, one of the known labels (English or Korean), then a colon.
const REVIEWER_AUDIT_MODULE_LINE = /^\s*[-*]?\s*(?:\*\*)?(?:Verified modules|Verified imports|Dependencies verified|검증(?:된)?\s*(?:모듈|의존성)|확인(?:된)?\s*(?:모듈|의존성))(?:\*\*)?\s*:/im;

// Lower-case filler words that may follow such a label without naming a module.
const REVIEWER_AUDIT_IGNORE = new Set([
  'project', 'project-local', 'local', 'internal',
  'built-in', 'builtin', 'node',
  'none', 'n/a',
]);
|
|
851
|
+
|
|
852
|
+
/**
 * Collect every dependency name declared in a package.json text.
 *
 * Reads `dependencies`, `devDependencies`, `peerDependencies` and
 * `optionalDependencies`. This is best-effort: blank input, invalid JSON, or
 * a manifest that cannot be read all yield an empty set instead of an error.
 *
 * @param {string} [packageJson] raw package.json text
 * @returns {Set<string>} declared package names
 */
function packageNamesFromJson(packageJson = '') {
  if (packageJson.trim() === '') return new Set();

  const dependencyFields = ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'];
  try {
    const manifest = JSON.parse(packageJson);
    const declared = new Set();
    for (const field of dependencyFields) {
      for (const name of Object.keys(manifest[field] || {})) declared.add(name);
    }
    return declared;
  } catch {
    // Deliberate: an unreadable manifest simply contributes no names.
    return new Set();
  }
}
|
|
866
|
+
|
|
867
|
+
/**
 * Collect module names that are actually present in the scanned source files.
 *
 * Contributes, per file:
 *   - the file's own base name without its extension;
 *   - for each `require('x')` / `... from 'x'` specifier:
 *       relative (`./x`, `../x`): the last path segment without extension;
 *       scoped (`@scope/pkg/sub`): the package name `@scope/pkg`;
 *       anything else (`pkg/sub`, `node:fs`): the first segment, with a
 *       leading `node:` removed.
 *
 * @param {Array<{file?: string, content?: string}>} [sourceFiles]
 * @returns {Set<string>} module names found in the sources
 */
function moduleNamesFromSourceFiles(sourceFiles = []) {
  const names = new Set();
  const specifierPatterns = [
    /\brequire\(\s*['"]([^'"]+)['"]\s*\)/g,
    /\bfrom\s+['"]([^'"]+)['"]/g,
  ];
  const withoutExtension = (name) => name.replace(/\.[^.]+$/, '');

  for (const { file = '', content = '' } of sourceFiles) {
    const base = file.split(/[\\/]/).pop() || '';
    if (base.includes('.')) names.add(withoutExtension(base));

    for (const pattern of specifierPatterns) {
      // matchAll works on a clone of the regex, so the shared patterns carry no state.
      for (const [, spec] of content.matchAll(pattern)) {
        if (spec.startsWith('.')) {
          const local = spec.split('/').pop();
          if (local) names.add(withoutExtension(local));
          continue;
        }
        const segments = spec.replace(/^node:/, '').split('/');
        const isScoped = segments[0].startsWith('@') && segments.length > 1;
        names.add(isScoped ? `${segments[0]}/${segments[1]}` : segments[0]);
      }
    }
  }
  return names;
}
|
|
889
|
+
|
|
890
|
+
/**
 * Pull the module names a reviewer claims to have verified.
 *
 * Only lines matching REVIEWER_AUDIT_MODULE_LINE are read. When such a line
 * contains backtick spans, only those spans are used; otherwise everything
 * after the first colon is used. The text is split on commas and whitespace:
 *   - a piece starting with `@` is kept as a scoped package name
 *     (`@scope/pkg`, any deeper sub-path dropped);
 *   - any other piece is further split on `/`, and each part is kept when it
 *     is at least two characters, looks like an identifier, and is not one of
 *     the REVIEWER_AUDIT_IGNORE filler words. A leading `node:` is removed.
 *
 * @param {string} content reviewer memory markdown
 * @returns {string[]} distinct asserted module names, in first-seen order
 */
function extractAuditModuleNames(content) {
  const names = new Set();
  const trimEdges = (text) => text.replace(/^[`"'(*\-\s]+|[`"',.)*\s]+$/g, '');
  const scopedName = /^@[A-Za-z0-9][A-Za-z0-9_.-]*\/[A-Za-z0-9][A-Za-z0-9_.-]*$/;
  const plainName = /^[A-Za-z][A-Za-z0-9_.:-]*$/;

  for (const line of content.split('\n')) {
    if (!REVIEWER_AUDIT_MODULE_LINE.test(line)) continue;
    const codeNames = [...line.matchAll(/`([^`]+)`/g)].map((m) => m[1]);
    const source = codeNames.length > 0 ? codeNames.join(',') : line.split(':').slice(1).join(':');

    for (const piece of source.split(/[,\s]+/)) {
      const cleaned = trimEdges(piece);
      if (cleaned.startsWith('@')) {
        // Keep "@scope/pkg" together; splitting on "/" would invent a bare "pkg".
        const scoped = cleaned.split('/').slice(0, 2).join('/');
        if (scopedName.test(scoped)) names.add(scoped);
        continue;
      }
      for (const raw of piece.split('/')) {
        const token = trimEdges(raw);
        if (!token || token.length < 2) continue;
        if (!plainName.test(token)) continue;
        if (REVIEWER_AUDIT_IGNORE.has(token.toLowerCase())) continue;
        names.add(token.replace(/^node:/, ''));
      }
    }
  }
  return [...names];
}
|
|
907
|
+
|
|
908
|
+
/**
 * Reviewer audit evidence gate (R16).
 *
 * A reviewer's scope audit may be judgmental, but every module it lists as
 * verified has to be grounded in one of three places: Node's built-in module
 * list, the dependency fields of package.json, or the names derived from the
 * scanned source files. Anything else is reported — e.g. "express" cited in a
 * project that only uses Node's http module.
 *
 * @param {string} content reviewer.md
 * @param {{packageJson?: string, sourceFiles?: Array<{file:string, content:string}>, filename?: string}} context
 * @returns {Array} one `reviewer-audit` error per ungrounded name
 */
function checkReviewerAuditEvidence(content, { packageJson = '', sourceFiles = [], filename = '' } = {}) {
  const cited = extractAuditModuleNames(content);
  if (cited.length === 0) return [];

  const withoutNodePrefix = (name) => name.replace(/^node:/, '');

  const grounded = new Set(require('module').builtinModules.map(withoutNodePrefix));
  for (const dependency of packageNamesFromJson(packageJson)) grounded.add(dependency);
  for (const moduleName of moduleNamesFromSourceFiles(sourceFiles)) grounded.add(moduleName);

  const prefix = filename ? filename + ': ' : '';
  return cited
    .filter((name) => !grounded.has(withoutNodePrefix(name)))
    .map((name) => ({
      check: 'reviewer-audit',
      severity: 'error',
      line: 0,
      message: `${prefix}reviewer audit cites "${name}" as a verified module/dependency, but it is not in package.json or actual require/import lines (R16). Cite exact evidence or remove it.`,
    }));
}
|
|
939
|
+
|
|
504
940
|
// ─── Markdown lint (R6 / L3-8) ───────────────────────────────────────
|
|
505
941
|
|
|
506
942
|
/**
|
|
@@ -640,6 +1076,14 @@ function isStateMarkdownFile(file) {
|
|
|
640
1076
|
return /(?:^|\/)(?:docs|\.harness)\/(?:project-state|features|dependency-map|project-brief|failure-patterns)\.md$/.test(file);
|
|
641
1077
|
}
|
|
642
1078
|
|
|
1079
|
+
/**
 * Tell whether a path is the project brief: `project-brief.md` directly
 * inside a `docs` or `.harness` directory (forward-slash paths).
 *
 * @param {string} file path to test
 * @returns {boolean}
 */
function isProjectBriefFile(file) {
  const briefPath = /(?:^|\/)(?:docs|\.harness)\/project-brief\.md$/;
  return briefPath.test(file);
}
|
|
1082
|
+
|
|
1083
|
+
/**
 * Tell whether a path is the reviewer's memory file: `agent-memory/reviewer.md`
 * directly inside a `docs` or `.harness` directory (forward-slash paths).
 *
 * @param {string} file path to test
 * @returns {boolean}
 */
function isReviewerMemoryFile(file) {
  const reviewerMemoryPath = /(?:^|\/)(?:docs|\.harness)\/agent-memory\/reviewer\.md$/;
  return reviewerMemoryPath.test(file);
}
|
|
1086
|
+
|
|
643
1087
|
function isScannableForSecrets(file) {
|
|
644
1088
|
return /\.(js|ts|jsx|tsx|json|jsonc|ya?ml|env|sh|py|java|md|properties|toml)$/i.test(file)
|
|
645
1089
|
&& !/\.lock$/.test(file);
|
|
@@ -650,6 +1094,35 @@ function isPublicPackageFile(file) {
|
|
|
650
1094
|
return PUBLIC_PACKAGE_PATHS.some((re) => re.test(normalized));
|
|
651
1095
|
}
|
|
652
1096
|
|
|
1097
|
+
/**
 * Read the project's source files so reviewer audits can be checked against
 * real require/import lines.
 *
 * Looks at `src`, `lib`, `app`, `server.js` and `index.js` under `cwd`.
 * Directories are walked recursively, skipping dot-entries and
 * `node_modules`; only .js/.mjs/.cjs/.ts/.tsx/.jsx files are read.
 *
 * Entries that vanish or dangle (e.g. a broken symlink) are skipped instead
 * of aborting the whole guard run, and symlinked directories below a root are
 * not descended into, so a link cycle cannot recurse forever.
 *
 * @param {string} cwd project root
 * @returns {Array<{file: string, content: string}>} `file` is relative to `cwd`
 */
function sourceFilesForAudit(cwd) {
  const files = [];
  const roots = ['src', 'lib', 'app', 'server.js', 'index.js'];
  const sourceExtension = /\.(?:js|mjs|cjs|ts|tsx|jsx)$/;

  // stat that reports "missing" as null; any other failure still propagates.
  const statOrNull = (abs) => {
    try {
      return fs.statSync(abs);
    } catch (err) {
      if (err && (err.code === 'ENOENT' || err.code === 'ENOTDIR')) return null;
      throw err;
    }
  };

  const addFile = (rel) => {
    if (!sourceExtension.test(rel)) return;
    const abs = path.join(cwd, rel);
    const stat = statOrNull(abs);
    if (stat === null || !stat.isFile()) return;
    files.push({ file: rel, content: fs.readFileSync(abs, 'utf8') });
  };

  const walkSource = (relDir) => {
    // Dirent type checks do not follow symlinks, so linked directories fall
    // through to addFile, which rejects them as non-files.
    for (const entry of fs.readdirSync(path.join(cwd, relDir), { withFileTypes: true })) {
      if (entry.name.startsWith('.') || entry.name === 'node_modules') continue;
      const rel = path.join(relDir, entry.name);
      if (entry.isDirectory()) walkSource(rel);
      else addFile(rel);
    }
  };

  for (const root of roots) {
    const stat = statOrNull(path.join(cwd, root));
    if (stat === null) continue;
    if (stat.isDirectory()) walkSource(root);
    else addFile(root);
  }
  return files;
}
|
|
1125
|
+
|
|
653
1126
|
/**
|
|
654
1127
|
* Run all guard checks over a set of files.
|
|
655
1128
|
* @param {{files: string[], cwd?: string}} opts
|
|
@@ -665,6 +1138,7 @@ function runGuard({ files, cwd = process.cwd() }) {
|
|
|
665
1138
|
const content = fs.readFileSync(abs, 'utf8');
|
|
666
1139
|
const rel = path.relative(cwd, abs);
|
|
667
1140
|
scanned++;
|
|
1141
|
+
const beforeFile = all.length;
|
|
668
1142
|
|
|
669
1143
|
if (isScannableForSecrets(file)) {
|
|
670
1144
|
all.push(...scanSecrets(content, rel));
|
|
@@ -672,6 +1146,7 @@ function runGuard({ files, cwd = process.cwd() }) {
|
|
|
672
1146
|
if (isPublicPackageFile(rel)) {
|
|
673
1147
|
all.push(...checkPublicBoundary(content, rel));
|
|
674
1148
|
}
|
|
1149
|
+
all.push(...checkEvaluatorArtifact(content, rel));
|
|
675
1150
|
if (file.endsWith('.md') && isStateMarkdownFile(file)) {
|
|
676
1151
|
all.push(...lintMarkdownTables(content, rel));
|
|
677
1152
|
}
|
|
@@ -682,11 +1157,25 @@ function runGuard({ files, cwd = process.cwd() }) {
|
|
|
682
1157
|
const base = path.basename(file);
|
|
683
1158
|
all.push(...checkStateFile(content));
|
|
684
1159
|
all.push(...checkReviewerHandoff(content));
|
|
1160
|
+
all.push(...checkStoryContracts({ projectState: content }));
|
|
685
1161
|
all.push(...checkIntegrationDoD(content));
|
|
1162
|
+
all.push(...checkSmokeEvidence(content));
|
|
686
1163
|
all.push(...checkEnvSeal(content));
|
|
687
1164
|
if (STATE_LINE_LIMITS[base]) {
|
|
688
1165
|
all.push(...lintLineLimit(content, STATE_LINE_LIMITS[base], rel));
|
|
689
1166
|
}
|
|
1167
|
+
all.push(...checkSelfVerifyClaim(content, all.slice(beforeFile)));
|
|
1168
|
+
}
|
|
1169
|
+
if (isProjectBriefFile(file)) {
|
|
1170
|
+
all.push(...checkScopeSplitApproval({ projectBrief: content }));
|
|
1171
|
+
}
|
|
1172
|
+
if (isReviewerMemoryFile(file)) {
|
|
1173
|
+
const pkgPath = path.join(cwd, 'package.json');
|
|
1174
|
+
all.push(...checkReviewerAuditEvidence(content, {
|
|
1175
|
+
filename: rel,
|
|
1176
|
+
packageJson: fs.existsSync(pkgPath) ? fs.readFileSync(pkgPath, 'utf8') : '',
|
|
1177
|
+
sourceFiles: sourceFilesForAudit(cwd),
|
|
1178
|
+
}));
|
|
690
1179
|
}
|
|
691
1180
|
}
|
|
692
1181
|
|
|
@@ -699,11 +1188,18 @@ module.exports = {
|
|
|
699
1188
|
scanSecrets,
|
|
700
1189
|
checkStateFile,
|
|
701
1190
|
checkReviewerHandoff,
|
|
1191
|
+
checkStoryContracts,
|
|
702
1192
|
checkLearnCompletion,
|
|
703
1193
|
checkStateSync,
|
|
1194
|
+
checkScopeSplitApproval,
|
|
1195
|
+
checkRecentChangesIntegrity,
|
|
1196
|
+
checkSelfVerifyClaim,
|
|
704
1197
|
checkIntegrationDoD,
|
|
1198
|
+
checkSmokeEvidence,
|
|
705
1199
|
checkEnvSeal,
|
|
706
1200
|
checkPublicBoundary,
|
|
1201
|
+
checkEvaluatorArtifact,
|
|
1202
|
+
checkReviewerAuditEvidence,
|
|
707
1203
|
lintMarkdownTables,
|
|
708
1204
|
lintLineLimit,
|
|
709
1205
|
checkInstructionBudget,
|