roadmapsmith 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,32 +6,86 @@ const { walkFiles, detectTestFrameworks } = require('../io');
6
6
  const { collectPluginContributions } = require('../config');
7
7
  const { escapeRegExp, tokenize } = require('../utils');
8
8
 
9
+ const CONFIDENCE_RANK = { low: 0, medium: 1, high: 2 };
10
+
9
11
  const CODE_EXTENSIONS = new Set([
10
12
  '.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx', '.py', '.go', '.rs', '.java', '.kt', '.swift', '.rb', '.php', '.cs'
11
13
  ]);
12
14
 
13
- const DOC_HINTS = ['readme', 'changelog', 'docs', 'documentation', 'spec', 'diagram', 'runbook'];
15
+ // "docs" is omitted from DOC_HINTS: it is a path prefix in scan tasks, not a doc-authoring keyword.
16
+ const DOC_HINTS = ['readme', 'changelog', 'documentation', 'spec', 'diagram', 'runbook'];
14
17
  const CODE_HINTS = ['implement', 'add', 'create', 'build', 'refactor', 'fix', 'module', 'function', 'api', 'endpoint', 'command'];
15
18
  const GENERIC_TASK_TOKENS = new Set([
16
- 'implement',
17
- 'implementation',
18
- 'module',
19
- 'function',
20
- 'class',
21
- 'method',
22
- 'command',
23
- 'create',
24
- 'add',
25
- 'build',
26
- 'refactor',
27
- 'fix',
28
- 'test',
29
- 'tests'
19
+ // Action verbs too broad to be evidence signals
20
+ 'implement', 'implementation', 'create', 'add', 'build', 'refactor', 'fix',
21
+ 'detect', 'detection', 'support', 'handle', 'handler', 'update', 'check', 'run',
22
+ 'process', 'processing', 'generate', 'generation', 'format', 'report',
23
+ // Structural concepts shared by every codebase
24
+ 'module', 'function', 'class', 'method', 'command', 'type', 'value', 'values',
25
+ 'output', 'input', 'data',
26
+ // Test vocabulary
27
+ 'test', 'tests',
28
+ // Infrastructure names present in nearly every Node/JS project
29
+ 'config', 'configuration', 'package', 'json', 'project', 'roadmap',
30
+ // Domain words specific to this tool that appear in non-feature source files
31
+ 'confidence', 'profile', 'validation', 'evidence',
32
+ // Package/module field names that appear naturally in any Node.js generator or config file
33
+ 'main', 'exports', 'files', 'fields', 'without', 'field',
34
+ // Terminology used in architecture/detection task descriptions that overlaps with source identifiers
35
+ 'signals', 'directory', 'directories', 'headers', 'site', 'shebang',
36
+ // Common directory names that appear in import paths — too generic for evidence
37
+ 'src', 'lib',
38
+ // Broad task-description verbs and nouns that pollute evidence matching across every codebase
39
+ 'task', 'tasks', 'file', 'source', 'code', 'artifact', 'artifacts',
40
+ 'generic', 'feature', 'features', 'section', 'sections',
41
+ 'user', 'users', 'workflow', 'workflows', 'mode', 'modes', 'replace',
42
+ // Tool-internal vocabulary that appears in non-feature implementation files
43
+ 'audit', 'debug', 'signal', 'signals', 'log',
44
+ // English stopwords and function words that appear everywhere — not useful as evidence signals
45
+ 'only', 'must', 'what', 'which', 'kind', 'never', 'also', 'each',
46
+ 'detected', 'generated', 'existing', 'available',
47
+ // Tool-commentary vocabulary that appears in source comments but describes past/intended behavior
48
+ 'phrases', 'conceptual',
30
49
  ]);
31
50
 
51
+ const CANONICAL_FILES = {
52
+ security: 'SECURITY.md',
53
+ readme: 'README.md',
54
+ changelog: 'CHANGELOG.md',
55
+ license: 'LICENSE'
56
+ };
57
+
58
+ // The roadmap file must never be included in the evidence pool: its task descriptions
59
+ // contain the exact vocabulary of the tasks being validated, which would cause every
60
+ // task to validate itself.
61
+ const SELF_REFERENTIAL_FILES = new Set(['ROADMAP.md']);
62
+
63
+ // Maps task-ID namespace prefix to a predicate on (normalized) file paths.
64
+ // When a task ID has a known namespace, at least one evidence file must satisfy
65
+ // the predicate — otherwise generic token overlap alone cannot pass the task.
66
+ const NAMESPACE_STRUCTURAL_PATTERNS = {
67
+ cls: (p) => /classif(?:ier|y)|archetype/.test(p),
68
+ dsg: (p) => /generator[/\\](?:domain|web|landing|profiles?)|(?:domain|web|landing)[/\\](?:profile|generator)/.test(p),
69
+ evh2: (p) => p.includes('/validator/') || p.includes('\\validator\\'),
70
+ cst: (p) => /smoke|integration[-_]test|e2e/.test(p),
71
+ uxf: (p) => p.includes('/renderer/') || p.includes('\\renderer\\') || /renderer\.[jt]sx?$/.test(p),
72
+ cfgo: (p) => /config[/\\]|schema[/\\]|config\.[jt]s$|schema\.[jt]s$/.test(p),
73
+ doc3: (p) => /(?:^|[/\\])docs[/\\]|readme\.md$/i.test(p),
74
+ };
75
+
76
+ // Test fixture directories contain synthetic code created to drive test scenarios,
77
+ // not real implementations. Including them pollutes the evidence pool with vocabulary
78
+ // that was deliberately seeded for testing purposes (e.g. namespace-vocab fixtures).
79
+ function isFixturePath(relativePath) {
80
+ return /(?:^|[/\\])fixtures[/\\]/.test(relativePath);
81
+ }
82
+
32
83
  function readFileIndex(projectRoot, files) {
33
84
  const index = [];
34
85
  for (const relativePath of files) {
86
+ if (SELF_REFERENTIAL_FILES.has(relativePath)) continue;
87
+ if (isFixturePath(relativePath)) continue;
88
+
35
89
  const absolutePath = path.resolve(projectRoot, relativePath);
36
90
  const ext = path.extname(relativePath).toLowerCase();
37
91
  let content = '';
@@ -70,10 +124,31 @@ function isLikelyPath(token) {
70
124
  if (/^\.{1,2}\/|^\//.test(token)) return true;
71
125
  if (hasFileExtension(token)) return true;
72
126
  if (KNOWN_PATH_ROOTS.some((root) => token.startsWith(root))) return true;
73
- if ((token.match(/\//g) || []).length >= 2) return true;
127
+ // The ">= 2 slashes" rule was intentionally removed: it caused conceptual slash phrases
128
+ // like "code/test/artifact" or "build/test/deploy" to be treated as file paths.
129
+ // Real multi-segment paths are caught by the extension or known-root rules above.
74
130
  return false;
75
131
  }
76
132
 
133
+ // Matches standalone filenames without a slash — e.g. "roadmap-skill.config.json",
134
+ // "package.json", "vite.config.ts". These are path references whose component tokens
135
+ // (e.g. "roadmap", "skill") must be excluded from code evidence scoring to prevent
136
+ // circular vocabulary: a task mentioning a filename would otherwise score hits in any
137
+ // source file that happens to reference the same filename for unrelated reasons.
138
+ // Version strings are not matched: "1.0.0" fails the leading-letter requirement, and "v0.8" lacks a 2+ character extension.
139
+ const STANDALONE_FILE_RE = /\b([A-Za-z][A-Za-z0-9_.+-]*\.[A-Za-z0-9]{2,10})\b/g;
140
+ const KNOWN_FILE_EXTENSIONS = new Set([
141
+ '.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx', '.py', '.go', '.rs',
142
+ '.java', '.kt', '.swift', '.rb', '.php', '.cs', '.json', '.yaml', '.yml',
143
+ '.toml', '.md', '.txt', '.sh', '.bash', '.env', '.html', '.css', '.scss', '.lock'
144
+ ]);
145
+
146
+ function hasKnownFileExtension(token) {
147
+ const lastDot = token.lastIndexOf('.');
148
+ if (lastDot < 0) return false;
149
+ return KNOWN_FILE_EXTENSIONS.has(token.slice(lastDot).toLowerCase());
150
+ }
151
+
77
152
  function extractExplicitPaths(text) {
78
153
  const results = new Set();
79
154
  const quoted = String(text).match(/`([^`]+)`/g) || [];
@@ -93,6 +168,25 @@ function extractExplicitPaths(text) {
93
168
  return Array.from(results).sort((left, right) => left.localeCompare(right));
94
169
  }
95
170
 
171
+ // Standalone filenames (no slash) mentioned in task prose — e.g. "roadmap-skill.config.json",
172
+ // "package.json". These are filename *references*, NOT path-existence assertions: the author
173
+ // is describing which file contains a feature, not asserting that the file must exist.
174
+ // Used only for pathDerivedToken extraction (to prevent circular vocabulary), never for
175
+ // findFilesByPathHints (which would pass any task whose config file already exists).
176
+ function extractStandaloneFilenames(text) {
177
+ const results = new Set();
178
+ STANDALONE_FILE_RE.lastIndex = 0;
179
+ let m = STANDALONE_FILE_RE.exec(String(text));
180
+ while (m) {
181
+ const token = m[1].replace(/[.,;:!?)]+$/, '');
182
+ if (hasKnownFileExtension(token) && !token.startsWith('.')) {
183
+ results.add(token);
184
+ }
185
+ m = STANDALONE_FILE_RE.exec(String(text));
186
+ }
187
+ return Array.from(results);
188
+ }
189
+
96
190
  function extractSymbolHints(text) {
97
191
  const symbols = new Set();
98
192
  const patterns = [
@@ -119,7 +213,12 @@ function isCodeTask(taskText) {
119
213
 
120
214
  function isDocTask(taskText) {
121
215
  const normalized = String(taskText).toLowerCase();
122
- return DOC_HINTS.some((hint) => normalized.includes(hint));
216
+ // Use word-boundary matching to avoid substring false positives (e.g. "specific" ≠ "spec").
217
+ const hasDocKeyword = DOC_HINTS.some((hint) => new RegExp(`(?<![a-z])${hint}(?![a-z])`).test(normalized));
218
+ if (!hasDocKeyword) return false;
219
+ // Also require a creation/update verb so that policy tasks mentioning doc files
220
+ // ("README must not be used as evidence") don't trigger doc-artifact evidence.
221
+ return /\b(add|create|write|update|init|initialize|introduce|setup|document)\b/.test(normalized);
123
222
  }
124
223
 
125
224
  function findFilesByPathHints(pathHints, fileIndex) {
@@ -157,9 +256,31 @@ function findFilesBySymbols(symbolHints, fileIndex) {
157
256
  return Array.from(matches).sort((left, right) => left.localeCompare(right));
158
257
  }
159
258
 
160
- function findCodeEvidence(taskText, fileIndex) {
259
+ // Tokens extracted from a referenced file path (e.g. "roadmap-skill" from
260
+ // "roadmap-skill.config.json") must not be reused as code evidence signals.
261
+ // Those tokens appear in any file that mentions the same path — creating circular
262
+ // vocabulary where a task about "X in path/to/file" passes because the source
263
+ // code references the same path for unrelated reasons.
264
+ function extractPathDerivedTokens(pathHints) {
265
+ const tokens = new Set();
266
+ for (const hint of pathHints) {
267
+ // Char-split: "roadmap-skill.config.json" → ["roadmap", "skill", "config", "json"]
268
+ const parts = hint.replace(/[.\-_/\\]/g, ' ').toLowerCase().split(/\s+/).filter(Boolean);
269
+ for (const part of parts) {
270
+ if (part.length >= 3) tokens.add(part);
271
+ }
272
+ // Tokenizer-split: also adds compound tokens the char-split misses, e.g. "roadmap-skill"
273
+ // (the tokenizer preserves hyphens in identifiers; the char-split strips them).
274
+ for (const token of tokenize(hint)) {
275
+ if (token.length >= 3) tokens.add(token);
276
+ }
277
+ }
278
+ return tokens;
279
+ }
280
+
281
+ function findCodeEvidence(taskText, fileIndex, pathDerivedTokens = new Set()) {
161
282
  const tokens = tokenize(taskText)
162
- .filter((token) => token.length >= 3 && !GENERIC_TASK_TOKENS.has(token))
283
+ .filter((token) => token.length >= 3 && !GENERIC_TASK_TOKENS.has(token) && !token.endsWith('/') && !pathDerivedTokens.has(token))
163
284
  .slice(0, 8);
164
285
  if (tokens.length === 0) {
165
286
  return [];
@@ -182,7 +303,9 @@ function findCodeEvidence(taskText, fileIndex) {
182
303
  }
183
304
  }
184
305
 
185
- const threshold = tokens.length === 1 ? 1 : 2;
306
+ // Require more matches proportional to how many specific tokens the task has.
307
+ // With 4+ meaningful tokens a file must score at least 3; this guards against incidental vocabulary overlap.
308
+ const threshold = tokens.length >= 4 ? 3 : tokens.length >= 2 ? 2 : 1;
186
309
  if (score >= threshold) {
187
310
  matches.push(file.relativePath);
188
311
  }
@@ -193,18 +316,46 @@ function findCodeEvidence(taskText, fileIndex) {
193
316
 
194
317
  function findTestEvidence(taskText, fileIndex) {
195
318
  const tokens = tokenize(taskText)
196
- .filter((token) => token.length >= 3 && !GENERIC_TASK_TOKENS.has(token))
319
+ .filter((token) => token.length >= 3 && !GENERIC_TASK_TOKENS.has(token) && !token.endsWith('/'))
197
320
  .slice(0, 8);
321
+
322
+ if (tokens.length === 0) return [];
323
+
324
+ // Only tokens of length >= 4 are used for import-reference matching.
325
+ // Very short tokens (e.g. "app", "web") are too generic: they appear as substrings in
326
+ // many import paths that have nothing to do with the feature being validated.
327
+ // The single-short-token fallback below handles the narrow case of one-word module names.
328
+ const importTokens = tokens.filter((token) => token.length >= 4);
329
+
198
330
  const matches = [];
199
331
 
200
332
  for (const file of fileIndex) {
201
- if (!file.isTestFile) {
333
+ if (!file.isTestFile) continue;
334
+
335
+ // A test file counts as evidence only when it imports a module whose path contains
336
+ // one of the task's meaningful tokens. Content-keyword matching is intentionally absent:
337
+ // test content (descriptions, literals) can contain future-task vocabulary,
338
+ // producing self-referential false positives.
339
+ //
340
+ // Trailing slashes are NOT stripped: "app/" is a directory reference, not a module name.
341
+ // "../src/app" (a real import) does not contain the string "app/" so it won't match.
342
+ const importRefs = (
343
+ file.content.match(/require\s*\(\s*['"`]([^'"`]+)['"`]\s*\)|from\s+['"`]([^'"`]+)['"`]/g) || []
344
+ ).join(' ').toLowerCase();
345
+
346
+ if (importTokens.length > 0 && importTokens.some((token) => importRefs.includes(token))) {
347
+ matches.push(file.relativePath);
202
348
  continue;
203
349
  }
204
- const lowered = file.content.toLowerCase();
205
- const hasMatch = tokens.some((token) => lowered.includes(token));
206
- if (hasMatch) {
207
- matches.push(file.relativePath);
350
+
351
+ // Narrow fallback: single very-short token (e.g. "app", "cli").
352
+ // Import paths for these are too short to distinguish reliably, so fall back to a
353
+ // content match — but only when there is exactly one such token (no multi-token dilution).
354
+ if (tokens.length === 1 && tokens[0].length < 4) {
355
+ const lowered = file.content.toLowerCase();
356
+ if (lowered.includes(tokens[0])) {
357
+ matches.push(file.relativePath);
358
+ }
208
359
  }
209
360
  }
210
361
 
@@ -213,10 +364,30 @@ function findTestEvidence(taskText, fileIndex) {
213
364
 
214
365
  function findArtifactEvidence(taskText, fileIndex) {
215
366
  const normalized = String(taskText).toLowerCase();
216
- const matches = [];
367
+ const files = [];
368
+ const heuristicArtifacts = [];
369
+
370
+ // Canonical file detection only applies to short tasks (≤8 words) that are about
371
+ // creating or referencing that specific file. Long sentences that merely MENTION
372
+ // "readme" or "security" in a policy/constraint context are excluded.
373
+ const wordCount = normalized.trim().split(/\s+/).length;
374
+ if (wordCount <= 8) {
375
+ for (const [keyword, filename] of Object.entries(CANONICAL_FILES)) {
376
+ // Use hyphen-aware word boundaries: "security-headers" must not match "security".
377
+ if (new RegExp(`(?<![a-z-])${keyword}(?![a-z-])`).test(normalized)) {
378
+ const hit = fileIndex.find(
379
+ (f) => f.relativePath === filename || f.relativePath.endsWith('/' + filename)
380
+ );
381
+ if (hit) {
382
+ files.push(hit.relativePath);
383
+ heuristicArtifacts.push(hit.relativePath);
384
+ }
385
+ }
386
+ }
387
+ }
217
388
 
218
- if (!isDocTask(taskText) && !normalized.includes('artifact') && !normalized.includes('release')) {
219
- return matches;
389
+ if (!isDocTask(taskText)) {
390
+ return { files, heuristicArtifacts };
220
391
  }
221
392
 
222
393
  const artifactPatterns = [
@@ -229,12 +400,88 @@ function findArtifactEvidence(taskText, fileIndex) {
229
400
  ];
230
401
 
231
402
  for (const file of fileIndex) {
232
- if (artifactPatterns.some((pattern) => pattern.test(file.relativePath))) {
233
- matches.push(file.relativePath);
403
+ if (artifactPatterns.some((pattern) => pattern.test(file.relativePath)) && !files.includes(file.relativePath)) {
404
+ files.push(file.relativePath);
234
405
  }
235
406
  }
236
407
 
237
- return matches.slice(0, 20);
408
+ return { files: files.slice(0, 20), heuristicArtifacts };
409
+ }
410
+
411
+ function extractTaskNamespace(taskId) {
412
+ if (!taskId) return null;
413
+ const match = String(taskId).match(/^([a-z][a-z0-9]*)-/);
414
+ return match ? match[1] : null;
415
+ }
416
+
417
+ function isAcceptanceCriteria(taskId) {
418
+ return /ph\d+[_-]st\d+[_-]exit/.test(String(taskId || ''));
419
+ }
420
+
421
+ // Gate: returns { applicable, passed, structuralFiles, reason }.
422
+ // For namespaces with a defined structural pattern:
423
+ // 1. If no files in fileIndex match the pattern → immediate fail.
424
+ // 2. For acceptance-criteria tasks (phN-stN-exit IDs): path match alone is enough.
425
+ // 3. For implementation tasks: feature tokens from task text must score ≥ ceil(n/2)
426
+ // against namespace-matched files, preventing vocabulary overlap from generic
427
+ // infrastructure code (io.js, generator/index.js) from serving as evidence.
428
+ function checkNamespaceStructuralEvidence(taskId, taskText, fileIndex) {
429
+ const namespace = extractTaskNamespace(taskId);
430
+ if (!namespace || !NAMESPACE_STRUCTURAL_PATTERNS[namespace]) {
431
+ return { applicable: false, passed: true, structuralFiles: [], reason: null };
432
+ }
433
+
434
+ const predicate = NAMESPACE_STRUCTURAL_PATTERNS[namespace];
435
+ const namespaceFiles = fileIndex.filter((f) => predicate(f.relativePath));
436
+
437
+ if (namespaceFiles.length === 0) {
438
+ return {
439
+ applicable: true,
440
+ passed: false,
441
+ structuralFiles: [],
442
+ reason: `namespace "${namespace}" has no implementation files`,
443
+ };
444
+ }
445
+
446
+ const featureTokens = tokenize(taskText)
447
+ .filter((t) => t.length >= 4 && !GENERIC_TASK_TOKENS.has(t) && !t.endsWith('/'))
448
+ .slice(0, 8);
449
+
450
+ if (featureTokens.length === 0) {
451
+ return {
452
+ applicable: true,
453
+ passed: true,
454
+ structuralFiles: namespaceFiles.map((f) => f.relativePath),
455
+ reason: null,
456
+ };
457
+ }
458
+
459
+ let bestScore = 0;
460
+ for (const nsFile of namespaceFiles) {
461
+ const lowered = nsFile.content.toLowerCase();
462
+ let score = 0;
463
+ for (const token of featureTokens) {
464
+ if (lowered.includes(token)) score++;
465
+ }
466
+ if (score > bestScore) bestScore = score;
467
+ }
468
+
469
+ const threshold = Math.max(1, Math.ceil(featureTokens.length / 2));
470
+ if (bestScore >= threshold) {
471
+ return {
472
+ applicable: true,
473
+ passed: true,
474
+ structuralFiles: namespaceFiles.map((f) => f.relativePath),
475
+ reason: null,
476
+ };
477
+ }
478
+
479
+ return {
480
+ applicable: true,
481
+ passed: false,
482
+ structuralFiles: namespaceFiles.map((f) => f.relativePath),
483
+ reason: `structural token score ${bestScore}/${threshold} in "${namespace}" files — token overlap insufficient`,
484
+ };
238
485
  }
239
486
 
240
487
  function evaluateRule(rule, task, context) {
@@ -320,13 +567,20 @@ function buildValidationContext(projectRoot, config, plugins) {
320
567
 
321
568
  function validateTask(task, context, config, plugins) {
322
569
  const pathHints = extractExplicitPaths(task.text);
570
+ const standaloneFilenames = extractStandaloneFilenames(task.text);
323
571
  const symbolHints = extractSymbolHints(task.text);
324
572
 
325
573
  const filesFromPaths = findFilesByPathHints(pathHints, context.fileIndex);
326
574
  const filesFromSymbols = findFilesBySymbols(symbolHints, context.fileIndex);
327
- const filesFromCode = findCodeEvidence(task.text, context.fileIndex);
575
+ // Combine path hints AND standalone filenames for token exclusion so that tokens
576
+ // derived from any referenced filename (e.g. "roadmap-skill" from
577
+ // "roadmap-skill.config.json") are excluded from code evidence scoring.
578
+ const pathDerivedTokens = extractPathDerivedTokens([...pathHints, ...standaloneFilenames]);
579
+ const filesFromCode = findCodeEvidence(task.text, context.fileIndex, pathDerivedTokens);
328
580
  const filesFromTests = findTestEvidence(task.text, context.fileIndex);
329
- const filesFromArtifacts = findArtifactEvidence(task.text, context.fileIndex);
581
+ const { files: filesFromArtifacts, heuristicArtifacts } = findArtifactEvidence(task.text, context.fileIndex);
582
+
583
+ const structuralCheck = checkNamespaceStructuralEvidence(task.id, task.text, context.fileIndex);
330
584
 
331
585
  const evidence = {
332
586
  code: filesFromCode.length > 0 || filesFromSymbols.length > 0,
@@ -336,7 +590,10 @@ function validateTask(task, context, config, plugins) {
336
590
  symbols: filesFromSymbols,
337
591
  codeFiles: filesFromCode,
338
592
  testFiles: filesFromTests,
339
- artifactFiles: filesFromArtifacts
593
+ artifactFiles: filesFromArtifacts,
594
+ heuristicArtifacts,
595
+ structuralEvidence: structuralCheck.applicable ? structuralCheck.passed : null,
596
+ structuralFiles: structuralCheck.structuralFiles,
340
597
  };
341
598
 
342
599
  const reasons = [];
@@ -347,8 +604,16 @@ function validateTask(task, context, config, plugins) {
347
604
  reasons.push(`missing symbol(s): ${symbolHints.join(', ')}`);
348
605
  }
349
606
 
607
+ // Namespace-structural gate: for known namespaces, token overlap alone is insufficient.
608
+ // The task must have evidence files whose paths match the namespace pattern.
609
+ if (structuralCheck.applicable && !structuralCheck.passed) {
610
+ reasons.push(structuralCheck.reason || `no structural evidence for namespace "${extractTaskNamespace(task.id)}"`);
611
+ }
612
+
350
613
  const hasEvidence = evidence.code || evidence.test || evidence.artifact || evidence.files.length > 0;
351
- if (!hasEvidence) {
614
+ if (!hasEvidence && !structuralCheck.applicable) {
615
+ reasons.push('no code, test, or artifact evidence found');
616
+ } else if (!hasEvidence && structuralCheck.applicable && structuralCheck.passed) {
352
617
  reasons.push('no code, test, or artifact evidence found');
353
618
  }
354
619
 
@@ -370,7 +635,11 @@ function validateTask(task, context, config, plugins) {
370
635
  const attempted = hasEvidence || pathHints.length > 0 || symbolHints.length > 0;
371
636
 
372
637
  const evidenceCount = [evidence.code, evidence.test, evidence.artifact].filter(Boolean).length;
373
- const confidence = evidenceCount >= 2 ? 'high' : evidenceCount === 1 ? 'medium' : attempted ? 'medium' : 'low';
638
+ const confidence = evidenceCount >= 2 ? 'high' : evidenceCount === 1 ? 'medium' : 'low';
639
+
640
+ // True when the only passing evidence is artifact/doc files and the task is not a doc task.
641
+ // Used by auditValidation to flag implementation tasks that pass solely via documentation.
642
+ const evidenceIsDocOnly = !evidence.code && !evidence.test && evidence.artifact && !isDocTask(task.text);
374
643
 
375
644
  return {
376
645
  taskId: task.id,
@@ -378,6 +647,7 @@ function validateTask(task, context, config, plugins) {
378
647
  confidence,
379
648
  reasons: uniqueReasons,
380
649
  evidence,
650
+ evidenceIsDocOnly,
381
651
  requiresTest,
382
652
  hasEvidence,
383
653
  attempted
@@ -395,12 +665,13 @@ function validateTasks(tasks, context, config, plugins) {
395
665
  function auditValidation(tasks, results) {
396
666
  const checkedWithoutEvidence = [];
397
667
  const readyButUnchecked = [];
668
+ const checkedWithWeakEvidence = [];
669
+ const documentationOnlyEvidenceForImplementation = [];
670
+ const checkedWithNoStructuralEvidence = [];
398
671
 
399
672
  for (const task of tasks) {
400
673
  const result = results[task.id];
401
- if (!result) {
402
- continue;
403
- }
674
+ if (!result) continue;
404
675
 
405
676
  if (task.checked && !result.passed) {
406
677
  checkedWithoutEvidence.push({ task, result });
@@ -409,17 +680,51 @@ function auditValidation(tasks, results) {
409
680
  if (!task.checked && result.passed) {
410
681
  readyButUnchecked.push({ task, result });
411
682
  }
683
+
684
+ if (task.checked && result.passed && result.confidence === 'low') {
685
+ checkedWithWeakEvidence.push({ task, result });
686
+ }
687
+
688
+ if (task.checked && result.passed && result.evidenceIsDocOnly) {
689
+ documentationOnlyEvidenceForImplementation.push({ task, result });
690
+ }
691
+
692
+ // Checked task that failed specifically because structural evidence is missing.
693
+ if (task.checked && !result.passed && result.evidence.structuralEvidence === false) {
694
+ checkedWithNoStructuralEvidence.push({ task, result });
695
+ }
412
696
  }
413
697
 
414
698
  return {
415
699
  checkedWithoutEvidence,
416
- readyButUnchecked
700
+ readyButUnchecked,
701
+ checkedWithWeakEvidence,
702
+ documentationOnlyEvidenceForImplementation,
703
+ checkedWithNoStructuralEvidence,
417
704
  };
418
705
  }
419
706
 
707
+ function applyMinimumConfidence(results, minimumConfidence) {
708
+ const minRank = CONFIDENCE_RANK[minimumConfidence] ?? 0;
709
+ if (minRank === 0) return;
710
+ for (const result of Object.values(results)) {
711
+ if ((CONFIDENCE_RANK[result.confidence] ?? 0) < minRank) {
712
+ result.passed = false;
713
+ result.reasons = [
714
+ ...result.reasons,
715
+ `validation confidence "${result.confidence}" is below required "${minimumConfidence}"`
716
+ ];
717
+ }
718
+ }
719
+ }
720
+
420
721
  module.exports = {
421
722
  auditValidation,
422
723
  buildValidationContext,
423
724
  validateTask,
424
- validateTasks
725
+ validateTasks,
726
+ CONFIDENCE_RANK,
727
+ applyMinimumConfidence,
728
+ extractTaskNamespace,
729
+ isAcceptanceCriteria,
425
730
  };