docguard-cli 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -6
- package/cli/commands/diagnose.mjs +15 -8
- package/cli/commands/fix.mjs +3 -2
- package/cli/commands/generate.mjs +59 -1
- package/cli/commands/guard.mjs +29 -0
- package/cli/commands/llms.mjs +159 -0
- package/cli/commands/score.mjs +162 -0
- package/cli/docguard.mjs +6 -0
- package/cli/scanners/speckit.mjs +234 -0
- package/cli/shared.mjs +76 -0
- package/cli/validators/doc-quality.mjs +629 -0
- package/cli/validators/docs-coverage.mjs +387 -0
- package/cli/validators/docs-sync.mjs +53 -0
- package/cli/validators/metadata-sync.mjs +179 -0
- package/cli/validators/metrics-consistency.mjs +166 -0
- package/cli/validators/schema-sync.mjs +219 -0
- package/cli/validators/todo-tracking.mjs +295 -0
- package/cli/validators/traceability.mjs +194 -8
- package/package.json +1 -1
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Doc Quality Validator — Measures documentation writing quality
|
|
3
|
+
*
|
|
4
|
+
* Implements 8 deterministic metrics inspired by IEEE 830/ISO 29148 and the
|
|
5
|
+
* "understanding" project (github.com/Testimonial/understanding).
|
|
6
|
+
* Credit: Metric formulas and weighting system inspired by the Understanding
|
|
7
|
+
* project's 31-metric quality framework for requirements quality.
|
|
8
|
+
*
|
|
9
|
+
* Metrics implemented:
|
|
10
|
+
* Structure: Passive Voice Ratio, Ambiguous Pronoun Ratio, Atomicity Score
|
|
11
|
+
* Readability: Flesch Reading Ease, Flesch-Kincaid Grade Level
|
|
12
|
+
* Cognitive: Sentence Length, Negation Load, Conditional Load
|
|
13
|
+
*
|
|
14
|
+
* Optional: If `understanding` CLI is installed, runs a full 31-metric deep scan.
|
|
15
|
+
*
|
|
16
|
+
* Zero dependencies — pure Node.js built-ins only.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
import { resolve, join, extname } from 'node:path';
import { execFileSync, execSync } from 'node:child_process';
|
|
22
|
+
|
|
23
|
+
// ──── Metric Thresholds ────
// Boundary between "good" and "warning" for each of the eight metrics.
// Values are based on IEEE 830 best practices and readability research.
// `warn` is the boundary value; `label` is the human-readable metric name.

const THRESHOLDS = {
  // More than 20% of sentences in passive voice = warn
  passiveVoiceRatio: { warn: 0.20, label: 'Passive voice ratio' },

  // More than 15% ambiguous pronouns = warn
  ambiguousPronounRatio: { warn: 0.15, label: 'Ambiguous pronoun ratio' },

  // More than 30% compound (non-atomic) sentences = warn
  atomicityScore: { warn: 0.30, label: 'Non-atomic sentence ratio' },

  // Score below 30 = very hard to read
  fleschReadingEase: { warn: 30, label: 'Flesch reading ease' },

  // Grade above 16 = graduate level and beyond
  fleschKincaidGrade: { warn: 16, label: 'Flesch-Kincaid grade' },

  // More than 25 words per sentence on average = too long
  avgSentenceLength: { warn: 25, label: 'Avg sentence length' },

  // More than 15% of sentences containing a negation = warn
  negationLoad: { warn: 0.15, label: 'Negation load' },

  // More than 30% of sentences containing a conditional = warn
  conditionalLoad: { warn: 0.30, label: 'Conditional load' },
};
|
|
37
|
+
|
|
38
|
+
// ──── Text Processing Utilities ────
|
|
39
|
+
|
|
40
|
+
/**
 * Strip Markdown formatting so that only plain prose remains.
 *
 * Removes, in this order: fenced code blocks, HTML comments, leading YAML
 * front matter, table rows, horizontal rules, images, link targets (the link
 * text is kept), inline code, header markers, list markers and bold/italic
 * markers. Runs of blank lines are then collapsed.
 *
 * @param {string} content - Raw Markdown source.
 * @returns {string} The remaining prose, trimmed.
 */
function stripMarkdown(content) {
  let text = content;

  // Remove fenced code blocks; four-backtick fences first so that a
  // three-backtick fence nested inside one does not split it.
  text = text.replace(/````[\s\S]*?````/g, '');
  text = text.replace(/```[\s\S]*?```/g, '');

  // Remove HTML comments (<!-- ... -->)
  text = text.replace(/<!--[\s\S]*?-->/g, '');

  // Remove YAML front matter. The pattern is anchored to the start of the
  // text (no `m` flag): front matter can only open a document, and matching
  // `---` at any line start would delete the prose between two horizontal
  // rules in the middle of the document.
  text = text.replace(/^\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, '');

  // Remove table rows (lines starting with |)
  text = text.replace(/^\|.*$/gm, '');

  // Remove horizontal rules
  text = text.replace(/^[-*_]{3,}\s*$/gm, '');

  // Remove images: ![alt](url). This also covers badge images, so no
  // separate badge pass is needed.
  text = text.replace(/!\[.*?\]\(.*?\)/g, '');

  // Remove links, keep link text: [text](url) → text
  text = text.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');

  // Remove inline code
  text = text.replace(/`[^`]+`/g, '');

  // Remove header markers (# ## ### etc.)
  text = text.replace(/^#{1,6}\s+/gm, '');

  // Remove list markers (-, *, +, 1.)
  text = text.replace(/^\s*[-*+]\s+/gm, '');
  text = text.replace(/^\s*\d+\.\s+/gm, '');

  // Remove bold/italic markers, keep the emphasized text
  text = text.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1');
  text = text.replace(/_{1,3}([^_]+)_{1,3}/g, '$1');

  // Collapse multiple blank lines
  text = text.replace(/\n{3,}/g, '\n\n');

  return text.trim();
}
|
|
92
|
+
|
|
93
|
+
/**
 * Split text into sentences on sentence-ending punctuation (. ! ?) that is
 * followed by whitespace or the end of the text.
 *
 * Periods that belong to a known abbreviation (Mr., Dr., e.g., i.e., ...) or
 * to a decimal number (3.14) are shielded first so they do not cause a split.
 * Fragments of three characters or fewer are dropped.
 *
 * @param {string} text - Plain prose.
 * @returns {string[]} Trimmed sentences without their closing punctuation.
 */
function splitSentences(text) {
  if (!text || text.trim().length === 0) return [];

  // Private-use code point standing in for a period that must not split.
  const shieldedDot = '\uE000';

  // The dots inside "i.e" / "e.g" are escaped so they only match a literal
  // period; unescaped they also matched ordinary words such as "ice." or
  // "egg.", rewriting them and suppressing the sentence break. Capturing the
  // matched text (instead of re-inserting the list entry) keeps its casing.
  const abbreviationDot = /\b(Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|cf)\./gi;

  const shielded = text
    .replace(abbreviationDot, `$1${shieldedDot}`)
    // Protect decimal numbers (3.14)
    .replace(/(\d)\.(\d)/g, `$1${shieldedDot}$2`);

  return shielded
    .split(/[.!?]+(?:\s+|$)/)
    .map((part) => part.replace(/\uE000/g, '.').trim())
    .filter((part) => part.length > 3); // Ignore fragments under 4 chars
}
|
|
119
|
+
|
|
120
|
+
// Common words whose syllable count the vowel-group heuristic gets wrong.
// A Map is used so that lookups never reach inherited Object.prototype
// members (for example the word "constructor").
const SYLLABLE_EXCEPTIONS = new Map([
  ['the', 1], ['are', 1], ['were', 1], ['have', 1], ['there', 1],
  ['where', 1], ['here', 1], ['every', 3], ['everything', 4],
  ['create', 2], ['file', 1], ['style', 1], ['quite', 1],
]);

/**
 * Estimate the number of syllables in an English word.
 *
 * Heuristic:
 *   1. Keep only the letters a-z (case-insensitive); words of two letters or
 *      fewer count as one syllable.
 *   2. Return the fixed value for words in the exception list.
 *   3. Count groups of consecutive vowels (a, e, i, o, u, y).
 *   4. Subtract one for a silent trailing "e" (not after "l", "c" or "g").
 *   5. Subtract one for a trailing "-ed" that is not "-ted" / "-ded".
 *   6. Never return less than 1.
 *
 * @param {string} word - A single word; punctuation and digits are ignored.
 * @returns {number} Estimated syllable count, at least 1.
 */
function countSyllables(word) {
  const letters = word.toLowerCase().replace(/[^a-z]/g, '');
  if (letters.length <= 2) return 1;

  const known = SYLLABLE_EXCEPTIONS.get(letters);
  if (known !== undefined) return known;

  // Count vowel groups
  const vowelGroups = letters.match(/[aeiouy]+/g);
  let count = vowelGroups ? vowelGroups.length : 1;

  // Silent trailing "e" ("make"); "-le", "-ce" and "-ge" endings are left alone.
  const endsInSilentE = letters.endsWith('e')
    && !letters.endsWith('le')
    && !letters.endsWith('ce')
    && !letters.endsWith('ge');
  if (endsInSilentE) count--;

  // A trailing "-ed" is usually not its own syllable ("jumped"), except
  // after "t" or "d" ("wanted", "added").
  const endsInSilentEd = letters.endsWith('ed')
    && !letters.endsWith('ted')
    && !letters.endsWith('ded');
  if (endsInSilentEd) count--;

  // Ensure minimum 1 syllable
  return Math.max(1, count);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Tokenize text into lowercase words.
 *
 * Letters and digits (any script) are kept, together with apostrophes and
 * hyphens inside a word ("don't", "well-known"). Every other character is a
 * separator. Apostrophes and hyphens at the edges of a token are trimmed, so
 * quoted words lose their quotes and stand-alone dashes are not counted as
 * words.
 *
 * @param {string} text - Plain prose.
 * @returns {string[]} Lowercased word tokens, in order of appearance.
 */
function tokenizeWords(text) {
  return text
    .toLowerCase()
    // \p{L}/\p{N} instead of a-z0-9 so accented words are not cut apart.
    .replace(/[^\p{L}\p{N}\s'-]/gu, ' ')
    .split(/\s+/)
    .map((token) => token.replace(/^['-]+|['-]+$/g, ''))
    .filter((token) => token.length > 0);
}
|
|
168
|
+
|
|
169
|
+
// ──── Metric Implementations ────
|
|
170
|
+
|
|
171
|
+
/**
 * Passive Voice Ratio (Structure, 4.5% weight in Understanding)
 *
 * A sentence counts as passive when it contains a be-verb
 * (is, was, were, been, being, are, be, am), optionally followed by one
 * intervening word, followed by a word with a participle-like ending
 * (-ed, -en, -wn, -lt, -nt, -pt, -ft, -zed).
 *
 * @param {string[]} sentences - Sentences to inspect.
 * @returns {{ratio: number, count: number, total: number}} Number of passive
 *   sentences, number of sentences, and their quotient (0 for no sentences).
 */
function measurePassiveVoice(sentences) {
  const total = sentences.length;
  if (total === 0) return { ratio: 0, count: 0, total: 0 };

  // be-verb + optional word + participle-looking word
  const beVerbThenParticiple = /\b(is|was|were|been|being|are|be|am)\s+([\w]+\s+)?([\w]*(?:ed|en|wn|lt|nt|pt|ft|zed))\b/i;

  const count = sentences.filter((sentence) => beVerbThenParticiple.test(sentence)).length;

  return { ratio: count / total, count, total };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Ambiguous Pronoun Ratio (Structure, 3.0% weight in Understanding)
 *
 * Counts how many words are pronouns that may lack a clear antecedent:
 * it, this, that, they, them, these, those, its, their, theirs.
 * In technical documentation these often leave the reader unsure what is
 * being referenced.
 *
 * @param {string[]} words - Word tokens; matching is case-insensitive.
 * @returns {{ratio: number, count: number, total: number}} Number of matching
 *   words, number of words, and their quotient (0 for no words).
 */
function measureAmbiguousPronouns(words) {
  const total = words.length;
  if (total === 0) return { ratio: 0, count: 0, total: 0 };

  const pronouns = new Set([
    'it', 'this', 'that', 'they', 'them', 'these', 'those',
    'its', 'their', 'theirs',
  ]);

  const count = words.reduce(
    (hits, token) => (pronouns.has(token.toLowerCase()) ? hits + 1 : hits),
    0
  );

  return { ratio: count / total, count, total };
}
|
|
228
|
+
|
|
229
|
+
/**
 * Atomicity Score (Structure, 9.0% weight in Understanding — HIGHEST)
 *
 * Measures how "atomic" (single-purpose) sentences are.
 * IEEE 830 §4.1 recommends atomic requirements that can be independently verified.
 *
 * A sentence counts as compound (non-atomic) when it either
 *   - contains an explicit connective phrase ("and also", "and then",
 *     "as well as", "in addition to", "additionally", "furthermore",
 *     "moreover"), or
 *   - contains the word "and" two or more times.
 * A single "and" is treated as ordinary language, and "or" is not counted.
 *
 * @param {string[]} sentences - Sentences to inspect.
 * @returns {{ratio: number, count: number, total: number}} Number of
 *   NON-atomic sentences, number of sentences, and their quotient
 *   (0 for no sentences).
 */
function measureAtomicity(sentences) {
  const total = sentences.length;
  if (total === 0) return { ratio: 0, count: 0, total: 0 };

  // Connective phrases that mark a compound sentence on their own.
  const connectivePhrase = /\b(and also|and then|as well as|in addition to|additionally|furthermore|moreover)\b/i;
  // Whole-word "and"; String.prototype.match with the `g` flag returns every
  // occurrence, which is what gets counted below.
  const everyAnd = /\band\b/gi;

  let count = 0;
  for (const sentence of sentences) {
    if (connectivePhrase.test(sentence)) {
      count++;
      continue;
    }
    const ands = sentence.match(everyAnd);
    if (ands && ands.length >= 2) count++;
  }

  return { ratio: count / total, count, total };
}
|
|
267
|
+
|
|
268
|
+
/**
 * Flesch Reading Ease (Readability, 3.75% weight in Understanding)
 *
 * Formula: 206.835 - 1.015 * (total words / total sentences) - 84.6 * (total syllables / total words)
 * Source: Flesch, R. (1948). "A new readability yardstick." Journal of Applied Psychology.
 *
 * Scale: 0-100, higher = easier to read.
 *   90-100: Very Easy (5th grade)
 *   60-69:  Standard (8th-9th grade)
 *   30-49:  Difficult (college level)
 *   0-29:   Very Confusing (graduate level)
 *
 * @param {string[]} words - Word tokens of the text.
 * @param {string[]} sentences - Sentences of the text.
 * @returns {number} Score rounded to one decimal and clamped to 0-100;
 *   0 when there are no words or no sentences.
 */
function measureFleschReadingEase(words, sentences) {
  const wordCount = words.length;
  const sentenceCount = sentences.length;
  if (wordCount === 0 || sentenceCount === 0) return 0;

  let syllableCount = 0;
  for (const token of words) {
    syllableCount += countSyllables(token);
  }

  const wordsPerSentence = wordCount / sentenceCount;
  const syllablesPerWord = syllableCount / wordCount;
  const rawScore = 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;

  const oneDecimal = Math.round(rawScore * 10) / 10;
  return Math.max(0, Math.min(100, oneDecimal));
}
|
|
290
|
+
|
|
291
|
+
/**
 * Flesch-Kincaid Grade Level (Readability, 2.25% weight in Understanding)
 *
 * Formula: 0.39 * (total words / total sentences) + 11.8 * (total syllables / total words) - 15.59
 * Source: Kincaid, J.P. et al. (1975). "Derivation of new readability formulas."
 *
 * @param {string[]} words - Word tokens of the text.
 * @param {string[]} sentences - Sentences of the text.
 * @returns {number} US grade level (8 = 8th grade, 12 = high school senior,
 *   16+ = graduate), rounded to one decimal and never below 0; 0 when there
 *   are no words or no sentences.
 */
function measureFleschKincaidGrade(words, sentences) {
  const wordCount = words.length;
  const sentenceCount = sentences.length;
  if (wordCount === 0 || sentenceCount === 0) return 0;

  let syllableCount = 0;
  for (const token of words) {
    syllableCount += countSyllables(token);
  }

  const wordsPerSentence = wordCount / sentenceCount;
  const syllablesPerWord = syllableCount / wordCount;
  const rawGrade = 0.39 * wordsPerSentence + 11.8 * syllablesPerWord - 15.59;

  const oneDecimal = Math.round(rawGrade * 10) / 10;
  return Math.max(0, oneDecimal);
}
|
|
309
|
+
|
|
310
|
+
/**
 * Sentence Length (Cognitive, 3.0% weight in Understanding)
 *
 * Average number of words per sentence. Cognitive load research
 * (Sweller, 1988) shows that sentences over 25 words significantly increase
 * processing effort.
 *
 * @param {string[]} words - Word tokens of the text.
 * @param {string[]} sentences - Sentences of the text.
 * @returns {number} Words per sentence rounded to one decimal; 0 when there
 *   are no sentences.
 */
function measureSentenceLength(words, sentences) {
  const sentenceCount = sentences.length;
  if (sentenceCount === 0) return 0;

  const average = words.length / sentenceCount;
  return Math.round(average * 10) / 10;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Negation Load (Cognitive, 1.5% weight in Understanding)
 *
 * Share of sentences that contain at least one negation word (not, no,
 * never, none, neither, nor, cannot, or a contraction such as "can't").
 * Negation increases cognitive load because readers must mentally invert meaning.
 * IEEE 830 §4.3 recommends positive phrasing in requirements.
 *
 * @param {string[]} sentences - Sentences to inspect.
 * @returns {{ratio: number, count: number, total: number}} Number of
 *   sentences with a negation, number of sentences, and their quotient
 *   (0 for no sentences).
 */
function measureNegationLoad(sentences) {
  const total = sentences.length;
  if (total === 0) return { ratio: 0, count: 0, total: 0 };

  const negationWord = /\b(not|no|never|none|neither|nor|cannot|can't|don't|doesn't|didn't|won't|wouldn't|shouldn't|isn't|aren't|wasn't|weren't|hasn't|haven't|hadn't)\b/i;

  const count = sentences.filter((sentence) => negationWord.test(sentence)).length;

  return { ratio: count / total, count, total };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Conditional Load (Cognitive, 1.5% weight in Understanding)
 *
 * Share of sentences that contain at least one conditional keyword (if,
 * unless, when, whenever, otherwise, except, provided that, assuming,
 * in case, as long as, only if, until).
 * Excessive conditionals make documentation hard to follow and test.
 *
 * @param {string[]} sentences - Sentences to inspect.
 * @returns {{ratio: number, count: number, total: number}} Number of
 *   conditional sentences, number of sentences, and their quotient
 *   (0 for no sentences).
 */
function measureConditionalLoad(sentences) {
  const total = sentences.length;
  if (total === 0) return { ratio: 0, count: 0, total: 0 };

  const conditionalKeyword = /\b(if|unless|when|whenever|otherwise|except|provided that|assuming|in case|as long as|only if|until)\b/i;

  const count = sentences.filter((sentence) => conditionalKeyword.test(sentence)).length;

  return { ratio: count / total, count, total };
}
|
|
371
|
+
|
|
372
|
+
// ──── Score Interpretation ────
|
|
373
|
+
|
|
374
|
+
/**
 * Map a Flesch Reading Ease score to a descriptive label.
 *
 * @param {number} score - Flesch Reading Ease score.
 * @returns {string} Label of the first band whose lower bound the score
 *   reaches; 'Very Confusing' when it reaches none.
 */
function getReadabilityLabel(score) {
  // [lower bound, label], highest band first
  const bands = [
    [90, 'Very Easy'],
    [70, 'Easy'],
    [60, 'Standard'],
    [50, 'Fairly Difficult'],
    [30, 'Difficult'],
  ];

  for (const [lowerBound, label] of bands) {
    if (score >= lowerBound) return label;
  }
  return 'Very Confusing';
}
|
|
382
|
+
|
|
383
|
+
/**
 * Map a Flesch-Kincaid grade level to a descriptive label.
 *
 * @param {number} grade - Flesch-Kincaid grade level.
 * @returns {string} Label of the first band whose upper bound the grade does
 *   not exceed; 'graduate+' when it exceeds them all.
 */
function getGradeLabel(grade) {
  // [upper bound, label], lowest band first
  const bands = [
    [6, '6th grade'],
    [8, '8th grade'],
    [10, '10th grade'],
    [12, 'high school'],
    [16, 'college'],
  ];

  for (const [upperBound, label] of bands) {
    if (grade <= upperBound) return label;
  }
  return 'graduate+';
}
|
|
391
|
+
|
|
392
|
+
// ──── Understanding CLI Integration ────
|
|
393
|
+
|
|
394
|
+
/**
 * Check if the `understanding` CLI is available on the system.
 *
 * Uses `where` on Windows and the POSIX `command -v` elsewhere. The lookup's
 * stderr is discarded through the `stdio` option rather than a shell
 * redirect: a `2>NUL` redirect executed by a POSIX shell creates a file
 * literally named "NUL" in the current working directory.
 *
 * @returns {string|null} The first path reported by the lookup, or null when
 *   the CLI is not found or the lookup fails or times out.
 */
function findUnderstandingCli() {
  const lookupCommand = process.platform === 'win32'
    ? 'where understanding'
    : 'command -v understanding';

  try {
    const output = execSync(lookupCommand, {
      encoding: 'utf-8',
      timeout: 3000,
      stdio: ['ignore', 'pipe', 'ignore'],
    });

    // `where` may list several matches, one per line; keep the first.
    const firstPath = output
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.length > 0);

    return firstPath ?? null;
  } catch {
    // A non-zero exit status means "not found"; treat any failure the same way.
    return null;
  }
}
|
|
409
|
+
|
|
410
|
+
/**
 * Run `understanding analyze <file> --enhanced --json` and parse its output.
 *
 * The CLI is started without a shell and the file path is passed as a
 * separate argument, so characters in the path (quotes, `$(...)`, backticks,
 * `;`) are never interpreted as shell syntax. stderr is discarded.
 *
 * @param {string} filePath - Path of the document to analyze.
 * @returns {*} The parsed JSON printed by the CLI, or null when the CLI is
 *   missing, fails, times out (10 s), or prints something that is not JSON.
 */
function runUnderstandingDeepScan(filePath) {
  try {
    const output = execFileSync(
      'understanding',
      ['analyze', filePath, '--enhanced', '--json'],
      {
        encoding: 'utf-8',
        timeout: 10000,
        stdio: ['ignore', 'pipe', 'ignore'],
      }
    );
    return JSON.parse(output);
  } catch {
    // Deep scan is optional: any failure simply means "no result".
    return null;
  }
}
|
|
425
|
+
|
|
426
|
+
// ──── Main Validator ────
|
|
427
|
+
|
|
428
|
+
/**
 * Collect the markdown documents to analyze: every `.md` file directly inside
 * `<projectDir>/docs-canonical/` (extension matched case-insensitively, no
 * recursion) followed by `<projectDir>/README.md`.
 *
 * Only regular files are returned. A directory that happens to be named
 * `something.md` is skipped, because reading it as a document would throw.
 * A missing or unreadable `docs-canonical/` directory contributes no entries.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {{name: string, path: string}[]} File name and resolved path of
 *   each document found.
 */
function getCanonicalDocs(projectDir) {
  // True only for an existing regular file (symlinks are followed).
  const isRegularFile = (candidate) => {
    try {
      return statSync(candidate).isFile();
    } catch {
      return false;
    }
  };

  const docs = [];
  const docsDir = resolve(projectDir, 'docs-canonical');

  let entries = [];
  try {
    entries = readdirSync(docsDir);
  } catch {
    // Best effort: a missing or unreadable directory means no canonical docs.
  }

  for (const entry of entries) {
    if (extname(entry).toLowerCase() !== '.md') continue;

    const entryPath = join(docsDir, entry);
    if (isRegularFile(entryPath)) {
      docs.push({ name: entry, path: entryPath });
    }
  }

  // Also check README.md at project root
  const readmePath = resolve(projectDir, 'README.md');
  if (isRegularFile(readmePath)) {
    docs.push({ name: 'README.md', path: readmePath });
  }

  return docs;
}
|
|
459
|
+
|
|
460
|
+
/**
 * Analyze a single document and return per-metric results.
 *
 * The document is skipped (`skipped: true` plus a `reason`) when it cannot be
 * read, when fewer than 50 characters of prose remain after markdown is
 * stripped, or when it has fewer than 3 sentences or fewer than 20 words.
 *
 * @param {{name: string, path: string}} doc - Document name and file path.
 * @returns {object} Either `{ skipped: true, reason, name }`, or
 *   `{ skipped: false, name, sentences, words, metrics, details }` where
 *   `metrics` holds the eight metric values and `details` holds the
 *   ratio/count/total objects of the ratio-based metrics.
 */
function analyzeDocument(doc) {
  let content;
  try {
    content = readFileSync(doc.path, 'utf-8');
  } catch {
    // One unreadable file (permissions, a directory, removed meanwhile)
    // should not abort the analysis of every other document.
    return { skipped: true, reason: 'unreadable', name: doc.name };
  }

  const plainText = stripMarkdown(content);
  if (plainText.length < 50) {
    return { skipped: true, reason: 'too short', name: doc.name };
  }

  const sentences = splitSentences(plainText);
  const words = tokenizeWords(plainText);
  if (sentences.length < 3 || words.length < 20) {
    return { skipped: true, reason: 'insufficient content', name: doc.name };
  }

  // Ratio-based metrics; each returns { ratio, count, total }.
  const passive = measurePassiveVoice(sentences);
  const ambiguous = measureAmbiguousPronouns(words);
  const atomicity = measureAtomicity(sentences);
  const negation = measureNegationLoad(sentences);
  const conditional = measureConditionalLoad(sentences);

  return {
    skipped: false,
    name: doc.name,
    sentences: sentences.length,
    words: words.length,
    metrics: {
      passiveVoiceRatio: passive.ratio,
      ambiguousPronounRatio: ambiguous.ratio,
      atomicityScore: atomicity.ratio,
      fleschReadingEase: measureFleschReadingEase(words, sentences),
      fleschKincaidGrade: measureFleschKincaidGrade(words, sentences),
      avgSentenceLength: measureSentenceLength(words, sentences),
      negationLoad: negation.ratio,
      conditionalLoad: conditional.ratio,
    },
    details: { passive, ambiguous, atomicity, negation, conditional },
  };
}
|
|
505
|
+
|
|
506
|
+
/**
 * Main validator entry point.
 *
 * Scans all canonical docs, runs 8 metrics on each, and reports per-doc
 * findings as warnings when thresholds are exceeded. Every metric of every
 * analyzed document counts once towards `total`, and towards `passed` when
 * it is within its threshold. Documents that no longer exist or that the
 * analysis skips contribute nothing.
 *
 * TODO: the optional `understanding` deep scan is not invoked from here; only
 * the eight built-in metrics are evaluated.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} [config] - Validator configuration; not read by this
 *   validator at present, so it may be omitted.
 * @returns {{errors: string[], warnings: string[], passed: number, total: number}}
 *   Aggregated result; `errors` is always empty.
 */
export function validateDocQuality(projectDir, config) {
  const results = { errors: [], warnings: [], passed: 0, total: 0 };

  // Format a 0-1 ratio as a percentage with the given number of decimals.
  const percent = (ratio, digits = 0) => (ratio * 100).toFixed(digits);

  // One entry per metric, in reporting order. `passes` decides whether the
  // metric is within its threshold; `warning` builds the message (without
  // the "<doc name>: " prefix) used when it is not.
  const checks = [
    {
      // Check 1: Passive Voice
      passes: (m) => m.passiveVoiceRatio <= THRESHOLDS.passiveVoiceRatio.warn,
      warning: (m, details) =>
        `High passive voice ratio (${percent(m.passiveVoiceRatio)}% of sentences). ` +
        `Use active voice for clarity. Found ${details.passive.count}/${details.passive.total} passive sentences`,
    },
    {
      // Check 2: Ambiguous Pronouns
      passes: (m) => m.ambiguousPronounRatio <= THRESHOLDS.ambiguousPronounRatio.warn,
      warning: (m) =>
        `High ambiguous pronoun ratio (${percent(m.ambiguousPronounRatio, 1)}%). ` +
        `Replace "it/this/that/they" with specific nouns for clarity`,
    },
    {
      // Check 3: Atomicity
      passes: (m) => m.atomicityScore <= THRESHOLDS.atomicityScore.warn,
      warning: (m) =>
        `Low atomicity (${percent(m.atomicityScore)}% compound sentences). ` +
        `Split compound sentences for easier verification (IEEE 830 §4.1)`,
    },
    {
      // Check 4: Flesch Reading Ease — the only metric where HIGHER is better
      passes: (m) => m.fleschReadingEase >= THRESHOLDS.fleschReadingEase.warn,
      warning: (m) =>
        `Very low readability (Flesch score: ${m.fleschReadingEase}/100 — ${getReadabilityLabel(m.fleschReadingEase)}). ` +
        `Shorten sentences and use simpler words`,
    },
    {
      // Check 5: Flesch-Kincaid Grade
      passes: (m) => m.fleschKincaidGrade <= THRESHOLDS.fleschKincaidGrade.warn,
      warning: (m) =>
        `Reading level too high (grade ${m.fleschKincaidGrade} — ${getGradeLabel(m.fleschKincaidGrade)}). ` +
        `Aim for grade 10-12 for technical docs`,
    },
    {
      // Check 6: Sentence Length
      passes: (m) => m.avgSentenceLength <= THRESHOLDS.avgSentenceLength.warn,
      warning: (m) =>
        `Average sentence too long (${m.avgSentenceLength} words). ` +
        `Target ≤25 words per sentence for readability (Sweller, 1988)`,
    },
    {
      // Check 7: Negation Load
      passes: (m) => m.negationLoad <= THRESHOLDS.negationLoad.warn,
      warning: (m) =>
        `High negation load (${percent(m.negationLoad)}% of sentences use negation). ` +
        `Rephrase in positive terms: "must not fail" → "must succeed" (IEEE 830 §4.3)`,
    },
    {
      // Check 8: Conditional Load
      passes: (m) => m.conditionalLoad <= THRESHOLDS.conditionalLoad.warn,
      warning: (m) =>
        `High conditional load (${percent(m.conditionalLoad)}% of sentences are conditional). ` +
        `Simplify by splitting conditionals into separate requirements`,
    },
  ];

  // No docs to analyze is not an error here — the loop simply does nothing.
  for (const doc of getCanonicalDocs(projectDir)) {
    if (!existsSync(doc.path)) continue;

    const analysis = analyzeDocument(doc);
    if (analysis.skipped) continue;

    for (const check of checks) {
      results.total++;
      if (check.passes(analysis.metrics)) {
        results.passed++;
      } else {
        results.warnings.push(
          `${doc.name}: ${check.warning(analysis.metrics, analysis.details)}`
        );
      }
    }
  }

  return results;
}
|