docguard-cli 0.9.1 → 0.9.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -223,7 +223,7 @@ $ npx docguard-cli guard
223
223
 
224
224
  ---
225
225
 
226
- ## 19 Validators
226
+ ## 18 Validators
227
227
 
228
228
  | # | Validator | What It Checks | Default |
229
229
  |---|-----------|---------------|---------|
@@ -16,7 +16,7 @@
16
16
  import { c } from '../shared.mjs';
17
17
  import { runGuardInternal } from './guard.mjs';
18
18
  import { runScoreInternal } from './score.mjs';
19
- import { existsSync, readFileSync, mkdirSync } from 'node:fs';
19
+ import { existsSync, readFileSync } from 'node:fs';
20
20
  import { resolve, dirname } from 'node:path';
21
21
  import { fileURLToPath } from 'node:url';
22
22
  import { execSync } from 'node:child_process';
@@ -11,6 +11,11 @@
11
11
  * Readability: Flesch Reading Ease, Flesch-Kincaid Grade Level
12
12
  * Cognitive: Sentence Length, Negation Load, Conditional Load
13
13
  *
14
+ * v0.9.3 — Prose-Only Extraction Engine:
15
+ * Instead of stripping markdown and measuring residue (which treats table
16
+ * cells as "long sentences"), this version extracts ONLY actual prose
17
+ * paragraphs. Docs that are mostly tables/code skip readability scoring.
18
+ *
14
19
  * Optional: If `understanding` CLI is installed, runs a full 31-metric deep scan.
15
20
  *
16
21
  * Zero dependencies — pure Node.js built-ins only.
@@ -25,111 +30,197 @@ import { execSync } from 'node:child_process';
25
30
  // Values are based on IEEE 830 best practices and readability research.
26
31
 
27
32
  const THRESHOLDS = {
28
- passiveVoiceRatio: { warn: 0.20, label: 'Passive voice ratio' }, // >20% passive = warn
33
+ passiveVoiceRatio: { warn: 0.25, label: 'Passive voice ratio' }, // >25% passive = warn
29
34
  ambiguousPronounRatio: { warn: 0.15, label: 'Ambiguous pronoun ratio' }, // >15% ambiguous pronouns = warn
30
- atomicityScore: { warn: 0.30, label: 'Non-atomic sentence ratio' }, // >30% compound sentences = warn
31
- fleschReadingEase: { warn: 30, label: 'Flesch reading ease' }, // <30 = very hard to read
32
- fleschKincaidGrade: { warn: 16, label: 'Flesch-Kincaid grade' }, // >16 = graduate level+
33
- avgSentenceLength: { warn: 25, label: 'Avg sentence length' }, // >25 words = too long
34
- negationLoad: { warn: 0.15, label: 'Negation load' }, // >15% sentences with negation = warn
35
+ atomicityScore: { warn: 0.35, label: 'Non-atomic sentence ratio' }, // >35% compound sentences = warn
36
+ fleschReadingEase: { warn: 15, label: 'Flesch reading ease' }, // <15 = truly unreadable prose
37
+ fleschKincaidGrade: { warn: 18, label: 'Flesch-Kincaid grade' }, // >18 = graduate level+
38
+ avgSentenceLength: { warn: 30, label: 'Avg sentence length' }, // >30 words = too long
39
+ negationLoad: { warn: 0.20, label: 'Negation load' }, // >20% sentences with negation = warn
35
40
  conditionalLoad: { warn: 0.30, label: 'Conditional load' }, // >30% sentences conditional = warn
36
41
  };
37
42
 
38
- // ──── Text Processing Utilities ────
43
+ // Minimum prose words required for readability scoring.
44
+ // Docs with fewer prose words than this are reference docs (tables, code) — skip readability scoring.
45
+ const MIN_PROSE_WORDS = 50;
46
+
47
+ // ──── Technical Vocabulary ────
48
+ // Terms the target audience knows. Treated as 2-syllable words for Flesch scoring
49
+ // so they don't artificially inflate difficulty.
50
+
51
+ const TECH_VOCAB = new Set([
52
+ // Infrastructure & databases
53
+ 'dynamodb', 'redis', 'postgres', 'postgresql', 'mongodb', 'mysql', 'sqlite',
54
+ 'kubernetes', 'docker', 'dockerfile', 'nginx', 'apache', 'cloudfront',
55
+ 'cloudwatch', 'elasticsearch', 'opensearch', 'terraform', 'ansible',
56
+ 'memcached', 'cassandra', 'rabbitmq', 'kafka',
57
+ // Frameworks & languages
58
+ 'typescript', 'javascript', 'python', 'fastify', 'express', 'nextjs',
59
+ 'webpack', 'vite', 'vitest', 'playwright', 'cypress', 'mocha',
60
+ 'nestjs', 'angular', 'svelte', 'nuxtjs', 'gatsby', 'remix',
61
+ // Protocols & patterns
62
+ 'websocket', 'websockets', 'middleware', 'microservice', 'microservices',
63
+ 'graphql', 'restful', 'oauth', 'openapi', 'webhook', 'webhooks',
64
+ 'grpc', 'protobuf', 'pubsub',
65
+ // AWS services
66
+ 'lambda', 'cognito', 'amplify', 'apprunner', 'cloudformation',
67
+ 'apigateway', 'secretsmanager', 'parameterstore', 'eventbridge',
68
+ 'fargate', 'elasticache', 'sagemaker',
69
+ // Common developer terms
70
+ 'namespace', 'endpoint', 'endpoints', 'timestamp', 'timestamps',
71
+ 'boolean', 'callback', 'callbacks', 'codebase', 'monorepo',
72
+ 'frontend', 'backend', 'fullstack', 'changelog', 'localhost',
73
+ 'hostname', 'username', 'eslint', 'prettier', 'rollup',
74
+ 'authentication', 'authorization', 'infrastructure', 'serialization',
75
+ 'deserialization', 'middleware', 'polymorphism', 'abstraction',
76
+ ]);
77
+
78
+ // ──── Prose Extraction Engine ────
39
79
 
40
80
  /**
41
- * Strip markdown formatting to get plain prose text.
42
- * Removes: code blocks, inline code, headers, links, images, tables,
43
- * HTML comments, metadata blocks, horizontal rules, list markers.
81
+ * Extract only prose paragraphs from markdown content.
82
+ *
83
+ * Instead of stripping markdown and measuring residue (where table cells
84
+ * become "146-word sentences"), this identifies actual prose — blocks of
85
+ * text that form readable sentences — and returns only those.
86
+ *
87
+ * A line qualifies as prose if it:
88
+ * - Is not inside a code block / HTML comment
89
+ * - Is not a table row, header, horizontal rule, or metadata
90
+ * - Has ≥55% alphabetic characters (filters out paths/URLs/symbol-heavy lines)
91
+ * - Has ≥5 words (fragments aren't prose)
44
92
  */
45
- function stripMarkdown(content) {
46
- let text = content;
47
-
48
- // Remove fenced code blocks (```...```) and (````...````)
49
- text = text.replace(/````[\s\S]*?````/g, '');
50
- text = text.replace(/```[\s\S]*?```/g, '');
51
-
52
- // Remove HTML comments (<!-- ... -->)
53
- text = text.replace(/<!--[\s\S]*?-->/g, '');
54
-
55
- // Remove YAML frontmatter (---...---)
56
- text = text.replace(/^---[\s\S]*?---\n/m, '');
57
-
58
- // Remove table rows (lines starting with |)
59
- text = text.replace(/^\|.*$/gm, '');
60
-
61
- // Remove horizontal rules
62
- text = text.replace(/^[-*_]{3,}\s*$/gm, '');
63
-
64
- // Remove images: ![alt](url)
65
- text = text.replace(/!\[.*?\]\(.*?\)/g, '');
66
-
67
- // Remove links, keep link text: [text](url) → text
68
- text = text.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');
69
-
70
- // Remove inline code
71
- text = text.replace(/`[^`]+`/g, '');
72
-
73
- // Remove header markers (# ## ### etc.)
74
- text = text.replace(/^#{1,6}\s+/gm, '');
75
-
76
- // Remove list markers (-, *, 1.)
77
- text = text.replace(/^\s*[-*+]\s+/gm, '');
78
- text = text.replace(/^\s*\d+\.\s+/gm, '');
79
-
80
- // Remove bold/italic markers
81
- text = text.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1');
82
- text = text.replace(/_{1,3}([^_]+)_{1,3}/g, '$1');
93
+ function extractProse(content) {
94
+ const lines = content.split('\n');
95
+ const proseLines = [];
96
+ let inCodeBlock = false;
97
+ let inHtmlComment = false;
98
+
99
+ for (const rawLine of lines) {
100
+ const line = rawLine.trim();
101
+
102
+ // Track code block boundaries (``` and ````)
103
+ if (/^`{3,}/.test(line)) {
104
+ inCodeBlock = !inCodeBlock;
105
+ continue;
106
+ }
107
+ if (inCodeBlock) continue;
83
108
 
84
- // Remove badge images (shield.io etc.)
85
- text = text.replace(/!\[.*?\]\(https:\/\/img\.shields\.io\/.*?\)/g, '');
109
+ // Track multi-line HTML comments
110
+ if (line.includes('<!--') && !line.includes('-->')) {
111
+ inHtmlComment = true;
112
+ continue;
113
+ }
114
+ if (inHtmlComment) {
115
+ if (line.includes('-->')) inHtmlComment = false;
116
+ continue;
117
+ }
86
118
 
87
- // Collapse multiple blank lines
88
- text = text.replace(/\n{3,}/g, '\n\n');
119
+ // Skip non-prose line types
120
+ if (line.startsWith('|')) continue; // Table rows
121
+ if (line.startsWith('#')) continue; // Headers
122
+ if (line.startsWith('!')) continue; // Images
123
+ if (/^[-*_]{3,}\s*$/.test(line)) continue; // Horizontal rules
124
+ if (/^[|:\-\s]+$/.test(line)) continue; // Table separators
125
+ if (/^<!--.*-->$/.test(line)) continue; // Inline HTML comments
126
+ if (/^<[^>]+>/.test(line)) continue; // HTML tags
127
+ if (/^---\s*$/.test(line)) continue; // YAML frontmatter
128
+ if (line.length === 0) continue; // Empty lines
129
+
130
+ // Clean the line: extract text from markdown formatting
131
+ let cleaned = line;
132
+ cleaned = cleaned.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1'); // Links → text only
133
+ cleaned = cleaned.replace(/`[^`]+`/g, ''); // Remove inline code
134
+ cleaned = cleaned.replace(/!\[.*?\]\(.*?\)/g, ''); // Remove images
135
+ cleaned = cleaned.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1'); // Bold/italic → text
136
+ cleaned = cleaned.replace(/_{1,3}([^_]+)_{1,3}/g, '$1'); // Underline emphasis
137
+ cleaned = cleaned.replace(/^[-*+]\s+/, ''); // List markers
138
+ cleaned = cleaned.replace(/^\d+\.\s+/, ''); // Numbered list markers
139
+ cleaned = cleaned.trim();
140
+
141
+ if (cleaned.length < 15) continue;
142
+
143
+ // Prose heuristic: check alphabetic ratio and word count
144
+ const alphaCount = (cleaned.match(/[a-zA-Z]/g) || []).length;
145
+ const alphaRatio = alphaCount / cleaned.length;
146
+ const wordCount = cleaned.split(/\s+/).length;
147
+
148
+ // A prose line needs ≥55% letters and ≥5 words
149
+ if (alphaRatio >= 0.55 && wordCount >= 5) {
150
+ proseLines.push(cleaned);
151
+ }
152
+ }
89
153
 
90
- return text.trim();
154
+ return proseLines.join('\n');
91
155
  }
92
156
 
93
157
  /**
94
- * Split text into sentences using common sentence-ending punctuation.
95
- * Handles abbreviations (Mr., Dr., etc.) and decimal numbers to avoid false splits.
158
+ * Split text into sentences with markdown-aware boundary detection.
159
+ *
160
+ * Protects against false splits from:
161
+ * - File paths (src/services/auth.ts → the dot isn't a sentence boundary)
162
+ * - Version numbers (v0.9.2, Node.js 18)
163
+ * - URLs (https://example.com)
164
+ * - Common abbreviations (e.g., i.e., etc., vs.)
165
+ * - Technical dotted names (package.json, .env.local)
96
166
  */
97
167
  function splitSentences(text) {
98
168
  if (!text || text.trim().length === 0) return [];
99
169
 
100
- // Protect common abbreviations from false sentence splits
101
170
  let protected_ = text;
102
- const abbreviations = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'vs', 'etc', 'i.e', 'e.g', 'cf'];
171
+
172
+ // Protect dotted filenames (package.json, .env.local, auth.ts)
173
+ protected_ = protected_.replace(/[\w.-]+\.[a-z]{1,4}(?=[\s,;:)\]|]|$)/gi, (m) => m.replace(/\./g, '≈'));
174
+
175
+ // Protect version numbers (v0.9.2, 1.2.3)
176
+ protected_ = protected_.replace(/\bv?\d+\.\d+(?:\.\d+)*\b/g, (m) => m.replace(/\./g, '≈'));
177
+
178
+ // Protect URLs
179
+ protected_ = protected_.replace(/https?:\/\/[^\s)]+/g, (m) => m.replace(/\./g, '≈'));
180
+
181
+ // Protect common abbreviations
182
+ const abbreviations = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'vs', 'etc', 'approx', 'incl'];
103
183
  for (const abbr of abbreviations) {
104
184
  const regex = new RegExp(`\\b${abbr}\\.`, 'gi');
105
- protected_ = protected_.replace(regex, `${abbr}≈`);
185
+ protected_ = protected_.replace(regex, (m) => m.replace(/\./g, '≈'));
106
186
  }
107
187
 
188
+ // Protect e.g. and i.e. specifically (have dots in the abbreviation itself)
189
+ protected_ = protected_.replace(/\be\.g\./gi, 'e≈g≈');
190
+ protected_ = protected_.replace(/\bi\.e\./gi, 'i≈e≈');
191
+
192
+ // Protect Node.js, Vue.js, etc.
193
+ protected_ = protected_.replace(/\b(\w+)\.js\b/gi, '$1≈js');
194
+
108
195
  // Protect decimal numbers (3.14)
109
196
  protected_ = protected_.replace(/(\d)\.(\d)/g, '$1≈$2');
110
197
 
111
- // Split on sentence-ending punctuation followed by space or end
112
- const raw = protected_.split(/[.!?]+(?:\s+|$)/);
198
+ // Split on sentence-ending punctuation followed by whitespace/newline/end
199
+ const raw = protected_.split(/[.!?]+(?:\s+|\n|$)/);
113
200
 
114
- // Restore protected characters and filter empties
201
+ // Restore protected characters and filter empties/fragments
115
202
  return raw
116
203
  .map(s => s.replace(/≈/g, '.').trim())
117
- .filter(s => s.length > 3); // Ignore fragments under 4 chars
204
+ .filter(s => {
205
+ if (s.length < 10) return false;
206
+ return s.split(/\s+/).length >= 3; // At least 3 words
207
+ });
118
208
  }
119
209
 
120
210
  /**
121
- * Count syllables in a word using a heuristic approach.
122
- * Based on the algorithm used in readability research:
123
- * 1. Count vowel groups
124
- * 2. Subtract silent-e at end
125
- * 3. Add back for specific suffixes (-le, -les, -tion, etc.)
126
- * 4. Minimum 1 syllable per word
211
+ * Count syllables with technical vocabulary normalization.
212
+ *
213
+ * Technical terms (DynamoDB, WebSocket, middleware) are normalized to
214
+ * 2 syllables. The target audience knows these terms — they don't make
215
+ * the text harder to read.
127
216
  */
128
217
  function countSyllables(word) {
129
218
  word = word.toLowerCase().replace(/[^a-z]/g, '');
130
219
  if (word.length <= 2) return 1;
131
220
 
132
- // Exception list for common words with unusual syllable counts
221
+ // Technical vocabulary counts as 2 syllables (known terms)
222
+ if (TECH_VOCAB.has(word)) return 2;
223
+
133
224
  const exceptions = {
134
225
  'the': 1, 'are': 1, 'were': 1, 'have': 1, 'there': 1,
135
226
  'where': 1, 'here': 1, 'every': 3, 'everything': 4,
@@ -141,17 +232,16 @@ function countSyllables(word) {
141
232
  const vowelGroups = word.match(/[aeiouy]+/g);
142
233
  let count = vowelGroups ? vowelGroups.length : 1;
143
234
 
144
- // Subtract silent-e at end (but not for words like "able", "ible")
235
+ // Subtract silent-e at end (but not -le, -ce, -ge)
145
236
  if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ce') && !word.endsWith('ge')) {
146
237
  count--;
147
238
  }
148
239
 
149
- // Subtract for common diphthong/double vowel endings
240
+ // Subtract for common past-tense endings
150
241
  if (word.endsWith('ed') && !word.endsWith('ted') && !word.endsWith('ded')) {
151
242
  count--;
152
243
  }
153
244
 
154
- // Ensure minimum 1 syllable
155
245
  return Math.max(1, count);
156
246
  }
157
247
 
@@ -179,7 +269,6 @@ function tokenizeWords(text) {
179
269
  function measurePassiveVoice(sentences) {
180
270
  if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
181
271
 
182
- // Passive voice pattern: be-verb followed by past participle
183
272
  const passivePattern = /\b(is|was|were|been|being|are|be|am)\s+([\w]+\s+)?([\w]*(?:ed|en|wn|lt|nt|pt|ft|zed))\b/i;
184
273
 
185
274
  let passiveCount = 0;
@@ -198,11 +287,6 @@ function measurePassiveVoice(sentences) {
198
287
 
199
288
  /**
200
289
  * Ambiguous Pronoun Ratio (Structure, 3.0% weight in Understanding)
201
- *
202
- * Counts pronouns that lack clear antecedents: it, this, that, they, them, these, those.
203
- * In technical documentation, these often create confusion about what exactly is referenced.
204
- *
205
- * Returns ratio of ambiguous pronouns to total word count.
206
290
  */
207
291
  function measureAmbiguousPronouns(words) {
208
292
  if (words.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -227,30 +311,19 @@ function measureAmbiguousPronouns(words) {
227
311
  }
228
312
 
229
313
  /**
230
- * Atomicity Score (Structure, 9.0% weight in Understanding — HIGHEST)
231
- *
232
- * Measures how "atomic" (single-purpose) sentences are.
233
- * Compound sentences with and/or/also/additionally indicate non-atomic requirements.
234
- * IEEE 830 §4.1 recommends atomic requirements that can be independently verified.
235
- *
236
- * Returns ratio of NON-atomic sentences (compound) to total sentences.
314
+ * Atomicity Score (Structure, 9.0% weight — HIGHEST in Understanding)
237
315
  */
238
316
  function measureAtomicity(sentences) {
239
317
  if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
240
318
 
241
- // Compound indicators (sentence-level conjunctions, not word-level)
242
- // We match these only when preceded/followed by spaces to avoid matching within words
243
319
  const compoundPattern = /\b(and also|and then|as well as|in addition to|additionally|furthermore|moreover)\b/i;
244
- // Simple "and" / "or" — only flag if >1 occurrence in a sentence (natural language has legitimate single "and")
245
320
  const simpleCompound = /\band\b/gi;
246
- const simpleOr = /\bor\b/gi;
247
321
 
248
322
  let compoundCount = 0;
249
323
  for (const sentence of sentences) {
250
324
  if (compoundPattern.test(sentence)) {
251
325
  compoundCount++;
252
326
  } else {
253
- // Count simple "and" — 2+ indicates compound
254
327
  const andMatches = sentence.match(simpleCompound);
255
328
  if (andMatches && andMatches.length >= 2) {
256
329
  compoundCount++;
@@ -266,16 +339,8 @@ function measureAtomicity(sentences) {
266
339
  }
267
340
 
268
341
  /**
269
- * Flesch Reading Ease (Readability, 3.75% weight in Understanding)
270
- *
271
- * Formula: 206.835 - 1.015 * (total words / total sentences) - 84.6 * (total syllables / total words)
272
- * Source: Flesch, R. (1948). "A new readability yardstick." Journal of Applied Psychology.
273
- *
274
- * Scale: 0-100, higher = easier to read.
275
- * 90-100: Very Easy (5th grade)
276
- * 60-69: Standard (8th-9th grade)
277
- * 30-49: Difficult (college level)
278
- * 0-29: Very Confusing (graduate level)
342
+ * Flesch Reading Ease (Readability)
343
+ * Formula: 206.835 - 1.015 * (words/sentences) - 84.6 * (syllables/words)
279
344
  */
280
345
  function measureFleschReadingEase(words, sentences) {
281
346
  if (words.length === 0 || sentences.length === 0) return 0;
@@ -289,12 +354,8 @@ function measureFleschReadingEase(words, sentences) {
289
354
  }
290
355
 
291
356
  /**
292
- * Flesch-Kincaid Grade Level (Readability, 2.25% weight in Understanding)
293
- *
294
- * Formula: 0.39 * (total words / total sentences) + 11.8 * (total syllables / total words) - 15.59
295
- * Source: Kincaid, J.P. et al. (1975). "Derivation of new readability formulas."
296
- *
297
- * Returns US grade level (8 = 8th grade, 12 = high school senior, 16+ = graduate)
357
+ * Flesch-Kincaid Grade Level (Readability)
358
+ * Formula: 0.39 * (words/sentences) + 11.8 * (syllables/words) - 15.59
298
359
  */
299
360
  function measureFleschKincaidGrade(words, sentences) {
300
361
  if (words.length === 0 || sentences.length === 0) return 0;
@@ -308,10 +369,7 @@ function measureFleschKincaidGrade(words, sentences) {
308
369
  }
309
370
 
310
371
  /**
311
- * Sentence Length (Cognitive, 3.0% weight in Understanding)
312
- *
313
- * Average words per sentence. Cognitive load research (Sweller, 1988) shows that
314
- * sentences over 25 words significantly increase processing effort.
372
+ * Sentence Length (Cognitive)
315
373
  */
316
374
  function measureSentenceLength(words, sentences) {
317
375
  if (sentences.length === 0) return 0;
@@ -319,11 +377,7 @@ function measureSentenceLength(words, sentences) {
319
377
  }
320
378
 
321
379
  /**
322
- * Negation Load (Cognitive, 1.5% weight in Understanding)
323
- *
324
- * Ratio of sentences containing negation words.
325
- * Negation increases cognitive load because readers must mentally invert meaning.
326
- * IEEE 830 §4.3 recommends positive phrasing in requirements.
380
+ * Negation Load (Cognitive)
327
381
  */
328
382
  function measureNegationLoad(sentences) {
329
383
  if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -345,10 +399,7 @@ function measureNegationLoad(sentences) {
345
399
  }
346
400
 
347
401
  /**
348
- * Conditional Load (Cognitive, 1.5% weight in Understanding)
349
- *
350
- * Ratio of sentences containing conditional keywords.
351
- * Excessive conditionals make documentation hard to follow and test.
402
+ * Conditional Load (Cognitive)
352
403
  */
353
404
  function measureConditionalLoad(sentences) {
354
405
  if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -377,6 +428,7 @@ function getReadabilityLabel(score) {
377
428
  if (score >= 60) return 'Standard';
378
429
  if (score >= 50) return 'Fairly Difficult';
379
430
  if (score >= 30) return 'Difficult';
431
+ if (score >= 15) return 'Hard — Technical';
380
432
  return 'Very Confusing';
381
433
  }
382
434
 
@@ -393,11 +445,11 @@ function getGradeLabel(grade) {
393
445
 
394
446
  /**
395
447
  * Check if the `understanding` CLI is available on the system.
396
- * Returns the path to the executable or null.
397
448
  */
398
449
  function findUnderstandingCli() {
399
450
  try {
400
- const result = execSync('which understanding 2>/dev/null || where understanding 2>NUL', {
451
+ const cmd = process.platform === 'win32' ? 'where understanding' : 'which understanding';
452
+ const result = execSync(`${cmd} 2>/dev/null`, {
401
453
  encoding: 'utf-8',
402
454
  timeout: 3000,
403
455
  }).trim();
@@ -409,7 +461,6 @@ function findUnderstandingCli() {
409
461
 
410
462
  /**
411
463
  * Run the `understanding` CLI on a file and parse results.
412
- * Returns understanding's quality score or null if it fails.
413
464
  */
414
465
  function runUnderstandingDeepScan(filePath) {
415
466
  try {
@@ -459,20 +510,22 @@ function getCanonicalDocs(projectDir) {
459
510
 
460
511
  /**
461
512
  * Analyze a single document and return per-metric results.
513
+ *
514
+ * Uses extractProse() instead of stripMarkdown() — only actual prose
515
+ * paragraphs are scored. Documents that are mostly tables/code/reference
516
+ * material are skipped for readability (they'd score 0/100 unfairly).
462
517
  */
463
518
  function analyzeDocument(doc) {
464
519
  const content = readFileSync(doc.path, 'utf-8');
465
- const plainText = stripMarkdown(content);
466
-
467
- if (plainText.length < 50) {
468
- return { skipped: true, reason: 'too short', name: doc.name };
469
- }
520
+ const proseText = extractProse(content);
470
521
 
471
- const sentences = splitSentences(plainText);
472
- const words = tokenizeWords(plainText);
522
+ const sentences = splitSentences(proseText);
523
+ const words = tokenizeWords(proseText);
473
524
 
474
- if (sentences.length < 3 || words.length < 20) {
475
- return { skipped: true, reason: 'insufficient content', name: doc.name };
525
+ // Skip if insufficient prose content
526
+ // Reference docs (mostly tables, code, lists) shouldn't be scored for readability
527
+ if (words.length < MIN_PROSE_WORDS || sentences.length < 3) {
528
+ return { skipped: true, reason: 'insufficient prose (reference document)', name: doc.name };
476
529
  }
477
530
 
478
531
  const passive = measurePassiveVoice(sentences);
@@ -514,7 +567,6 @@ export function validateDocQuality(projectDir, config) {
514
567
 
515
568
  const docs = getCanonicalDocs(projectDir);
516
569
  if (docs.length === 0) {
517
- // No docs to analyze — structure validator catches this
518
570
  return results;
519
571
  }
520
572
 
@@ -581,7 +633,7 @@ export function validateDocQuality(projectDir, config) {
581
633
  } else {
582
634
  results.warnings.push(
583
635
  `${doc.name}: Reading level too high (grade ${m.fleschKincaidGrade} — ${getGradeLabel(m.fleschKincaidGrade)}). ` +
584
- `Aim for grade 10-12 for technical docs`
636
+ `Aim for grade 12-16 for technical docs`
585
637
  );
586
638
  }
587
639
 
@@ -592,7 +644,7 @@ export function validateDocQuality(projectDir, config) {
592
644
  } else {
593
645
  results.warnings.push(
594
646
  `${doc.name}: Average sentence too long (${m.avgSentenceLength} words). ` +
595
- `Target ≤25 words per sentence for readability (Sweller, 1988)`
647
+ `Target ≤30 words per sentence for readability`
596
648
  );
597
649
  }
598
650
 
@@ -619,11 +671,5 @@ export function validateDocQuality(projectDir, config) {
619
671
  }
620
672
  }
621
673
 
622
- // ── Optional: Understanding deep scan note ──
623
- if (!understandingCli && docs.length > 0) {
624
- // Don't add as warning — just a note in verbose mode
625
- // Users who want full 31-metric scan can install understanding
626
- }
627
-
628
674
  return results;
629
675
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docguard-cli",
3
- "version": "0.9.1",
3
+ "version": "0.9.3",
4
4
  "description": "The enforcement tool for Canonical-Driven Development (CDD). Audit, generate, and guard your project documentation.",
5
5
  "type": "module",
6
6
  "bin": {