npm - docguard-cli - Versions diffs - 0.9.2 → 0.9.4 - Mend

docguard-cli 0.9.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/cli/commands/diagnose.mjs +2 -2
package/cli/commands/generate.mjs +49 -10
package/cli/validators/doc-quality.mjs +184 -163
package/package.json +1 -1

package/cli/commands/diagnose.mjs CHANGED

@@ -120,10 +120,10 @@ export function runDiagnose(projectDir, config, flags) {
         });
       } catch { /* init may partially succeed */ }
-      // Run generate to fill in content
+      // Run generate to fill in MISSING content only (never --force, which would overwrite existing docs)
       try {
         const cliPath = resolve(dirname(fileURLToPath(import.meta.url)), '..', 'docguard.mjs');
-        execSync(`node "${cliPath}" generate --dir "${projectDir}" --force`, {
+        execSync(`node "${cliPath}" generate --dir "${projectDir}"`, {
           encoding: 'utf-8',
           stdio: 'pipe',
         });

package/cli/commands/generate.mjs CHANGED

@@ -5,7 +5,7 @@
  * This is the "killer feature" — take any project and auto-generate CDD docs.
  */
-import { existsSync, readFileSync, writeFileSync, readdirSync, statSync, mkdirSync } from 'node:fs';
+import { existsSync, readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, copyFileSync } from 'node:fs';
 import { resolve, join, extname, basename, relative, dirname } from 'node:path';
 import { c } from '../shared.mjs';
 import { detectDocTools } from '../scanners/doc-tools.mjs';
@@ -18,6 +18,30 @@ const IGNORE_DIRS = new Set([
   '.amplify-hosting', '.serverless',
 ]);
+/**
+ * Create a .bak backup of an existing file before --force overwrites it.
+ * Only backs up if the file exists and has content.
+ */
+function backupFile(filePath) {
+  if (existsSync(filePath)) {
+    try {
+      const content = readFileSync(filePath, 'utf-8');
+      if (content.trim().length > 0) {
+        copyFileSync(filePath, filePath + '.bak');
+      }
+    } catch { /* backup failure is non-fatal */ }
+  }
+}
+/**
+ * Safe write — creates a .bak backup before overwriting existing files.
+ * Call this instead of raw writeFileSync when generating docs.
+ */
+function safeWrite(filePath, content) {
+  backupFile(filePath);
+  writeFileSync(filePath, content, 'utf-8');
+}
 const CODE_EXTENSIONS = new Set([
   '.js', '.mjs', '.cjs', '.ts', '.tsx', '.jsx',
   '.py', '.java', '.go', '.rs', '.rb', '.php', '.cs',
@@ -137,6 +161,21 @@ export function runGenerate(projectDir, config, flags) {
     mkdirSync(docsDir, { recursive: true });
   }
+  // ── Safety: warn if --force will overwrite existing files ──
+  if (flags.force) {
+    const targetFiles = [
+      'docs-canonical/ARCHITECTURE.md', 'docs-canonical/API-REFERENCE.md',
+      'docs-canonical/DATA-MODEL.md', 'docs-canonical/ENVIRONMENT.md',
+      'docs-canonical/TEST-SPEC.md', 'docs-canonical/SECURITY.md',
+      'AGENTS.md', 'CHANGELOG.md', 'DRIFT-LOG.md',
+    ];
+    const existing = targetFiles.filter(f => existsSync(resolve(projectDir, f)));
+    if (existing.length > 0) {
+      console.log(`  ${c.yellow}⚠️  --force: ${existing.length} existing file(s) will be overwritten.${c.reset}`);
+      console.log(`  ${c.dim}   Backups saved as .bak files.${c.reset}\n`);
+    }
+  }
   let created = 0;
   let skipped = 0;
@@ -633,7 +672,7 @@ See \\\`docs-canonical/KNOWN-GOTCHAS.md\\\` for known issues.
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated (arc42 + C4 aligned) |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'ARCHITECTURE.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'ARCHITECTURE.md'), 'utf-8');
   console.log(`  ${c.green}✅ ARCHITECTURE.md${c.reset} (arc42 §1-§12, ${componentRows.length} components, ${Object.values(stack).filter(Boolean).length} tech)`);
   return true;
 }
@@ -730,7 +769,7 @@ ${resourceSections}
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated (${deepRoutes.length} endpoints from ${deepRoutes[0]?.source || 'code'}) |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'API-REFERENCE.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'API-REFERENCE.md'), 'utf-8');
   console.log(`  ${c.green}✅ API-REFERENCE.md${c.reset} (${deepRoutes.length} endpoints, ${Object.keys(groups).length} resources)`);
   return true;
 }
@@ -885,7 +924,7 @@ ${erDiagram}
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated (${entities.length} entities, ${relationships.length} relationships from ${schemaSource}) |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'DATA-MODEL.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'DATA-MODEL.md'), 'utf-8');
   console.log(`  ${c.green}✅ DATA-MODEL.md${c.reset} (${entities.length} entities, ${relationships.length} relationships from ${schemaSource})`);
   return true;
 }
@@ -948,7 +987,7 @@ ${envVarRows || '| <!-- No .env.example found --> | | | | |'}
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated (${scan.envVars.length} env vars found) |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'ENVIRONMENT.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'ENVIRONMENT.md'), 'utf-8');
   console.log(`  ${c.green}✅ ENVIRONMENT.md${c.reset} (${scan.envVars.length} env vars detected)`);
   return true;
 }
@@ -1033,7 +1072,7 @@ ${serviceRows || '| <!-- No services found --> | | | |'}
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated (${scan.tests.length} test files, ${serviceMap.filter(s => s.status === '✅').length}/${serviceMap.length} mapped) |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'TEST-SPEC.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'TEST-SPEC.md'), 'utf-8');
   console.log(`  ${c.green}✅ TEST-SPEC.md${c.reset} (${scan.tests.length} tests, ${serviceMap.filter(s => s.status === '✅').length}/${serviceMap.length} services mapped)`);
   return true;
 }
@@ -1099,7 +1138,7 @@ ${scan.envVars.filter(v => isSecretVar(v.name)).map(v =>
 | 0.1.0 | ${new Date().toISOString().split('T')[0]} | DocGuard Generate | Auto-generated |
 `;
-  writeFileSync(path, appendStandardsCitation(content, 'SECURITY.md'), 'utf-8');
+  safeWrite(path, appendStandardsCitation(content, 'SECURITY.md'), 'utf-8');
   console.log(`  ${c.green}✅ SECURITY.md${c.reset} (auth: ${stack.auth || 'not detected'})`);
   return true;
 }
@@ -1209,7 +1248,7 @@ npx docguard-cli generate       # Generate docs from code
 - Test requirements in TEST-SPEC.md must be met
 - Documentation changes must pass \`docguard guard\`
 `;
-    writeFileSync(agentsPath, content, 'utf-8');
+    safeWrite(agentsPath, content);
     console.log(`  ${c.green}✅ AGENTS.md${c.reset} (AGENTS.md standard compliant)`);
     created++;
   } else {
@@ -1231,7 +1270,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Added
 - CDD documentation via DocGuard generate
 `;
-    writeFileSync(changelogPath, content, 'utf-8');
+    safeWrite(changelogPath, content);
     console.log(`  ${c.green}✅ CHANGELOG.md${c.reset}`);
     created++;
   } else {
@@ -1251,7 +1290,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 |------|------|---------------|-------------------|----------|------------|
 | | | | | | |
 `;
-    writeFileSync(driftPath, content, 'utf-8');
+    safeWrite(driftPath, content);
     console.log(`  ${c.green}✅ DRIFT-LOG.md${c.reset}`);
     created++;
   } else {

package/cli/validators/doc-quality.mjs CHANGED

@@ -11,6 +11,11 @@
  *   Readability: Flesch Reading Ease, Flesch-Kincaid Grade Level
  *   Cognitive:   Sentence Length, Negation Load, Conditional Load
  *
+ * v0.9.3 — Prose-Only Extraction Engine:
+ *   Instead of stripping markdown and measuring residue (which treats table
+ *   cells as "long sentences"), this version extracts ONLY actual prose
+ *   paragraphs. Docs that are mostly tables/code skip readability scoring.
+ *
  * Optional: If `understanding` CLI is installed, runs a full 31-metric deep scan.
  *
  * Zero dependencies — pure Node.js built-ins only.
@@ -25,134 +30,197 @@ import { execSync } from 'node:child_process';
 // Values are based on IEEE 830 best practices and readability research.
 const THRESHOLDS = {
-  passiveVoiceRatio:     { warn: 0.20, label: 'Passive voice ratio' },       // >20% passive = warn
+  passiveVoiceRatio:     { warn: 0.25, label: 'Passive voice ratio' },       // >25% passive = warn
   ambiguousPronounRatio: { warn: 0.15, label: 'Ambiguous pronoun ratio' },   // >15% ambiguous pronouns = warn
-  atomicityScore:        { warn: 0.30, label: 'Non-atomic sentence ratio' }, // >30% compound sentences = warn
-  fleschReadingEase:     { warn: 20,   label: 'Flesch reading ease' },       // <20 = very hard to read (lowered from 30 for technical markdown)
-  fleschKincaidGrade:    { warn: 16,   label: 'Flesch-Kincaid grade' },      // >16 = graduate level+
-  avgSentenceLength:     { warn: 25,   label: 'Avg sentence length' },       // >25 words = too long
-  negationLoad:          { warn: 0.15, label: 'Negation load' },             // >15% sentences with negation = warn
+  atomicityScore:        { warn: 0.35, label: 'Non-atomic sentence ratio' }, // >35% compound sentences = warn
+  fleschReadingEase:     { warn: 15,   label: 'Flesch reading ease' },       // <15 = truly unreadable prose
+  fleschKincaidGrade:    { warn: 18,   label: 'Flesch-Kincaid grade' },      // >18 = graduate level+
+  avgSentenceLength:     { warn: 30,   label: 'Avg sentence length' },       // >30 words = too long
+  negationLoad:          { warn: 0.20, label: 'Negation load' },             // >20% sentences with negation = warn
   conditionalLoad:       { warn: 0.30, label: 'Conditional load' },          // >30% sentences conditional = warn
 };
-// ──── Text Processing Utilities ────
+// Minimum prose words required for readability scoring.
+// Docs with less than this are reference docs (tables, code) — skip readability.
+const MIN_PROSE_WORDS = 50;
+// ──── Technical Vocabulary ────
+// Terms the target audience knows. Treated as 2-syllable words for Flesch scoring
+// so they don't artificially inflate difficulty.
+const TECH_VOCAB = new Set([
+  // Infrastructure & databases
+  'dynamodb', 'redis', 'postgres', 'postgresql', 'mongodb', 'mysql', 'sqlite',
+  'kubernetes', 'docker', 'dockerfile', 'nginx', 'apache', 'cloudfront',
+  'cloudwatch', 'elasticsearch', 'opensearch', 'terraform', 'ansible',
+  'memcached', 'cassandra', 'rabbitmq', 'kafka',
+  // Frameworks & languages
+  'typescript', 'javascript', 'python', 'fastify', 'express', 'nextjs',
+  'webpack', 'vite', 'vitest', 'playwright', 'cypress', 'mocha',
+  'nestjs', 'angular', 'svelte', 'nuxtjs', 'gatsby', 'remix',
+  // Protocols & patterns
+  'websocket', 'websockets', 'middleware', 'microservice', 'microservices',
+  'graphql', 'restful', 'oauth', 'openapi', 'webhook', 'webhooks',
+  'grpc', 'protobuf', 'pubsub',
+  // AWS services
+  'lambda', 'cognito', 'amplify', 'apprunner', 'cloudformation',
+  'apigateway', 'secretsmanager', 'parameterstore', 'eventbridge',
+  'fargate', 'elasticache', 'sagemaker',
+  // Common developer terms
+  'namespace', 'endpoint', 'endpoints', 'timestamp', 'timestamps',
+  'boolean', 'callback', 'callbacks', 'codebase', 'monorepo',
+  'frontend', 'backend', 'fullstack', 'changelog', 'localhost',
+  'hostname', 'username', 'eslint', 'prettier', 'rollup',
+  'authentication', 'authorization', 'infrastructure', 'serialization',
+  'deserialization', 'middleware', 'polymorphism', 'abstraction',
+]);
+// ──── Prose Extraction Engine ────
 /**
- * Strip markdown formatting to get plain prose text.
- * Removes: code blocks, inline code, headers, links, images, tables,
- * HTML comments, metadata blocks, horizontal rules, list markers.
+ * Extract only prose paragraphs from markdown content.
+ *
+ * Instead of stripping markdown and measuring residue (where table cells
+ * become "146-word sentences"), this identifies actual prose — blocks of
+ * text that form readable sentences — and returns only those.
+ *
+ * A line qualifies as prose if it:
+ *   - Is not inside a code block / HTML comment
+ *   - Is not a table row, header, horizontal rule, or metadata
+ *   - Has ≥55% alphabetic characters (filters out paths/URLs/symbol-heavy lines)
+ *   - Has ≥5 words (fragments aren't prose)
  */
-function stripMarkdown(content) {
-  let text = content;
-  // Remove fenced code blocks (```...```) and (````...````)
-  text = text.replace(/````[\s\S]*?````/g, '');
-  text = text.replace(/```[\s\S]*?```/g, '');
-  // Remove mermaid diagrams
-  text = text.replace(/```mermaid[\s\S]*?```/g, '');
-  // Remove HTML comments (<!-- ... -->)
-  text = text.replace(/<!--[\s\S]*?-->/g, '');
-  // Remove HTML tags
-  text = text.replace(/<[^>]+>/g, '');
-  // Remove YAML frontmatter (---...---)
-  text = text.replace(/^---[\s\S]*?---\n/m, '');
-  // Remove table rows (lines starting with |) and table separators
-  text = text.replace(/^\|.*$/gm, '');
-  text = text.replace(/^[|:\-\s]+$/gm, '');
-  // Remove horizontal rules
-  text = text.replace(/^[-*_]{3,}\s*$/gm, '');
-  // Remove badge images (shield.io etc.) — before generic image removal
-  text = text.replace(/!\[.*?\]\(https?:\/\/[^)]+\)/g, '');
-  // Remove images: ![alt](url)
-  text = text.replace(/!\[.*?\]\(.*?\)/g, '');
-  // Remove links, keep link text: [text](url) → text
-  text = text.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');
-  // Remove inline code
-  text = text.replace(/`[^`]+`/g, '');
-  // Remove header markers (# ## ### etc.)
-  text = text.replace(/^#{1,6}\s+/gm, '');
-  // Remove list markers (-, *, 1.)
-  text = text.replace(/^\s*[-*+]\s+/gm, '');
-  text = text.replace(/^\s*\d+\.\s+/gm, '');
-  // Remove bold/italic markers
-  text = text.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1');
-  text = text.replace(/_{1,3}([^_]+)_{1,3}/g, '$1');
-  // Remove definition-style lines (key: value or key | value)
-  text = text.replace(/^\s*\w[\w\s]*\s*[:|]\s*.*$/gm, (match) => {
-    // Only strip if it looks like a key-value pair, not a sentence
-    if (match.includes('.') || match.split(/\s+/).length > 8) return match;
-    return '';
-  });
+function extractProse(content) {
+  const lines = content.split('\n');
+  const proseLines = [];
+  let inCodeBlock = false;
+  let inHtmlComment = false;
+  for (const rawLine of lines) {
+    const line = rawLine.trim();
+    // Track code block boundaries (``` and ````)
+    if (/^`{3,}/.test(line)) {
+      inCodeBlock = !inCodeBlock;
+      continue;
+    }
+    if (inCodeBlock) continue;
-  // Remove lines that are mostly non-prose (>60% special characters)
-  text = text.replace(/^.+$/gm, (line) => {
-    const trimmed = line.trim();
-    if (trimmed.length < 5) return '';
-    const alphaCount = (trimmed.match(/[a-zA-Z]/g) || []).length;
-    const ratio = alphaCount / trimmed.length;
-    return ratio < 0.4 ? '' : line; // If <40% letters, it's not prose
-  });
+    // Track multi-line HTML comments
+    if (line.includes('<!--') && !line.includes('-->')) {
+      inHtmlComment = true;
+      continue;
+    }
+    if (inHtmlComment) {
+      if (line.includes('-->')) inHtmlComment = false;
+      continue;
+    }
-  // Collapse multiple blank lines
-  text = text.replace(/\n{3,}/g, '\n\n');
+    // Skip non-prose line types
+    if (line.startsWith('|')) continue;                     // Table rows
+    if (line.startsWith('#')) continue;                     // Headers
+    if (line.startsWith('!')) continue;                     // Images
+    if (/^[-*_]{3,}\s*$/.test(line)) continue;             // Horizontal rules
+    if (/^[|:\-\s]+$/.test(line)) continue;                // Table separators
+    if (/^<!--.*-->$/.test(line)) continue;                // Inline HTML comments
+    if (/^<[^>]+>/.test(line)) continue;                   // HTML tags
+    if (/^---\s*$/.test(line)) continue;                   // YAML frontmatter
+    if (line.length === 0) continue;                        // Empty lines
+    // Clean the line: extract text from markdown formatting
+    let cleaned = line;
+    cleaned = cleaned.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');   // Links → text only
+    cleaned = cleaned.replace(/`[^`]+`/g, '');                     // Remove inline code
+    cleaned = cleaned.replace(/!\[.*?\]\(.*?\)/g, '');             // Remove images
+    cleaned = cleaned.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1');   // Bold/italic → text
+    cleaned = cleaned.replace(/_{1,3}([^_]+)_{1,3}/g, '$1');     // Underline emphasis
+    cleaned = cleaned.replace(/^[-*+]\s+/, '');                    // List markers
+    cleaned = cleaned.replace(/^\d+\.\s+/, '');                    // Numbered list markers
+    cleaned = cleaned.trim();
+    if (cleaned.length < 15) continue;
+    // Prose heuristic: check alphabetic ratio and word count
+    const alphaCount = (cleaned.match(/[a-zA-Z]/g) || []).length;
+    const alphaRatio = alphaCount / cleaned.length;
+    const wordCount = cleaned.split(/\s+/).length;
+    // A prose line needs ≥55% letters and ≥5 words
+    if (alphaRatio >= 0.55 && wordCount >= 5) {
+      proseLines.push(cleaned);
+    }
+  }
-  return text.trim();
+  return proseLines.join('\n');
 }
 /**
- * Split text into sentences using common sentence-ending punctuation.
- * Handles abbreviations (Mr., Dr., etc.) and decimal numbers to avoid false splits.
+ * Split text into sentences with markdown-aware boundary detection.
+ *
+ * Protects against false splits from:
+ *   - File paths (src/services/auth.ts → the dot isn't a sentence boundary)
+ *   - Version numbers (v0.9.2, Node.js 18)
+ *   - URLs (https://example.com)
+ *   - Common abbreviations (e.g., i.e., etc., vs.)
+ *   - Technical dotted names (package.json, .env.local)
  */
 function splitSentences(text) {
   if (!text || text.trim().length === 0) return [];
-  // Protect common abbreviations from false sentence splits
   let protected_ = text;
-  const abbreviations = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'vs', 'etc', 'i.e', 'e.g', 'cf'];
+  // Protect dotted filenames (package.json, .env.local, auth.ts)
+  protected_ = protected_.replace(/[\w.-]+\.[a-z]{1,4}(?=[\s,;:)\]|]|$)/gi, (m) => m.replace(/\./g, '≈'));
+  // Protect version numbers (v0.9.2, 1.2.3)
+  protected_ = protected_.replace(/\bv?\d+\.\d+(?:\.\d+)*\b/g, (m) => m.replace(/\./g, '≈'));
+  // Protect URLs
+  protected_ = protected_.replace(/https?:\/\/[^\s)]+/g, (m) => m.replace(/\./g, '≈'));
+  // Protect common abbreviations
+  const abbreviations = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'vs', 'etc', 'approx', 'incl'];
   for (const abbr of abbreviations) {
     const regex = new RegExp(`\\b${abbr}\\.`, 'gi');
-    protected_ = protected_.replace(regex, `${abbr}≈`);
+    protected_ = protected_.replace(regex, (m) => m.replace(/\./g, '≈'));
   }
+  // Protect e.g. and i.e. specifically (have dots in the abbreviation itself)
+  protected_ = protected_.replace(/\be\.g\./gi, 'e≈g≈');
+  protected_ = protected_.replace(/\bi\.e\./gi, 'i≈e≈');
+  // Protect Node.js, Vue.js, etc.
+  protected_ = protected_.replace(/\b(\w+)\.js\b/gi, '$1≈js');
   // Protect decimal numbers (3.14)
   protected_ = protected_.replace(/(\d)\.(\d)/g, '$1≈$2');
-  // Split on sentence-ending punctuation followed by space or end
-  const raw = protected_.split(/[.!?]+(?:\s+|$)/);
+  // Split on sentence-ending punctuation followed by whitespace/newline/end
+  const raw = protected_.split(/[.!?]+(?:\s+|\n|$)/);
-  // Restore protected characters and filter empties
+  // Restore protected characters and filter empties/fragments
   return raw
     .map(s => s.replace(/≈/g, '.').trim())
-    .filter(s => s.length > 3); // Ignore fragments under 4 chars
+    .filter(s => {
+      if (s.length < 10) return false;
+      return s.split(/\s+/).length >= 3;  // At least 3 words
+    });
 }
 /**
- * Count syllables in a word using a heuristic approach.
- * Based on the algorithm used in readability research:
- *   1. Count vowel groups
- *   2. Subtract silent-e at end
- *   3. Add back for specific suffixes (-le, -les, -tion, etc.)
- *   4. Minimum 1 syllable per word
+ * Count syllables with technical vocabulary normalization.
+ *
+ * Technical terms (DynamoDB, WebSocket, middleware) are normalized to
+ * 2 syllables. The target audience knows these terms — they don't make
+ * the text harder to read.
  */
 function countSyllables(word) {
   word = word.toLowerCase().replace(/[^a-z]/g, '');
   if (word.length <= 2) return 1;
-  // Exception list for common words with unusual syllable counts
+  // Technical vocabulary → 2 syllables (known terms)
+  if (TECH_VOCAB.has(word)) return 2;
   const exceptions = {
     'the': 1, 'are': 1, 'were': 1, 'have': 1, 'there': 1,
     'where': 1, 'here': 1, 'every': 3, 'everything': 4,
@@ -164,17 +232,16 @@ function countSyllables(word) {
   const vowelGroups = word.match(/[aeiouy]+/g);
   let count = vowelGroups ? vowelGroups.length : 1;
-  // Subtract silent-e at end (but not for words like "able", "ible")
+  // Subtract silent-e at end (but not -le, -ce, -ge)
   if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ce') && !word.endsWith('ge')) {
     count--;
   }
-  // Subtract for common diphthong/double vowel endings
+  // Subtract for common past-tense endings
   if (word.endsWith('ed') && !word.endsWith('ted') && !word.endsWith('ded')) {
     count--;
   }
-  // Ensure minimum 1 syllable
   return Math.max(1, count);
 }
@@ -202,7 +269,6 @@ function tokenizeWords(text) {
 function measurePassiveVoice(sentences) {
   if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
-  // Passive voice pattern: be-verb followed by past participle
   const passivePattern = /\b(is|was|were|been|being|are|be|am)\s+([\w]+\s+)?([\w]*(?:ed|en|wn|lt|nt|pt|ft|zed))\b/i;
   let passiveCount = 0;
@@ -221,11 +287,6 @@ function measurePassiveVoice(sentences) {
 /**
  * Ambiguous Pronoun Ratio (Structure, 3.0% weight in Understanding)
- *
- * Counts pronouns that lack clear antecedents: it, this, that, they, them, these, those.
- * In technical documentation, these often create confusion about what exactly is referenced.
- *
- * Returns ratio of ambiguous pronouns to total word count.
  */
 function measureAmbiguousPronouns(words) {
   if (words.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -250,30 +311,19 @@ function measureAmbiguousPronouns(words) {
 }
 /**
- * Atomicity Score (Structure, 9.0% weight in Understanding — HIGHEST)
- *
- * Measures how "atomic" (single-purpose) sentences are.
- * Compound sentences with and/or/also/additionally indicate non-atomic requirements.
- * IEEE 830 §4.1 recommends atomic requirements that can be independently verified.
- *
- * Returns ratio of NON-atomic sentences (compound) to total sentences.
+ * Atomicity Score (Structure, 9.0% weight — HIGHEST in Understanding)
  */
 function measureAtomicity(sentences) {
   if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
-  // Compound indicators (sentence-level conjunctions, not word-level)
-  // We match these only when preceded/followed by spaces to avoid matching within words
   const compoundPattern = /\b(and also|and then|as well as|in addition to|additionally|furthermore|moreover)\b/i;
-  // Simple "and" / "or" — only flag if >1 occurrence in a sentence (natural language has legitimate single "and")
   const simpleCompound = /\band\b/gi;
-  const simpleOr = /\bor\b/gi;
   let compoundCount = 0;
   for (const sentence of sentences) {
     if (compoundPattern.test(sentence)) {
       compoundCount++;
     } else {
-      // Count simple "and" — 2+ indicates compound
       const andMatches = sentence.match(simpleCompound);
       if (andMatches && andMatches.length >= 2) {
         compoundCount++;
@@ -289,16 +339,8 @@ function measureAtomicity(sentences) {
 }
 /**
- * Flesch Reading Ease (Readability, 3.75% weight in Understanding)
- *
- * Formula: 206.835 - 1.015 * (total words / total sentences) - 84.6 * (total syllables / total words)
- * Source: Flesch, R. (1948). "A new readability yardstick." Journal of Applied Psychology.
- *
- * Scale: 0-100, higher = easier to read.
- *   90-100: Very Easy (5th grade)
- *   60-69:  Standard (8th-9th grade)
- *   30-49:  Difficult (college level)
- *   0-29:   Very Confusing (graduate level)
+ * Flesch Reading Ease (Readability)
+ * Formula: 206.835 - 1.015 * (words/sentences) - 84.6 * (syllables/words)
  */
 function measureFleschReadingEase(words, sentences) {
   if (words.length === 0 || sentences.length === 0) return 0;
@@ -312,12 +354,8 @@ function measureFleschReadingEase(words, sentences) {
 }
 /**
- * Flesch-Kincaid Grade Level (Readability, 2.25% weight in Understanding)
- *
- * Formula: 0.39 * (total words / total sentences) + 11.8 * (total syllables / total words) - 15.59
- * Source: Kincaid, J.P. et al. (1975). "Derivation of new readability formulas."
- *
- * Returns US grade level (8 = 8th grade, 12 = high school senior, 16+ = graduate)
+ * Flesch-Kincaid Grade Level (Readability)
+ * Formula: 0.39 * (words/sentences) + 11.8 * (syllables/words) - 15.59
  */
 function measureFleschKincaidGrade(words, sentences) {
   if (words.length === 0 || sentences.length === 0) return 0;
@@ -331,10 +369,7 @@ function measureFleschKincaidGrade(words, sentences) {
 }
 /**
- * Sentence Length (Cognitive, 3.0% weight in Understanding)
- *
- * Average words per sentence. Cognitive load research (Sweller, 1988) shows that
- * sentences over 25 words significantly increase processing effort.
+ * Sentence Length (Cognitive)
  */
 function measureSentenceLength(words, sentences) {
   if (sentences.length === 0) return 0;
@@ -342,11 +377,7 @@ function measureSentenceLength(words, sentences) {
 }
 /**
- * Negation Load (Cognitive, 1.5% weight in Understanding)
- *
- * Ratio of sentences containing negation words.
- * Negation increases cognitive load because readers must mentally invert meaning.
- * IEEE 830 §4.3 recommends positive phrasing in requirements.
+ * Negation Load (Cognitive)
  */
 function measureNegationLoad(sentences) {
   if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -368,10 +399,7 @@ function measureNegationLoad(sentences) {
 }
 /**
- * Conditional Load (Cognitive, 1.5% weight in Understanding)
- *
- * Ratio of sentences containing conditional keywords.
- * Excessive conditionals make documentation hard to follow and test.
+ * Conditional Load (Cognitive)
  */
 function measureConditionalLoad(sentences) {
   if (sentences.length === 0) return { ratio: 0, count: 0, total: 0 };
@@ -400,6 +428,7 @@ function getReadabilityLabel(score) {
   if (score >= 60) return 'Standard';
   if (score >= 50) return 'Fairly Difficult';
   if (score >= 30) return 'Difficult';
+  if (score >= 15) return 'Hard — Technical';
   return 'Very Confusing';
 }
@@ -416,11 +445,9 @@ function getGradeLabel(grade) {
 /**
  * Check if the `understanding` CLI is available on the system.
- * Returns the path to the executable or null.
  */
 function findUnderstandingCli() {
   try {
-    // Use 'which' on Unix/Mac, 'where' on Windows — never redirect to NUL (creates file on Mac)
     const cmd = process.platform === 'win32' ? 'where understanding' : 'which understanding';
     const result = execSync(`${cmd} 2>/dev/null`, {
       encoding: 'utf-8',
@@ -434,7 +461,6 @@ function findUnderstandingCli() {
 /**
  * Run the `understanding` CLI on a file and parse results.
- * Returns understanding's quality score or null if it fails.
  */
 function runUnderstandingDeepScan(filePath) {
   try {
@@ -484,20 +510,22 @@ function getCanonicalDocs(projectDir) {
 /**
  * Analyze a single document and return per-metric results.
+ *
+ * Uses extractProse() instead of stripMarkdown() — only actual prose
+ * paragraphs are scored. Documents that are mostly tables/code/reference
+ * material are skipped for readability (they'd score 0/100 unfairly).
  */
 function analyzeDocument(doc) {
   const content = readFileSync(doc.path, 'utf-8');
-  const plainText = stripMarkdown(content);
-  if (plainText.length < 50) {
-    return { skipped: true, reason: 'too short', name: doc.name };
-  }
+  const proseText = extractProse(content);
-  const sentences = splitSentences(plainText);
-  const words = tokenizeWords(plainText);
+  const sentences = splitSentences(proseText);
+  const words = tokenizeWords(proseText);
-  if (sentences.length < 3 || words.length < 20) {
-    return { skipped: true, reason: 'insufficient content', name: doc.name };
+  // Skip if insufficient prose content
+  // Reference docs (mostly tables, code, lists) shouldn't be scored for readability
+  if (words.length < MIN_PROSE_WORDS || sentences.length < 3) {
+    return { skipped: true, reason: 'insufficient prose (reference document)', name: doc.name };
   }
   const passive = measurePassiveVoice(sentences);
@@ -539,7 +567,6 @@ export function validateDocQuality(projectDir, config) {
   const docs = getCanonicalDocs(projectDir);
   if (docs.length === 0) {
-    // No docs to analyze — structure validator catches this
     return results;
   }
@@ -606,7 +633,7 @@ export function validateDocQuality(projectDir, config) {
     } else {
       results.warnings.push(
         `${doc.name}: Reading level too high (grade ${m.fleschKincaidGrade} — ${getGradeLabel(m.fleschKincaidGrade)}). ` +
-        `Aim for grade 10-12 for technical docs`
+        `Aim for grade 12-16 for technical docs`
       );
     }
@@ -617,7 +644,7 @@ export function validateDocQuality(projectDir, config) {
     } else {
       results.warnings.push(
         `${doc.name}: Average sentence too long (${m.avgSentenceLength} words). ` +
-        `Target ≤25 words per sentence for readability (Sweller, 1988)`
+        `Target ≤30 words per sentence for readability`
       );
     }
@@ -644,11 +671,5 @@ export function validateDocQuality(projectDir, config) {
     }
   }
-  // ── Optional: Understanding deep scan note ──
-  if (!understandingCli && docs.length > 0) {
-    // Don't add as warning — just a note in verbose mode
-    // Users who want full 31-metric scan can install understanding
-  }
   return results;
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "docguard-cli",
-  "version": "0.9.2",
+  "version": "0.9.4",
   "description": "The enforcement tool for Canonical-Driven Development (CDD). Audit, generate, and guard your project documentation.",
   "type": "module",
   "bin": {