@writechoice/mint-cli 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,6 +32,9 @@ writechoice check links docs.example.com http://localhost:3000
32
32
  # Fix broken anchor links
33
33
  writechoice fix links
34
34
 
35
+ # Fix MDX parsing errors (void tags, stray angle brackets)
36
+ writechoice fix parse
37
+
35
38
  # Generate config.json template
36
39
  writechoice config
37
40
  ```
@@ -94,6 +97,29 @@ writechoice fix links -r custom_report.json # Use custom report
94
97
 
95
98
  **Note:** Requires JSON report from `check links` command.
96
99
 
100
+ ### `fix parse`
101
+
102
+ Automatically fixes common MDX parsing errors: void HTML tags that are not self-closed and stray angle brackets in text.
103
+
104
+ ```bash
105
+ writechoice fix parse # Fix from check parse report
106
+ writechoice fix parse -f file.mdx # Fix a single file directly
107
+ writechoice fix parse -d docs # Fix files in a directory
108
+ writechoice fix parse -r custom_report.json # Use custom report
109
+ ```
110
+
111
+ **Common options:**
112
+ - `-r, --report <path>` - Path to JSON report (default: `mdx_errors_report.json`)
113
+ - `-f, --file <path>` - Fix a single MDX file directly
114
+ - `-d, --dir <path>` - Fix MDX files in a directory
115
+ - `--quiet` - Suppress output
116
+
117
+ **What it fixes:**
118
+ - Void tags: `<br>` → `<br />`, `<img src="x">` → `<img src="x" />`
119
+ - Stray brackets: `x < 10` → `x &lt; 10`, `y > 5` → `y &gt; 5`
120
+
121
+ Content inside code blocks and inline code is never modified.
122
+
97
123
  ### `update`
98
124
 
99
125
  Update CLI to latest version.
@@ -107,7 +133,8 @@ writechoice update
107
133
  - **MDX Parsing Validation** - Catch syntax errors before deployment
108
134
  - **Link Validation** - Test links against live websites with Playwright
109
135
  - **Two-Step Anchor Validation** - Compare production vs development anchors
110
- - **Auto-Fix** - Separate fix command to automatically correct broken anchor links
136
+ - **Auto-Fix Links** - Automatically correct broken anchor links
137
+ - **Auto-Fix Parsing** - Automatically fix void tags and stray angle brackets
111
138
  - **Dual Report Formats** - Generates both JSON (for automation) and Markdown (for humans)
112
139
  - **Configuration File** - Optional config.json for default settings
113
140
  - **CI/CD Ready** - Exit codes for pipeline integration
@@ -176,10 +203,21 @@ writechoice fix links
176
203
  # Fix from custom report
177
204
  writechoice fix links -r custom_report.json
178
205
 
206
+ # Fix MDX parsing errors
207
+ writechoice fix parse
208
+
209
+ # Fix a single file directly
210
+ writechoice fix parse -f docs/getting-started.mdx
211
+
179
212
  # Full workflow: validate -> fix -> re-validate
180
213
  writechoice check links docs.example.com
181
214
  writechoice fix links
182
215
  writechoice check links docs.example.com
216
+
217
+ # Full parse workflow: validate -> fix -> re-validate
218
+ writechoice check parse
219
+ writechoice fix parse
220
+ writechoice check parse
183
221
  ```
184
222
 
185
223
  ## Documentation
@@ -191,6 +229,7 @@ Detailed documentation is available in the [docs/](docs/) folder:
191
229
  - [check links](docs/commands/check-links.md) - Link validation
192
230
  - [check parse](docs/commands/check-parse.md) - MDX parsing validation
193
231
  - [fix links](docs/commands/fix-links.md) - Auto-fix broken links
232
+ - [fix parse](docs/commands/fix-parse.md) - Auto-fix MDX parsing errors
194
233
  - [update](docs/commands/update.md) - Update command
195
234
  - **Guides**
196
235
  - [Configuration File](docs/config-file.md) - Using config.json
@@ -222,7 +261,8 @@ writechoice-mint-cli/
222
261
  │ │ │ ├── links.js # Link validation
223
262
  │ │ │ └── mdx.js # MDX parsing validation
224
263
  │ │ └── fix/
225
- │ │ └── links.js # Link fixing
264
+ │ │ ├── links.js # Link fixing
265
+ │ │ └── parse.js # Parse error fixing
226
266
  │ └── utils/
227
267
  │ ├── helpers.js # Utility functions
228
268
  │ └── reports.js # Report generation
package/bin/cli.js CHANGED
@@ -95,6 +95,20 @@ fix
95
95
  await fixLinks(options);
96
96
  });
97
97
 
98
// Fix parse subcommand: repairs void tags and stray angle brackets in MDX files
fix
  .command("parse")
  .description("Fix common MDX parsing errors (void tags, stray angle brackets)")
  .option("-r, --report <path>", "Path to parse validation report", "mdx_errors_report.json")
  .option("-f, --file <path>", "Fix a single MDX file directly")
  .option("-d, --dir <path>", "Fix MDX files in a specific directory")
  .option("--quiet", "Suppress terminal output")
  .action(async function runFixParse(options) {
    // The fixer module is imported on demand, only when this subcommand runs.
    const parseFixer = await import("../src/commands/fix/parse.js");
    // Verbose output is simply the inverse of --quiet.
    options.verbose = !options.quiet;
    await parseFixer.fixParse(options);
  });
111
+
98
112
  // Config command
99
113
  program
100
114
  .command("config")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@writechoice/mint-cli",
3
- "version": "0.0.10",
3
+ "version": "0.0.12",
4
4
  "description": "CLI tool for Mintlify documentation validation and utilities",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -0,0 +1,447 @@
1
+ /**
2
+ * MDX Parse Fix Tool
3
+ *
4
+ * Fixes common MDX parsing errors in documentation files:
5
+ * 1. Void HTML tags not self-closed (<br> → <br />)
6
+ * 2. Stray < / > in text (escape to &lt; / &gt;)
7
+ *
8
+ * Skips content inside code fences and inline code.
9
+ */
10
+
11
+ import { existsSync, readdirSync, statSync, readFileSync, writeFileSync } from "fs";
12
+ import { join, relative, resolve } from "path";
13
+ import chalk from "chalk";
14
+
15
// HTML void elements: they have no closing tag, so JSX/MDX requires the
// self-closing form (<br />).
const VOID_ELEMENTS = [
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
];

// Matches an opening void tag (with optional attributes) whose closing ">" is
// not immediately preceded by "/". Capture 1 = tag name, capture 2 = attributes.
const VOID_PATTERN = new RegExp(
  `<(${VOID_ELEMENTS.join("|")})(\\s[^>]*?)?\\s*(?<!\\/)>`,
  "gi"
);

// Directory names that are never descended into when searching for MDX files.
const EXCLUDED_DIRS = ["snippets", "node_modules", ".git"];
27
+
28
/**
 * Collects the MDX files to process.
 *
 * - When `file` is given, returns that single path (resolved against
 *   `repoRoot`) if it exists, otherwise an empty list.
 * - Otherwise walks `directory` (resolved against `repoRoot`) or `repoRoot`
 *   itself, recursively, skipping any directory whose name is listed in
 *   EXCLUDED_DIRS, and returns every `*.mdx` file found.
 *
 * @param {string} repoRoot - Base directory used to resolve relative paths
 * @param {string|null} directory - Optional directory to restrict the search to
 * @param {string|null} file - Optional single file to return
 * @returns {string[]} Absolute file paths, sorted
 */
function findMdxFiles(repoRoot, directory = null, file = null) {
  if (file) {
    const fullPath = resolve(repoRoot, file);
    return existsSync(fullPath) ? [fullPath] : [];
  }

  const searchRoot = directory ? resolve(repoRoot, directory) : repoRoot;
  const mdxFiles = [];

  function walkDirectory(dir) {
    // Split on both separators so the exclusion check also works for
    // Windows-style paths, where "/" never appears.
    const dirName = dir.split(/[\\/]/).pop();
    if (EXCLUDED_DIRS.includes(dirName)) return;

    let entries;
    try {
      entries = readdirSync(dir);
    } catch (error) {
      console.error(`Error reading directory ${dir}: ${error.message}`);
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dir, entry);

      // Stat each entry on its own: one unreadable entry (e.g. a dangling
      // symlink) must not abort the rest of the directory.
      let stat;
      try {
        stat = statSync(fullPath);
      } catch (error) {
        console.error(`Error reading ${fullPath}: ${error.message}`);
        continue;
      }

      if (stat.isDirectory()) {
        walkDirectory(fullPath);
      } else if (stat.isFile() && entry.endsWith(".mdx")) {
        mdxFiles.push(fullPath);
      }
    }
  }

  if (existsSync(searchRoot)) walkDirectory(searchRoot);

  return mdxFiles.sort();
}
69
+
70
/**
 * Reads a parse report and returns the files it lists as having errors.
 *
 * Each file is returned once, even when the report contains several error
 * entries for it. Entries without a string `filePath` are ignored instead of
 * invalidating the whole report.
 *
 * @param {string} reportPath - Path to the JSON report
 * @param {string} repoRoot - Base directory used to resolve each `filePath`
 * @returns {string[]|null} Unique absolute paths in first-seen order, or null
 *   when the report does not exist or cannot be read/parsed
 */
function getFilesFromReport(reportPath, repoRoot) {
  if (!existsSync(reportPath)) return null;

  try {
    const report = JSON.parse(readFileSync(reportPath, "utf-8"));
    const entries = Array.isArray(report?.errors) ? report.errors : [];

    // A Set keeps insertion order and drops repeated files.
    const uniquePaths = new Set();
    for (const entry of entries) {
      if (typeof entry?.filePath !== "string" || entry.filePath === "") continue;
      uniquePaths.add(resolve(repoRoot, entry.filePath));
    }

    return [...uniquePaths];
  } catch (error) {
    console.error(`Error reading report: ${error.message}`);
    return null;
  }
}
87
+
88
/**
 * Splits file content into protected and unprotected segments.
 *
 * Protected segments are:
 *  - a leading YAML frontmatter block (`---` on the first line up to the next
 *    `---` line), because it is YAML, not MDX: escaping `<`/`>` there would
 *    corrupt values such as `description: >`;
 *  - fenced code blocks (``` or ~~~), including both fence lines. A fence that
 *    is never closed protects everything up to the end of the content.
 *
 * Concatenating the `text` of all segments reproduces `content` exactly.
 *
 * @param {string} content - Full file content
 * @returns {{ text: string, protected: boolean }[]} Segments in document order
 */
function segmentContent(content) {
  const segments = [];
  const lines = content.split("\n");

  let pos = 0; // offset of the first character not yet emitted as a segment
  let lineStart = 0; // offset of the current line
  let firstLine = 0; // index of the first line the fence scanner looks at

  // Leading frontmatter: only recognised when it is actually closed.
  if (/^---[ \t]*\r?$/.test(lines[0])) {
    let offset = lines[0].length + 1;
    for (let i = 1; i < lines.length; i++) {
      const end = offset + lines[i].length;
      if (/^---[ \t]*\r?$/.test(lines[i])) {
        segments.push({ text: content.slice(0, end), protected: true });
        pos = end;
        lineStart = end + 1; // +1 for newline
        firstLine = i + 1;
        break;
      }
      offset = end + 1;
    }
  }

  // State tracking for fenced code blocks
  let inFence = false;
  let fenceMarker = "";

  for (let i = firstLine; i < lines.length; i++) {
    const line = lines[i];
    const lineEnd = lineStart + line.length;
    const trimmed = line.trimStart();

    if (!inFence) {
      const fenceMatch = trimmed.match(/^(`{3,}|~{3,})/);
      if (fenceMatch) {
        // Emit the plain text accumulated before the opening fence line.
        if (lineStart > pos) {
          segments.push({ text: content.slice(pos, lineStart), protected: false });
        }
        inFence = true;
        fenceMarker = fenceMatch[1];
        // The opening fence line itself is protected.
        segments.push({ text: content.slice(lineStart, lineEnd), protected: true });
        pos = lineEnd;
      }
    } else {
      // A closing fence uses the same character and is at least as long
      // as the opening one, with nothing but whitespace after it.
      const closeMatch = trimmed.match(/^(`{3,}|~{3,})\s*$/);
      if (
        closeMatch &&
        closeMatch[1][0] === fenceMarker[0] &&
        closeMatch[1].length >= fenceMarker.length
      ) {
        segments.push({ text: content.slice(pos, lineEnd), protected: true });
        pos = lineEnd;
        inFence = false;
        fenceMarker = "";
      }
    }

    lineStart = lineEnd + 1; // +1 for newline
  }

  // Whatever is left is plain text, or the body of an unclosed fence.
  if (pos < content.length) {
    segments.push({ text: content.slice(pos), protected: inFence });
  }

  return segments;
}
152
+
153
/**
 * Self-closes void HTML tags in a text segment while leaving inline code
 * spans (`...`) untouched.
 *
 * @param {string} text - Segment text outside of fenced code blocks
 * @returns {{ text: string, count: number }} Rewritten text and number of tags fixed
 */
function fixVoidTags(text) {
  const pieces = [];
  let fixes = 0;
  let cursor = 0;

  // Runs the void-tag replacement on a stretch of non-code text.
  const rewritePlain = (plain) => {
    const outcome = replaceVoidTags(plain);
    pieces.push(outcome.text);
    fixes += outcome.count;
  };

  for (const span of text.matchAll(/`[^`]+`/g)) {
    rewritePlain(text.slice(cursor, span.index));
    // Inline code is copied through verbatim.
    pieces.push(span[0]);
    cursor = span.index + span[0].length;
  }

  // Tail after the last inline code span (or the whole text if there was none).
  rewritePlain(text.slice(cursor));

  return { text: pieces.join(""), count: fixes };
}
188
+
189
/**
 * Rewrites void tags that are not self-closed (<br>, <img ...>) into their
 * self-closing form (<br />, <img ... />).
 *
 * VOID_PATTERN is case-insensitive, so it also matches names such as `Link`
 * or `Input`. In MDX a capitalised, mixed-case name is a JSX component that
 * normally has children and a closing tag; self-closing it would orphan that
 * closing tag, so such matches are left alone. All-lowercase and all-uppercase
 * names (`<br>`, `<BR>`) are still fixed.
 *
 * @param {string} text - Plain text (no inline code)
 * @returns {{ text: string, count: number }} Rewritten text and number of tags fixed
 */
function replaceVoidTags(text) {
  let count = 0;
  const result = text.replace(VOID_PATTERN, (match, tag, attrs) => {
    // Already self-closing check (belt and suspenders)
    if (match.trimEnd().endsWith("/>")) return match;

    // PascalCase / mixed-case name starting with a capital: a JSX component.
    if (/^[A-Z]/.test(tag) && tag !== tag.toUpperCase()) return match;

    count++;
    const attrStr = attrs ? attrs.trimEnd() : "";
    return `<${tag}${attrStr} />`;
  });
  return { text: result, count };
}
203
+
204
/**
 * Escapes stray < and > in a text segment, leaving inline code spans and
 * things that look like HTML/JSX tags, comments or <!...> declarations as-is.
 *
 * @param {string} text - Segment text outside of fenced code blocks
 * @returns {{ text: string, count: number }} Rewritten text and number of brackets escaped
 */
function fixStrayAngleBrackets(text) {
  // Inline code, closing tags, opening/self-closing tags, comments, <!...>
  const protectedRegex = /`[^`]+`|<\/[a-zA-Z][a-zA-Z0-9]*\s*>|<[a-zA-Z][a-zA-Z0-9]*(?:\s[^>]*)?\s*\/?>|<!--[\s\S]*?-->|<![^>]*>/g;

  const output = [];
  let escaped = 0;
  let cursor = 0;

  // Escapes brackets in a stretch of text that lies between protected spans.
  const escapeChunk = (chunk) => {
    const outcome = escapeStrayBrackets(chunk);
    output.push(outcome.text);
    escaped += outcome.count;
  };

  for (const span of text.matchAll(protectedRegex)) {
    escapeChunk(text.slice(cursor, span.index));
    // The protected span itself is copied through verbatim.
    output.push(span[0]);
    cursor = span.index + span[0].length;
  }

  // Tail after the last protected span (or the whole text if there was none).
  escapeChunk(text.slice(cursor));

  return { text: output.join(""), count: escaped };
}
239
+
240
/**
 * Escapes stray < and > in plain text (the caller has already removed
 * inline code and recognisable tags).
 *
 * - `<` is escaped to `&lt;` unless the next character is a letter, `/` or `!`
 *   (i.e. it could start a tag, closing tag or comment).
 * - `>` is escaped to `&gt;` unless it is a blockquote marker at the start of
 *   a line (including nested markers such as `> > text`), or it directly
 *   follows tag-like content without looking like a spaced comparison.
 *
 * Line starts are judged within `text` only.
 *
 * @param {string} text - Plain text chunk
 * @returns {{ text: string, count: number }} Rewritten text and number of brackets escaped
 */
function escapeStrayBrackets(text) {
  let ltCount = 0;

  // Escape < that is NOT the start of a valid tag
  const afterLt = text.replace(/</g, (match, offset) => {
    // Valid tag starts: letter, /, !
    if (/[a-zA-Z\/!]/.test(text[offset + 1] ?? "")) return match;
    ltCount++;
    return "&lt;";
  });

  let gtCount = 0;
  const result = afterLt.replace(/>/g, (match, offset) => {
    // Keep blockquote markers: only ">" and blanks precede this one on its
    // line, which covers ">", ">>" and the spaced nested form "> >".
    const lineStart = afterLt.lastIndexOf("\n", offset - 1) + 1;
    const beforeOnLine = afterLt.slice(lineStart, offset).trimStart();
    if (/^(?:>[ \t]*)*$/.test(beforeOnLine)) return match;

    const prevChar = afterLt.slice(Math.max(0, offset - 1), offset);
    if (/[a-zA-Z0-9"'\/\-]/.test(prevChar)) {
      // The ">" hugs tag-like content. Only treat it as stray when it reads
      // like running text: whitespace two characters back and whitespace or
      // a word character right after (e.g. "a -> b").
      const twoBack = afterLt.slice(Math.max(0, offset - 2), offset);
      const nextChar = afterLt[offset + 1] || "";
      if (/\s/.test(twoBack[0]) && /[\s\w]/.test(nextChar)) {
        gtCount++;
        return "&gt;";
      }
      return match;
    }

    gtCount++;
    return "&gt;";
  });

  return { text: result, count: ltCount + gtCount };
}
288
+
289
/**
 * Runs both fixers over one file and rewrites it in place when anything changed.
 *
 * Protected segments (as reported by segmentContent) are copied through
 * untouched; every other segment gets the void-tag pass followed by the
 * stray-bracket pass.
 *
 * @param {string} filePath - Path of the file to fix
 * @returns {{ voidTagFixes: number, strayBracketFixes: number }} Fix counts for the file
 */
function fixFile(filePath) {
  const original = readFileSync(filePath, "utf-8");
  const tally = { voidTagFixes: 0, strayBracketFixes: 0 };
  let rewritten = "";

  for (const segment of segmentContent(original)) {
    if (segment.protected) {
      rewritten += segment.text;
      continue;
    }

    // Void tags first, then stray brackets on the result.
    const voidPass = fixVoidTags(segment.text);
    const bracketPass = fixStrayAngleBrackets(voidPass.text);

    tally.voidTagFixes += voidPass.count;
    tally.strayBracketFixes += bracketPass.count;
    rewritten += bracketPass.text;
  }

  // Leave the file alone when nothing changed.
  if (rewritten !== original) {
    writeFileSync(filePath, rewritten, "utf-8");
  }

  return tally;
}
322
+
323
/**
 * Main CLI function for fixing parse errors.
 *
 * File selection:
 *  - `options.file` / `options.dir`: fix that file or directory directly;
 *  - otherwise: fix the files listed in the JSON report at `options.report`
 *    (default `mdx_errors_report.json`).
 *
 * Exits the process with code 1 when no MDX files are found in file/dir mode,
 * or when the report is missing or is not a `.json` file.
 *
 * @param {object} options
 * @param {string} [options.report] - Path to the JSON parse report
 * @param {string} [options.file] - Single MDX file to fix
 * @param {string} [options.dir] - Directory of MDX files to fix
 * @param {boolean} [options.quiet] - Suppress informational output
 * @param {boolean} [options.verbose] - Print per-file details; defaults to `!quiet`
 * @returns {Promise<void>}
 */
export async function fixParse(options) {
  const repoRoot = process.cwd();
  const quiet = Boolean(options.quiet);
  // Only the CLI wrapper sets `verbose`; fall back to "not quiet" so direct
  // callers get the same per-file output.
  const verbose = options.verbose ?? !quiet;

  if (!quiet) {
    console.log(chalk.bold("\n\uD83D\uDD27 MDX Parse Fixer\n"));
  }

  // Determine which files to fix
  let files;

  if (options.file || options.dir) {
    // Direct file/dir mode — no report needed
    files = findMdxFiles(repoRoot, options.dir, options.file);

    if (files.length === 0) {
      console.error("No MDX files found.");
      process.exit(1);
    }

    if (!quiet) {
      console.log(`Found ${files.length} MDX file(s) to process\n`);
    }
  } else {
    // Report mode
    const reportPath = options.report || "mdx_errors_report.json";

    if (!existsSync(reportPath)) {
      console.error(chalk.red(`\n\u2717 Error: Report file not found: ${reportPath}`));

      if (reportPath.endsWith(".md")) {
        const jsonPath = reportPath.replace(/\.md$/, ".json");
        console.error(chalk.yellow(`\n\u26A0\uFE0F The fix command requires a JSON report file.`));
        console.error(chalk.yellow(`Try using: ${chalk.cyan(jsonPath)}`));
      } else {
        console.error(chalk.yellow(`\n\u26A0\uFE0F Make sure to run the validation command first:`));
        console.error(chalk.gray(` writechoice check parse`));
      }

      process.exit(1);
    }

    if (!reportPath.endsWith(".json")) {
      console.error(chalk.red(`\n\u2717 Error: The fix command requires a JSON report file.`));
      console.error(chalk.yellow(`\nProvided file: ${reportPath}`));

      if (reportPath.endsWith(".md")) {
        const jsonPath = reportPath.replace(/\.md$/, ".json");
        console.error(chalk.yellow(`\nThe markdown (.md) report is for human readability only.`));
        console.error(chalk.yellow(`Please use the JSON report instead: ${chalk.cyan(jsonPath)}`));
      }

      process.exit(1);
    }

    if (!quiet) {
      console.log(`Reading report: ${chalk.cyan(reportPath)}`);
    }

    files = getFilesFromReport(reportPath, repoRoot);

    if (!files || files.length === 0) {
      if (!quiet) {
        console.log(chalk.yellow("\n\u26A0\uFE0F No files with errors found in report."));
      }
      return;
    }

    if (!quiet) {
      console.log(`Found ${files.length} file(s) with errors\n`);
    }
  }

  // Apply fixes
  const results = {};
  let totalVoid = 0;
  let totalBracket = 0;

  for (const filePath of files) {
    // Files named in a report may have been moved or deleted since it was written.
    if (!existsSync(filePath)) {
      if (verbose) {
        console.log(`Warning: File not found: ${filePath}`);
      }
      continue;
    }

    const { voidTagFixes, strayBracketFixes } = fixFile(filePath);
    if (voidTagFixes + strayBracketFixes === 0) continue;

    const relPath = relative(repoRoot, filePath);
    results[relPath] = { voidTagFixes, strayBracketFixes };
    totalVoid += voidTagFixes;
    totalBracket += strayBracketFixes;

    if (verbose) {
      console.log(`Fixed ${chalk.cyan(relPath)}: ${voidTagFixes} void tag(s), ${strayBracketFixes} stray bracket(s)`);
    }
  }

  // Summary
  if (quiet) return;

  const fileCount = Object.keys(results).length;
  const totalFixes = totalVoid + totalBracket;

  if (fileCount === 0) {
    console.log(chalk.yellow("\n\u26A0\uFE0F No fixable issues found."));
    return;
  }

  console.log(chalk.green(`\n\u2713 Fixed ${totalFixes} issue(s) in ${fileCount} file(s):\n`));

  for (const [filePath, counts] of Object.entries(results)) {
    const details = [];
    if (counts.voidTagFixes > 0) details.push(`${counts.voidTagFixes} void tag(s)`);
    if (counts.strayBracketFixes > 0) details.push(`${counts.strayBracketFixes} stray bracket(s)`);
    console.log(` ${chalk.cyan(filePath)}: ${details.join(", ")}`);
  }

  console.log(chalk.yellow("\n\u26A0\uFE0F Run validation again to verify the fixes:"));
  console.log(chalk.gray(" writechoice check parse"));
}
@@ -398,6 +398,76 @@ function findMdxFiles(repoRoot, directory = null, file = null) {
398
398
 
399
399
  // Playwright Validation Functions
400
400
 
401
/**
 * Find a heading on the page by matching its text with the link text.
 *
 * Each heading is scored and the highest score wins (the earliest heading
 * wins ties):
 *   100 - exact match (case-insensitive, trimmed)
 *    80 - heading contains the whole link text
 *    70 - link text contains the whole heading
 *    60 - every word of the link text appears in the heading (2+ words)
 *    50 - at least 70% of the link-text words appear in the heading (3+ words)
 *
 * An empty or missing link text never matches, and headings whose cleaned
 * text is empty are skipped; otherwise the substring checks would succeed
 * trivially against the empty string.
 *
 * @param {Page} page - Playwright page object
 * @param {string} linkText - The text from the link (e.g., "Document Picker Security Settings")
 * @param {boolean} verbose - Whether to log verbose output
 * @returns {Promise<{heading: ElementHandle|null, text: string|null, index: number}>}
 */
async function findHeadingByText(page, linkText, verbose = false) {
  const notFound = { heading: null, text: null, index: -1 };

  // Normalize the link text for comparison
  const normalizedLinkText = String(linkText ?? "").toLowerCase().trim();
  if (normalizedLinkText === "") return notFound;

  const linkWords = normalizedLinkText.split(/\s+/);
  const allHeadings = await page.$$("h1, h2, h3, h4, h5, h6");

  let bestMatch = null;

  for (let i = 0; i < allHeadings.length; i++) {
    const heading = allHeadings[i];
    const headingTextClean = cleanHeadingText(await heading.innerText());
    const normalizedHeadingText = headingTextClean.toLowerCase().trim();
    if (normalizedHeadingText === "") continue;

    let score = 0;
    if (normalizedHeadingText === normalizedLinkText) {
      score = 100;
    } else if (normalizedHeadingText.includes(normalizedLinkText)) {
      score = 80;
    } else if (normalizedLinkText.includes(normalizedHeadingText)) {
      score = 70;
    } else {
      const headingWords = new Set(normalizedHeadingText.split(/\s+/));
      const matchingCount = linkWords.filter((word) => headingWords.has(word)).length;

      if (matchingCount === linkWords.length && linkWords.length >= 2) {
        score = 60;
      } else if (matchingCount >= Math.ceil(linkWords.length * 0.7) && linkWords.length >= 3) {
        score = 50;
      }
    }

    // Strictly-greater keeps the earliest heading among equal scores.
    if (score > 0 && (bestMatch === null || score > bestMatch.score)) {
      bestMatch = { heading, text: headingTextClean, index: i, score };
      // Nothing can beat an exact match, so stop scanning.
      if (score === 100) break;
    }
  }

  if (bestMatch === null) return notFound;

  if (verbose) {
    console.log(`${DEFAULT_SPACE}Found heading by text match (score: ${bestMatch.score}): "${bestMatch.text}"`);
  }
  return { heading: bestMatch.heading, text: bestMatch.text, index: bestMatch.index };
}
470
+
401
471
  async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot, verbose = false, progress = "") {
402
472
  const startTime = Date.now();
403
473
 
@@ -475,22 +545,39 @@ async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot,
475
545
  }
476
546
  }
477
547
 
548
+ // If anchor not found by ID, try to find heading by text
478
549
  if (!targetHeading) {
479
- const validationTargetUrl = link.basePath.replace(baseUrl, validationBaseUrl);
480
- return new ValidationResult(
481
- link.source,
482
- link.targetUrl, // sourceUrl (production)
483
- validationTargetUrl, // targetUrl (validation)
484
- link.basePath,
485
- link.anchor,
486
- link.expectedSlug,
487
- "failure",
488
- null,
489
- null,
490
- null,
491
- `Anchor #${link.anchor} not found on base URL page`,
492
- Date.now() - startTime,
493
- );
550
+ if (verbose) {
551
+ console.log(`${DEFAULT_SPACE}Anchor #${link.anchor} not found by ID, searching by text...`);
552
+ }
553
+
554
+ // Try to find the heading by matching the link text
555
+ const textMatch = await findHeadingByText(page, link.source.linkText, verbose);
556
+
557
+ if (!textMatch.heading) {
558
+ const validationTargetUrl = link.basePath.replace(baseUrl, validationBaseUrl);
559
+ return new ValidationResult(
560
+ link.source,
561
+ link.targetUrl, // sourceUrl (production)
562
+ validationTargetUrl, // targetUrl (validation)
563
+ link.basePath,
564
+ link.anchor,
565
+ link.expectedSlug,
566
+ "failure",
567
+ null,
568
+ null,
569
+ null,
570
+ `Anchor #${link.anchor} not found on base URL page. Also could not find a heading matching "${link.source.linkText}"`,
571
+ Date.now() - startTime,
572
+ );
573
+ }
574
+
575
+ // Found a heading by text! Use it as the target
576
+ targetHeading = textMatch.heading;
577
+
578
+ if (verbose) {
579
+ console.log(`${DEFAULT_SPACE}✓ Using heading found by text match: "${textMatch.text}"`);
580
+ }
494
581
  }
495
582
 
496
583
  // Get the actual heading text from the base URL
@@ -552,20 +639,37 @@ async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot,
552
639
 
553
640
  if (matchingHeadings.length === 0) {
554
641
  const validationTargetUrl = validationUrl;
555
- return new ValidationResult(
556
- link.source,
557
- link.targetUrl, // sourceUrl (production)
558
- validationTargetUrl, // targetUrl (validation)
559
- link.basePath,
560
- link.anchor,
561
- link.expectedSlug,
562
- "failure",
563
- null,
564
- actualHeadingTextClean,
565
- null,
566
- `Heading "${actualHeadingTextClean}" found on base URL but not on validation URL`,
567
- Date.now() - startTime,
568
- );
642
+
643
+ // Try to find heading by text on validation page as well
644
+ if (verbose) {
645
+ console.log(`${DEFAULT_SPACE}Heading not found by text on validation page, trying broader search...`);
646
+ }
647
+
648
+ const validationTextMatch = await findHeadingByText(page, link.source.linkText, verbose);
649
+
650
+ if (validationTextMatch.heading) {
651
+ if (verbose) {
652
+ console.log(`${DEFAULT_SPACE}Found alternative heading on validation page: "${validationTextMatch.text}"`);
653
+ }
654
+
655
+ // Use this heading instead
656
+ matchingHeadings.push(validationTextMatch.heading);
657
+ } else {
658
+ return new ValidationResult(
659
+ link.source,
660
+ link.targetUrl, // sourceUrl (production)
661
+ validationTargetUrl, // targetUrl (validation)
662
+ link.basePath,
663
+ link.anchor,
664
+ link.expectedSlug,
665
+ "failure",
666
+ null,
667
+ actualHeadingTextClean,
668
+ null,
669
+ `Heading "${actualHeadingTextClean}" found on base URL but not on validation URL`,
670
+ Date.now() - startTime,
671
+ );
672
+ }
569
673
  }
570
674
 
571
675
  // Use the same index to handle duplicate headings
@@ -614,6 +718,10 @@ async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot,
614
718
 
615
719
  if (!generatedAnchor) {
616
720
  const validationTargetUrl = validationUrl;
721
+
722
+ // Suggest a kebab-case anchor based on the heading text
723
+ const suggestedAnchor = toKebabCase(actualHeadingTextClean);
724
+
617
725
  return new ValidationResult(
618
726
  link.source,
619
727
  link.targetUrl, // sourceUrl (production)
@@ -625,7 +733,7 @@ async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot,
625
733
  null,
626
734
  actualHeadingTextClean,
627
735
  null,
628
- `Could not extract generated anchor after clicking heading "${actualHeadingTextClean}"`,
736
+ `Could not extract generated anchor after clicking heading "${actualHeadingTextClean}". Suggested anchor based on heading: #${suggestedAnchor}`,
629
737
  Date.now() - startTime,
630
738
  );
631
739
  }
@@ -661,7 +769,7 @@ async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot,
661
769
  null,
662
770
  actualHeadingTextClean,
663
771
  generatedAnchor,
664
- `Expected anchor "#${link.anchor}" but page generates "#${generatedAnchor}" for heading "${actualHeadingTextClean}"`,
772
+ `Expected anchor "#${link.anchor}" but page generates "#${generatedAnchor}" for heading "${actualHeadingTextClean}". Suggestion: Update link to use #${generatedAnchor}`,
665
773
  Date.now() - startTime,
666
774
  );
667
775
  }