@writechoice/mint-cli 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -25
- package/bin/cli.js +4 -2
- package/package.json +1 -1
- package/src/commands/validate/links.js +292 -107
- package/src/utils/helpers.js +5 -0
package/README.md
CHANGED
|
@@ -47,7 +47,7 @@ Check the installed version:
|
|
|
47
47
|
```bash
|
|
48
48
|
writechoice --version
|
|
49
49
|
# or
|
|
50
|
-
writechoice -
|
|
50
|
+
writechoice -v
|
|
51
51
|
```
|
|
52
52
|
|
|
53
53
|
### Update to Latest Version
|
|
@@ -74,6 +74,37 @@ You can also omit the `https://` prefix:
|
|
|
74
74
|
writechoice check links docs.example.com
|
|
75
75
|
```
|
|
76
76
|
|
|
77
|
+
**Using a Validation Base URL**
|
|
78
|
+
|
|
79
|
+
When validating anchor links online, the tool can use a different base URL (e.g., a local development server or staging environment) to click on headings and extract the generated anchors:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Use localhost:3000 for validation (default)
|
|
83
|
+
writechoice check links docs.example.com
|
|
84
|
+
|
|
85
|
+
# Use a custom validation URL
|
|
86
|
+
writechoice check links docs.example.com http://localhost:3000
|
|
87
|
+
|
|
88
|
+
# Use a staging environment
|
|
89
|
+
writechoice check links docs.example.com https://staging.example.com
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
The validation base URL is only used for online checks. Local file validation remains unchanged for optimal performance.
|
|
93
|
+
|
|
94
|
+
**How the two-step validation works:**
|
|
95
|
+
|
|
96
|
+
For anchor links, the tool performs a smart validation:
|
|
97
|
+
|
|
98
|
+
1. Navigates to your production docs (base URL) to find the actual heading the anchor points to
|
|
99
|
+
2. Then navigates to your local dev server (validation URL) and clicks the same heading to see what anchor it generates
|
|
100
|
+
3. Compares the two anchors to detect mismatches
|
|
101
|
+
|
|
102
|
+
This is useful because:
|
|
103
|
+
|
|
104
|
+
- Link text in MDX files may differ from actual heading text
|
|
105
|
+
- The tool handles pages with duplicate headings correctly by matching heading position
|
|
106
|
+
- Validates against your local development environment before deploying
|
|
107
|
+
|
|
77
108
|
### Common Options
|
|
78
109
|
|
|
79
110
|
```bash
|
|
@@ -110,19 +141,20 @@ writechoice check links docs.example.com --fix-from-report custom_report.json
|
|
|
110
141
|
|
|
111
142
|
### Complete Options
|
|
112
143
|
|
|
113
|
-
| Option | Alias | Description
|
|
114
|
-
| -------------------------- | ----- |
|
|
115
|
-
| `<baseUrl>` | - | Base URL for the documentation site (required, with or without https://)
|
|
116
|
-
|
|
|
117
|
-
| `--
|
|
118
|
-
| `--
|
|
119
|
-
| `--
|
|
120
|
-
| `--
|
|
121
|
-
| `--
|
|
122
|
-
| `--
|
|
123
|
-
| `--
|
|
124
|
-
| `--
|
|
125
|
-
| `--fix
|
|
144
|
+
| Option | Alias | Description | Default |
|
|
145
|
+
| -------------------------- | ----- | ------------------------------------------------------------------------- | ----------------------- |
|
|
146
|
+
| `<baseUrl>` | - | Base URL for the documentation site (required, with or without https://) | - |
|
|
147
|
+
| `[validationBaseUrl]` | - | Base URL for online validation (optional, clicks headings to get anchors) | `http://localhost:3000` |
|
|
148
|
+
| `--file <path>` | `-f` | Validate links in a single MDX file | - |
|
|
149
|
+
| `--dir <path>` | `-d` | Validate links in a specific directory | - |
|
|
150
|
+
| `--output <path>` | `-o` | Output path for JSON report | `links_report.json` |
|
|
151
|
+
| `--dry-run` | - | Extract and show links without validating | `false` |
|
|
152
|
+
| `--quiet` | - | Suppress terminal output (only generate report) | `false` |
|
|
153
|
+
| `--concurrency <number>` | `-c` | Number of concurrent browser tabs | `25` |
|
|
154
|
+
| `--headless` | - | Run browser in headless mode | `true` |
|
|
155
|
+
| `--no-headless` | - | Show browser window (for debugging) | - |
|
|
156
|
+
| `--fix` | - | Automatically fix anchor links in MDX files | `false` |
|
|
157
|
+
| `--fix-from-report [path]` | - | Fix anchor links from report file (optional path) | `links_report.json` |
|
|
126
158
|
|
|
127
159
|
**Note:** Detailed progress output is shown by default. Use `--quiet` to suppress terminal output.
|
|
128
160
|
|
|
@@ -138,15 +170,23 @@ The tool extracts internal links from MDX files in the following formats:
|
|
|
138
170
|
4. **JSX Button components**: `<Button href="/path/to/page#anchor">Button Text</Button>`
|
|
139
171
|
|
|
140
172
|
**Images are automatically ignored:**
|
|
173
|
+
|
|
141
174
|
- Markdown images: `![Alt text](./image.png)`
|
|
142
175
|
- HTML images: `<img src="./image.png" />`
|
|
143
176
|
|
|
144
177
|
### Validation Process
|
|
145
178
|
|
|
146
179
|
1. **Local Validation**: First checks if the target MDX file exists locally
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
180
|
+
- For normal links: Verifies the file exists in the repository
|
|
181
|
+
- For anchor links: Checks if the heading exists in the MDX file with matching kebab-case format
|
|
182
|
+
2. **Online Validation**: If the local check fails, the tool validates the link online (anchor links use a two-step validation process)
|
|
183
|
+
- For normal links: Navigates to the validation base URL and verifies the page loads successfully
|
|
184
|
+
- For anchor links (two-step process):
|
|
185
|
+
1. **Step 1 - Find the target heading**: Navigates to the base URL (production docs) with the anchor to identify which heading the anchor points to and its position (handles duplicate headings)
|
|
186
|
+
2. **Step 2 - Get generated anchor**: Navigates to the validation base URL (e.g., localhost:3000), finds the same heading (by text and position), clicks it to trigger anchor generation, and extracts the generated anchor from the URL
|
|
187
|
+
3. Compares the generated anchor with the expected anchor from the MDX file
|
|
188
|
+
3. **Validation Base URL**: By default uses `http://localhost:3000` for online validation, or you can specify a custom URL (e.g., staging environment). This allows testing against a local development server or staging environment while validating links meant for production.
|
|
189
|
+
4. **Auto-Fix**: When issues are found, can automatically update MDX files with the correct anchors
|
|
150
190
|
|
|
151
191
|
### Report Format
|
|
152
192
|
|
|
@@ -168,14 +208,6 @@ The tool generates a JSON report with the following structure:
|
|
|
168
208
|
"failure": 8,
|
|
169
209
|
"error": 2
|
|
170
210
|
},
|
|
171
|
-
"summary_by_file": {
|
|
172
|
-
"docs/getting-started.mdx": {
|
|
173
|
-
"total": 10,
|
|
174
|
-
"success": 9,
|
|
175
|
-
"failure": 1,
|
|
176
|
-
"error": 0
|
|
177
|
-
}
|
|
178
|
-
},
|
|
179
211
|
"results_by_file": {
|
|
180
212
|
"docs/getting-started.mdx": [
|
|
181
213
|
{
|
package/bin/cli.js
CHANGED
|
@@ -24,7 +24,7 @@ const check = program.command("check").description("Validation commands for docu
|
|
|
24
24
|
|
|
25
25
|
// Validate links subcommand
|
|
26
26
|
check
|
|
27
|
-
.command("links <baseUrl>")
|
|
27
|
+
.command("links <baseUrl> [validationBaseUrl]")
|
|
28
28
|
.description("Validate internal links and anchors in MDX documentation files")
|
|
29
29
|
.option("-f, --file <path>", "Validate links in a single MDX file")
|
|
30
30
|
.option("-d, --dir <path>", "Validate links in a specific directory")
|
|
@@ -36,10 +36,12 @@ check
|
|
|
36
36
|
.option("--no-headless", "Show browser window (for debugging)")
|
|
37
37
|
.option("--fix", "Automatically fix anchor links in MDX files")
|
|
38
38
|
.option("--fix-from-report [path]", "Fix anchor links from report file (default: links_report.json)")
|
|
39
|
-
.action(async (baseUrl, options) => {
|
|
39
|
+
.action(async (baseUrl, validationBaseUrl, options) => {
|
|
40
40
|
const { validateLinks } = await import("../src/commands/validate/links.js");
|
|
41
41
|
// Verbose is now default (true unless --quiet is specified)
|
|
42
42
|
options.verbose = !options.quiet;
|
|
43
|
+
// Set validation base URL to localhost:3000 if not provided
|
|
44
|
+
options.validationBaseUrl = validationBaseUrl || "http://localhost:3000";
|
|
43
45
|
await validateLinks(baseUrl, options);
|
|
44
46
|
});
|
|
45
47
|
|
package/package.json
CHANGED
package/src/commands/validate/links.js
CHANGED
|
@@ -26,6 +26,8 @@ const __filename = fileURLToPath(import.meta.url);
|
|
|
26
26
|
const __dirname = dirname(__filename);
|
|
27
27
|
|
|
28
28
|
// Configuration
|
|
29
|
+
|
|
30
|
+
const DEFAULT_SPACE = " ";
|
|
29
31
|
const DEFAULT_BASE_URL = "https://docs.nebius.com";
|
|
30
32
|
const EXCLUDED_DIRS = ["snippets"];
|
|
31
33
|
const MDX_DIRS = ["."];
|
|
@@ -44,12 +46,14 @@ const LINK_PATTERNS = {
|
|
|
44
46
|
|
|
45
47
|
// Data Structures
|
|
46
48
|
class LinkLocation {
|
|
47
|
-
constructor(filePath, lineNumber, linkText, rawHref, linkType) {
|
|
49
|
+
constructor(filePath, lineNumber, linkText, rawHref, linkType, sourceUrl, targetUrl) {
|
|
48
50
|
this.filePath = filePath;
|
|
49
51
|
this.lineNumber = lineNumber;
|
|
50
52
|
this.linkText = linkText;
|
|
51
53
|
this.rawHref = rawHref;
|
|
52
54
|
this.linkType = linkType;
|
|
55
|
+
this.sourceUrl = sourceUrl;
|
|
56
|
+
this.targetUrl = targetUrl;
|
|
53
57
|
}
|
|
54
58
|
}
|
|
55
59
|
|
|
@@ -66,6 +70,7 @@ class Link {
|
|
|
66
70
|
class ValidationResult {
|
|
67
71
|
constructor(
|
|
68
72
|
source,
|
|
73
|
+
sourceUrl,
|
|
69
74
|
targetUrl,
|
|
70
75
|
basePath,
|
|
71
76
|
anchor,
|
|
@@ -73,11 +78,12 @@ class ValidationResult {
|
|
|
73
78
|
status,
|
|
74
79
|
actualUrl = null,
|
|
75
80
|
actualHeading = null,
|
|
76
|
-
|
|
81
|
+
actualHeadingAnchor = null,
|
|
77
82
|
errorMessage = null,
|
|
78
83
|
validationTimeMs = 0,
|
|
79
84
|
) {
|
|
80
85
|
this.source = source;
|
|
86
|
+
this.sourceUrl = sourceUrl;
|
|
81
87
|
this.targetUrl = targetUrl;
|
|
82
88
|
this.basePath = basePath;
|
|
83
89
|
this.anchor = anchor;
|
|
@@ -85,7 +91,7 @@ class ValidationResult {
|
|
|
85
91
|
this.status = status;
|
|
86
92
|
this.actualUrl = actualUrl;
|
|
87
93
|
this.actualHeading = actualHeading;
|
|
88
|
-
this.
|
|
94
|
+
this.actualHeadingAnchor = actualHeadingAnchor;
|
|
89
95
|
this.errorMessage = errorMessage;
|
|
90
96
|
this.validationTimeMs = validationTimeMs;
|
|
91
97
|
}
|
|
@@ -199,7 +205,7 @@ function extractMdxHeadings(filePath) {
|
|
|
199
205
|
}
|
|
200
206
|
}
|
|
201
207
|
|
|
202
|
-
function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
208
|
+
function extractLinksFromFile(filePath, baseUrl, validationBaseUrl, repoRoot, verbose = false) {
|
|
203
209
|
if (verbose) {
|
|
204
210
|
console.log(` Extracting links from ${relative(repoRoot, filePath)}`);
|
|
205
211
|
}
|
|
@@ -215,6 +221,12 @@ function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
|
215
221
|
const { cleanedContent } = removeCodeBlocksAndFrontmatter(content);
|
|
216
222
|
const links = [];
|
|
217
223
|
|
|
224
|
+
// Calculate source URLs from file path
|
|
225
|
+
const relativeFilePath = relative(repoRoot, filePath);
|
|
226
|
+
const urlPath = relativeFilePath.replace(/\.mdx$/, "").replace(/\/index$/, "");
|
|
227
|
+
const fileSourceUrl = normalizeUrl(`${baseUrl}/${urlPath}`);
|
|
228
|
+
const fileTargetUrl = normalizeUrl(`${validationBaseUrl}/${urlPath}`);
|
|
229
|
+
|
|
218
230
|
// Collect all image positions to skip them
|
|
219
231
|
const imagePositions = new Set();
|
|
220
232
|
|
|
@@ -253,6 +265,8 @@ function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
|
253
265
|
linkText.trim(),
|
|
254
266
|
href,
|
|
255
267
|
"markdown",
|
|
268
|
+
fileSourceUrl,
|
|
269
|
+
fileTargetUrl,
|
|
256
270
|
);
|
|
257
271
|
|
|
258
272
|
const [basePath, anchor = ""] = targetUrl.split("#");
|
|
@@ -278,6 +292,8 @@ function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
|
278
292
|
linkText.trim(),
|
|
279
293
|
href,
|
|
280
294
|
"html",
|
|
295
|
+
fileSourceUrl,
|
|
296
|
+
fileTargetUrl,
|
|
281
297
|
);
|
|
282
298
|
|
|
283
299
|
const [basePath, anchor = ""] = targetUrl.split("#");
|
|
@@ -303,6 +319,8 @@ function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
|
303
319
|
linkText.trim(),
|
|
304
320
|
href,
|
|
305
321
|
"jsx",
|
|
322
|
+
fileSourceUrl,
|
|
323
|
+
fileTargetUrl,
|
|
306
324
|
);
|
|
307
325
|
|
|
308
326
|
const [basePath, anchor = ""] = targetUrl.split("#");
|
|
@@ -328,6 +346,8 @@ function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
|
|
|
328
346
|
linkText.trim(),
|
|
329
347
|
href,
|
|
330
348
|
"jsx",
|
|
349
|
+
fileSourceUrl,
|
|
350
|
+
fileTargetUrl,
|
|
331
351
|
);
|
|
332
352
|
|
|
333
353
|
const [basePath, anchor = ""] = targetUrl.split("#");
|
|
@@ -377,15 +397,15 @@ function findMdxFiles(repoRoot, directory = null, file = null) {
|
|
|
377
397
|
|
|
378
398
|
// Playwright Validation Functions
|
|
379
399
|
|
|
380
|
-
async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, progress = "") {
|
|
400
|
+
async function validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot, verbose = false, progress = "") {
|
|
381
401
|
const startTime = Date.now();
|
|
382
402
|
|
|
383
403
|
try {
|
|
384
404
|
if (verbose) {
|
|
385
|
-
console.log(`${progress}
|
|
405
|
+
console.log(`${progress} -> Validating anchor: ${link.anchor}`);
|
|
386
406
|
}
|
|
387
407
|
|
|
388
|
-
// OPTIMIZATION: Check if anchor exists in local MDX file first
|
|
408
|
+
// OPTIMIZATION: Check if anchor exists in local MDX file first (local validation)
|
|
389
409
|
const mdxFilePath = urlToFilePath(link.basePath, baseUrl, repoRoot);
|
|
390
410
|
if (mdxFilePath && existsSync(mdxFilePath)) {
|
|
391
411
|
const mdxHeadings = extractMdxHeadings(mdxFilePath);
|
|
@@ -394,11 +414,14 @@ async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, pr
|
|
|
394
414
|
if (mdxHeadingsKebab.includes(link.anchor)) {
|
|
395
415
|
const heading = mdxHeadings.find((h) => toKebabCase(h) === link.anchor);
|
|
396
416
|
if (verbose) {
|
|
397
|
-
console.log(
|
|
417
|
+
console.log(`${DEFAULT_SPACE}✓ Anchor validated locally in MDX file`);
|
|
398
418
|
}
|
|
419
|
+
// Construct validation URL
|
|
420
|
+
const validationTargetUrl = link.targetUrl.replace(baseUrl, validationBaseUrl);
|
|
399
421
|
return new ValidationResult(
|
|
400
422
|
link.source,
|
|
401
|
-
link.targetUrl,
|
|
423
|
+
link.targetUrl, // sourceUrl (production)
|
|
424
|
+
validationTargetUrl, // targetUrl (validation)
|
|
402
425
|
link.basePath,
|
|
403
426
|
link.anchor,
|
|
404
427
|
link.expectedSlug,
|
|
@@ -410,24 +433,53 @@ async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, pr
|
|
|
410
433
|
Date.now() - startTime,
|
|
411
434
|
);
|
|
412
435
|
} else if (verbose) {
|
|
413
|
-
console.log(
|
|
436
|
+
console.log(`${DEFAULT_SPACE}Anchor not found in local MDX, checking online...`);
|
|
414
437
|
}
|
|
415
438
|
}
|
|
416
439
|
|
|
417
|
-
//
|
|
418
|
-
|
|
440
|
+
// ONLINE VALIDATION: Two-step process
|
|
441
|
+
// Step 1: Navigate to the base URL (production docs) to find the actual heading
|
|
442
|
+
if (verbose) {
|
|
443
|
+
console.log(`${DEFAULT_SPACE}Step 1: Navigating to base URL to find heading: ${link.targetUrl}`);
|
|
444
|
+
}
|
|
419
445
|
|
|
420
|
-
|
|
421
|
-
let heading = await page.$(`#${link.anchor}`);
|
|
446
|
+
await page.goto(link.targetUrl, { waitUntil: "networkidle", timeout: DEFAULT_TIMEOUT });
|
|
422
447
|
|
|
423
|
-
|
|
424
|
-
|
|
448
|
+
// Try to find the heading by the anchor ID
|
|
449
|
+
let targetHeading = await page.$(`#${link.anchor}`);
|
|
450
|
+
if (!targetHeading) {
|
|
451
|
+
targetHeading = await page.$(`[id="${link.anchor}"]`);
|
|
425
452
|
}
|
|
426
453
|
|
|
427
|
-
|
|
454
|
+
// If we still can't find it, try scrolling to the anchor via hash navigation
|
|
455
|
+
if (!targetHeading) {
|
|
456
|
+
if (verbose) {
|
|
457
|
+
console.log(`${DEFAULT_SPACE}Heading not found by ID, checking if page scrolled to anchor...`);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
// Get all headings and see if any are in the viewport (likely scrolled to)
|
|
461
|
+
const headings = await page.$$("h1, h2, h3, h4, h5, h6");
|
|
462
|
+
for (const heading of headings) {
|
|
463
|
+
const isInViewport = await heading.isVisible();
|
|
464
|
+
const boundingBox = await heading.boundingBox();
|
|
465
|
+
|
|
466
|
+
// Check if heading is near the top of the viewport (likely the anchor target)
|
|
467
|
+
if (isInViewport && boundingBox && boundingBox.y < 300) {
|
|
468
|
+
const headingId = await heading.getAttribute("id");
|
|
469
|
+
if (headingId === link.anchor) {
|
|
470
|
+
targetHeading = heading;
|
|
471
|
+
break;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
if (!targetHeading) {
|
|
478
|
+
const validationTargetUrl = link.basePath.replace(baseUrl, validationBaseUrl);
|
|
428
479
|
return new ValidationResult(
|
|
429
480
|
link.source,
|
|
430
|
-
link.targetUrl,
|
|
481
|
+
link.targetUrl, // sourceUrl (production)
|
|
482
|
+
validationTargetUrl, // targetUrl (validation)
|
|
431
483
|
link.basePath,
|
|
432
484
|
link.anchor,
|
|
433
485
|
link.expectedSlug,
|
|
@@ -435,88 +487,189 @@ async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, pr
|
|
|
435
487
|
null,
|
|
436
488
|
null,
|
|
437
489
|
null,
|
|
438
|
-
`Anchor #${link.anchor} not found on page`,
|
|
490
|
+
`Anchor #${link.anchor} not found on base URL page`,
|
|
439
491
|
Date.now() - startTime,
|
|
440
492
|
);
|
|
441
493
|
}
|
|
442
494
|
|
|
443
|
-
// Get heading text
|
|
444
|
-
const
|
|
445
|
-
const
|
|
446
|
-
const actualKebab = toKebabCase(actualTextClean);
|
|
495
|
+
// Get the actual heading text from the base URL
|
|
496
|
+
const actualHeadingText = await targetHeading.innerText();
|
|
497
|
+
const actualHeadingTextClean = cleanHeadingText(actualHeadingText);
|
|
447
498
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
const mdxHeadingsKebab = mdxHeadings.map((h) => toKebabCase(h));
|
|
499
|
+
if (verbose) {
|
|
500
|
+
console.log(`${DEFAULT_SPACE}Found heading on base URL: "${actualHeadingTextClean}"`);
|
|
501
|
+
}
|
|
452
502
|
|
|
453
|
-
|
|
503
|
+
// Get all headings on the page to determine the index of this heading (for duplicates)
|
|
504
|
+
const allHeadings = await page.$$("h1, h2, h3, h4, h5, h6");
|
|
505
|
+
let targetHeadingIndex = -1;
|
|
506
|
+
const headingsWithSameText = [];
|
|
454
507
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
Date.now() - startTime,
|
|
469
|
-
);
|
|
470
|
-
} else {
|
|
471
|
-
return new ValidationResult(
|
|
472
|
-
link.source,
|
|
473
|
-
link.targetUrl,
|
|
474
|
-
link.basePath,
|
|
475
|
-
link.anchor,
|
|
476
|
-
link.expectedSlug,
|
|
477
|
-
"failure",
|
|
478
|
-
null,
|
|
479
|
-
actualTextClean,
|
|
480
|
-
actualKebab,
|
|
481
|
-
`Anchor "#${link.anchor}" matches page heading "${actualTextClean}" but this heading is not found in the MDX file`,
|
|
482
|
-
Date.now() - startTime,
|
|
483
|
-
);
|
|
508
|
+
for (let i = 0; i < allHeadings.length; i++) {
|
|
509
|
+
const headingText = await allHeadings[i].innerText();
|
|
510
|
+
const headingTextClean = cleanHeadingText(headingText);
|
|
511
|
+
|
|
512
|
+
if (headingTextClean.toLowerCase() === actualHeadingTextClean.toLowerCase()) {
|
|
513
|
+
headingsWithSameText.push(i);
|
|
514
|
+
|
|
515
|
+
// Check if this is our target heading
|
|
516
|
+
const isSameElement = await page.evaluate(({ h1, h2 }) => h1 === h2, { h1: targetHeading, h2: allHeadings[i] });
|
|
517
|
+
|
|
518
|
+
if (isSameElement) {
|
|
519
|
+
targetHeadingIndex = headingsWithSameText.length - 1; // Index within headings with same text
|
|
520
|
+
}
|
|
484
521
|
}
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
Date.now() - startTime,
|
|
513
|
-
);
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
if (verbose) {
|
|
525
|
+
console.log(
|
|
526
|
+
`${DEFAULT_SPACE}Heading occurrence: ${targetHeadingIndex + 1} of ${headingsWithSameText.length} with text "${actualHeadingTextClean}"`,
|
|
527
|
+
);
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
// Step 2: Navigate to the validation URL (localhost) to get the generated anchor
|
|
531
|
+
const validationUrl = link.basePath.replace(baseUrl, validationBaseUrl);
|
|
532
|
+
|
|
533
|
+
if (verbose) {
|
|
534
|
+
console.log(`${DEFAULT_SPACE}Step 2: Navigating to validation URL: ${validationUrl}`);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
await page.goto(validationUrl, { waitUntil: "networkidle", timeout: DEFAULT_TIMEOUT });
|
|
538
|
+
|
|
539
|
+
// Find the same heading on the validation page (by text and index)
|
|
540
|
+
const validationHeadings = await page.$$("h1, h2, h3, h4, h5, h6");
|
|
541
|
+
const matchingHeadings = [];
|
|
542
|
+
|
|
543
|
+
for (const heading of validationHeadings) {
|
|
544
|
+
const headingText = await heading.innerText();
|
|
545
|
+
const headingTextClean = cleanHeadingText(headingText);
|
|
546
|
+
|
|
547
|
+
if (headingTextClean.toLowerCase() === actualHeadingTextClean.toLowerCase()) {
|
|
548
|
+
matchingHeadings.push(heading);
|
|
514
549
|
}
|
|
515
550
|
}
|
|
551
|
+
|
|
552
|
+
if (matchingHeadings.length === 0) {
|
|
553
|
+
const validationTargetUrl = validationUrl;
|
|
554
|
+
return new ValidationResult(
|
|
555
|
+
link.source,
|
|
556
|
+
link.targetUrl, // sourceUrl (production)
|
|
557
|
+
validationTargetUrl, // targetUrl (validation)
|
|
558
|
+
link.basePath,
|
|
559
|
+
link.anchor,
|
|
560
|
+
link.expectedSlug,
|
|
561
|
+
"failure",
|
|
562
|
+
null,
|
|
563
|
+
actualHeadingTextClean,
|
|
564
|
+
null,
|
|
565
|
+
`Heading "${actualHeadingTextClean}" found on base URL but not on validation URL`,
|
|
566
|
+
Date.now() - startTime,
|
|
567
|
+
);
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
// Use the same index to handle duplicate headings
|
|
571
|
+
const targetValidationHeading = matchingHeadings[Math.min(targetHeadingIndex, matchingHeadings.length - 1)];
|
|
572
|
+
|
|
573
|
+
if (verbose) {
|
|
574
|
+
console.log(
|
|
575
|
+
`${DEFAULT_SPACE}Found matching heading on validation page (${targetHeadingIndex + 1} of ${matchingHeadings.length})`,
|
|
576
|
+
);
|
|
577
|
+
console.log(`${DEFAULT_SPACE}Clicking heading to get generated anchor...`);
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
// Click the heading to get the generated anchor
|
|
581
|
+
let clickTarget = targetValidationHeading;
|
|
582
|
+
const linkInHeading = await targetValidationHeading.$("a");
|
|
583
|
+
if (linkInHeading) {
|
|
584
|
+
clickTarget = linkInHeading;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
await clickTarget.click();
|
|
588
|
+
|
|
589
|
+
// Wait for URL update
|
|
590
|
+
await page.waitForTimeout(500);
|
|
591
|
+
|
|
592
|
+
// Extract the generated anchor
|
|
593
|
+
const currentUrl = page.url();
|
|
594
|
+
let generatedAnchor = null;
|
|
595
|
+
|
|
596
|
+
if (currentUrl.includes("#")) {
|
|
597
|
+
generatedAnchor = currentUrl.split("#")[1];
|
|
598
|
+
if (verbose) {
|
|
599
|
+
console.log(`${DEFAULT_SPACE}Generated anchor from URL: #${generatedAnchor}`);
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
// If no anchor in URL, try to get it from the href attribute
|
|
604
|
+
if (!generatedAnchor && linkInHeading) {
|
|
605
|
+
const href = await linkInHeading.getAttribute("href");
|
|
606
|
+
if (href && href.includes("#")) {
|
|
607
|
+
generatedAnchor = href.split("#")[1];
|
|
608
|
+
if (verbose) {
|
|
609
|
+
console.log(`${DEFAULT_SPACE}Generated anchor from href: #${generatedAnchor}`);
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
if (!generatedAnchor) {
|
|
615
|
+
const validationTargetUrl = validationUrl;
|
|
616
|
+
return new ValidationResult(
|
|
617
|
+
link.source,
|
|
618
|
+
link.targetUrl, // sourceUrl (production)
|
|
619
|
+
validationTargetUrl, // targetUrl (validation)
|
|
620
|
+
link.basePath,
|
|
621
|
+
link.anchor,
|
|
622
|
+
link.expectedSlug,
|
|
623
|
+
"failure",
|
|
624
|
+
null,
|
|
625
|
+
actualHeadingTextClean,
|
|
626
|
+
null,
|
|
627
|
+
`Could not extract generated anchor after clicking heading "${actualHeadingTextClean}"`,
|
|
628
|
+
Date.now() - startTime,
|
|
629
|
+
);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
// Compare the generated anchor with the expected anchor
|
|
633
|
+
// Construct the full validation URL with the generated anchor
|
|
634
|
+
const validationTargetUrl = generatedAnchor ? `${validationUrl}#${generatedAnchor}` : validationUrl;
|
|
635
|
+
|
|
636
|
+
if (generatedAnchor === link.anchor) {
|
|
637
|
+
return new ValidationResult(
|
|
638
|
+
link.source,
|
|
639
|
+
link.targetUrl, // sourceUrl (production)
|
|
640
|
+
validationTargetUrl, // targetUrl (validation with generated anchor)
|
|
641
|
+
link.basePath,
|
|
642
|
+
link.anchor,
|
|
643
|
+
link.expectedSlug,
|
|
644
|
+
"success",
|
|
645
|
+
link.basePath,
|
|
646
|
+
actualHeadingTextClean,
|
|
647
|
+
generatedAnchor,
|
|
648
|
+
null,
|
|
649
|
+
Date.now() - startTime,
|
|
650
|
+
);
|
|
651
|
+
} else {
|
|
652
|
+
return new ValidationResult(
|
|
653
|
+
link.source,
|
|
654
|
+
link.targetUrl, // sourceUrl (production)
|
|
655
|
+
validationTargetUrl, // targetUrl (validation with generated anchor)
|
|
656
|
+
link.basePath,
|
|
657
|
+
link.anchor,
|
|
658
|
+
link.expectedSlug,
|
|
659
|
+
"failure",
|
|
660
|
+
null,
|
|
661
|
+
actualHeadingTextClean,
|
|
662
|
+
generatedAnchor,
|
|
663
|
+
`Expected anchor "#${link.anchor}" but page generates "#${generatedAnchor}" for heading "${actualHeadingTextClean}"`,
|
|
664
|
+
Date.now() - startTime,
|
|
665
|
+
);
|
|
666
|
+
}
|
|
516
667
|
} catch (error) {
|
|
668
|
+
const validationTargetUrl = link.targetUrl.replace(baseUrl, validationBaseUrl);
|
|
517
669
|
return new ValidationResult(
|
|
518
670
|
link.source,
|
|
519
|
-
link.targetUrl,
|
|
671
|
+
link.targetUrl, // sourceUrl (production)
|
|
672
|
+
validationTargetUrl, // targetUrl (validation)
|
|
520
673
|
link.basePath,
|
|
521
674
|
link.anchor,
|
|
522
675
|
link.expectedSlug,
|
|
@@ -530,23 +683,25 @@ async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, pr
|
|
|
530
683
|
}
|
|
531
684
|
}
|
|
532
685
|
|
|
533
|
-
async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false, progress = "") {
|
|
686
|
+
async function validateNormalLink(page, link, baseUrl, validationBaseUrl, repoRoot, verbose = false, progress = "") {
|
|
534
687
|
const startTime = Date.now();
|
|
535
688
|
|
|
536
689
|
try {
|
|
537
690
|
if (verbose) {
|
|
538
|
-
console.log(`${progress}
|
|
691
|
+
console.log(`${progress} -> Validating link: ${link.targetUrl}`);
|
|
539
692
|
}
|
|
540
693
|
|
|
541
694
|
// OPTIMIZATION: Check if target MDX file exists locally first
|
|
542
695
|
const mdxFilePath = urlToFilePath(link.targetUrl, baseUrl, repoRoot);
|
|
543
696
|
if (mdxFilePath && existsSync(mdxFilePath)) {
|
|
544
697
|
if (verbose) {
|
|
545
|
-
console.log(
|
|
698
|
+
console.log(`${DEFAULT_SPACE}✓ Link validated locally (file exists)`);
|
|
546
699
|
}
|
|
700
|
+
const validationTargetUrl = link.targetUrl.replace(baseUrl, validationBaseUrl);
|
|
547
701
|
return new ValidationResult(
|
|
548
702
|
link.source,
|
|
549
|
-
link.targetUrl,
|
|
703
|
+
link.targetUrl, // sourceUrl (production)
|
|
704
|
+
validationTargetUrl, // targetUrl (validation)
|
|
550
705
|
link.basePath,
|
|
551
706
|
link.anchor,
|
|
552
707
|
link.expectedSlug,
|
|
@@ -558,16 +713,24 @@ async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false
|
|
|
558
713
|
Date.now() - startTime,
|
|
559
714
|
);
|
|
560
715
|
} else if (verbose) {
|
|
561
|
-
console.log(
|
|
716
|
+
console.log(`${DEFAULT_SPACE}File not found locally, checking online...`);
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
// Convert the target URL to use the validation base URL
|
|
720
|
+
const validationUrl = link.targetUrl.replace(baseUrl, validationBaseUrl);
|
|
721
|
+
|
|
722
|
+
if (verbose) {
|
|
723
|
+
console.log(`${DEFAULT_SPACE}Navigating to: ${validationUrl}`);
|
|
562
724
|
}
|
|
563
725
|
|
|
564
|
-
// Navigate to the
|
|
565
|
-
const response = await page.goto(
|
|
726
|
+
// Navigate to the validation URL
|
|
727
|
+
const response = await page.goto(validationUrl, { waitUntil: "networkidle", timeout: DEFAULT_TIMEOUT });
|
|
566
728
|
|
|
567
729
|
if (!response) {
|
|
568
730
|
return new ValidationResult(
|
|
569
731
|
link.source,
|
|
570
|
-
link.targetUrl,
|
|
732
|
+
link.targetUrl, // sourceUrl (production)
|
|
733
|
+
validationUrl, // targetUrl (validation)
|
|
571
734
|
link.basePath,
|
|
572
735
|
link.anchor,
|
|
573
736
|
link.expectedSlug,
|
|
@@ -585,7 +748,8 @@ async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false
|
|
|
585
748
|
if (response.status() >= 400) {
|
|
586
749
|
return new ValidationResult(
|
|
587
750
|
link.source,
|
|
588
|
-
link.targetUrl,
|
|
751
|
+
link.targetUrl, // sourceUrl (production)
|
|
752
|
+
validationUrl, // targetUrl (validation)
|
|
589
753
|
link.basePath,
|
|
590
754
|
link.anchor,
|
|
591
755
|
link.expectedSlug,
|
|
@@ -600,7 +764,8 @@ async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false
|
|
|
600
764
|
|
|
601
765
|
return new ValidationResult(
|
|
602
766
|
link.source,
|
|
603
|
-
link.targetUrl,
|
|
767
|
+
link.targetUrl, // sourceUrl (production)
|
|
768
|
+
validationUrl, // targetUrl (validation)
|
|
604
769
|
link.basePath,
|
|
605
770
|
link.anchor,
|
|
606
771
|
link.expectedSlug,
|
|
@@ -612,9 +777,11 @@ async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false
|
|
|
612
777
|
Date.now() - startTime,
|
|
613
778
|
);
|
|
614
779
|
} catch (error) {
|
|
780
|
+
const validationTargetUrl = link.targetUrl.replace(baseUrl, validationBaseUrl);
|
|
615
781
|
return new ValidationResult(
|
|
616
782
|
link.source,
|
|
617
|
-
link.targetUrl,
|
|
783
|
+
link.targetUrl, // sourceUrl (production)
|
|
784
|
+
validationTargetUrl, // targetUrl (validation)
|
|
618
785
|
link.basePath,
|
|
619
786
|
link.anchor,
|
|
620
787
|
link.expectedSlug,
|
|
@@ -628,15 +795,15 @@ async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false
|
|
|
628
795
|
}
|
|
629
796
|
}
|
|
630
797
|
|
|
631
|
-
async function validateLink(page, link, baseUrl, repoRoot, verbose = false, progress = "") {
|
|
798
|
+
async function validateLink(page, link, baseUrl, validationBaseUrl, repoRoot, verbose = false, progress = "") {
|
|
632
799
|
if (link.anchor) {
|
|
633
|
-
return await validateAnchor(page, link, baseUrl, repoRoot, verbose, progress);
|
|
800
|
+
return await validateAnchor(page, link, baseUrl, validationBaseUrl, repoRoot, verbose, progress);
|
|
634
801
|
} else {
|
|
635
|
-
return await validateNormalLink(page, link, baseUrl, repoRoot, verbose, progress);
|
|
802
|
+
return await validateNormalLink(page, link, baseUrl, validationBaseUrl, repoRoot, verbose, progress);
|
|
636
803
|
}
|
|
637
804
|
}
|
|
638
805
|
|
|
639
|
-
async function validateLinksAsync(links, baseUrl, repoRoot, concurrency, headless, verbose) {
|
|
806
|
+
async function validateLinksAsync(links, baseUrl, validationBaseUrl, repoRoot, concurrency, headless, verbose) {
|
|
640
807
|
const results = [];
|
|
641
808
|
|
|
642
809
|
let browser;
|
|
@@ -670,7 +837,7 @@ async function validateLinksAsync(links, baseUrl, repoRoot, concurrency, headles
|
|
|
670
837
|
const page = await context.newPage();
|
|
671
838
|
|
|
672
839
|
try {
|
|
673
|
-
const result = await validateLink(page, link, baseUrl, repoRoot, verbose, progress);
|
|
840
|
+
const result = await validateLink(page, link, baseUrl, validationBaseUrl, repoRoot, verbose, progress);
|
|
674
841
|
return result;
|
|
675
842
|
} finally {
|
|
676
843
|
await context.close();
|
|
@@ -826,7 +993,7 @@ function fixLinks(results, repoRoot, verbose = false) {
|
|
|
826
993
|
const failuresByFile = {};
|
|
827
994
|
|
|
828
995
|
for (const result of results) {
|
|
829
|
-
if (result.status !== "failure" || !result.
|
|
996
|
+
if (result.status !== "failure" || !result.actualHeadingAnchor || !result.anchor) {
|
|
830
997
|
continue;
|
|
831
998
|
}
|
|
832
999
|
|
|
@@ -873,7 +1040,7 @@ function fixLinks(results, repoRoot, verbose = false) {
|
|
|
873
1040
|
const linkType = failure.source.linkType;
|
|
874
1041
|
|
|
875
1042
|
const pathPart = oldHref.includes("#") ? oldHref.split("#")[0] : oldHref;
|
|
876
|
-
const newHref = pathPart ? `${pathPart}#${failure.
|
|
1043
|
+
const newHref = pathPart ? `${pathPart}#${failure.actualHeadingAnchor}` : `#${failure.actualHeadingAnchor}`;
|
|
877
1044
|
|
|
878
1045
|
if (oldHref === newHref) {
|
|
879
1046
|
if (verbose) {
|
|
@@ -977,7 +1144,6 @@ function generateReport(results, config, outputPath) {
|
|
|
977
1144
|
failure,
|
|
978
1145
|
error,
|
|
979
1146
|
},
|
|
980
|
-
summary_by_file: summaryByFile,
|
|
981
1147
|
results_by_file: resultsByFile,
|
|
982
1148
|
};
|
|
983
1149
|
|
|
@@ -1042,13 +1208,27 @@ export async function validateLinks(baseUrl, options) {
|
|
|
1042
1208
|
console.log(`Found ${mdxFiles.length} MDX files\n`);
|
|
1043
1209
|
}
|
|
1044
1210
|
|
|
1211
|
+
// Normalize validation base URL
|
|
1212
|
+
let normalizedValidationBaseUrl = options.validationBaseUrl || "http://localhost:3000";
|
|
1213
|
+
if (!normalizedValidationBaseUrl.startsWith("http://") && !normalizedValidationBaseUrl.startsWith("https://")) {
|
|
1214
|
+
normalizedValidationBaseUrl = "https://" + normalizedValidationBaseUrl;
|
|
1215
|
+
}
|
|
1216
|
+
// Remove trailing slash
|
|
1217
|
+
normalizedValidationBaseUrl = normalizeUrl(normalizedValidationBaseUrl);
|
|
1218
|
+
|
|
1045
1219
|
if (options.verbose && !options.quiet) {
|
|
1046
1220
|
console.log("Extracting links...");
|
|
1047
1221
|
}
|
|
1048
1222
|
|
|
1049
1223
|
const allLinks = [];
|
|
1050
1224
|
for (const mdxFile of mdxFiles) {
|
|
1051
|
-
const links = extractLinksFromFile(
|
|
1225
|
+
const links = extractLinksFromFile(
|
|
1226
|
+
mdxFile,
|
|
1227
|
+
normalizedBaseUrl,
|
|
1228
|
+
normalizedValidationBaseUrl,
|
|
1229
|
+
repoRoot,
|
|
1230
|
+
options.verbose && !options.quiet,
|
|
1231
|
+
);
|
|
1052
1232
|
allLinks.push(...links);
|
|
1053
1233
|
}
|
|
1054
1234
|
|
|
@@ -1081,9 +1261,14 @@ export async function validateLinks(baseUrl, options) {
|
|
|
1081
1261
|
console.log("\nValidating links...");
|
|
1082
1262
|
}
|
|
1083
1263
|
|
|
1264
|
+
if (!options.quiet) {
|
|
1265
|
+
console.log(`\nUsing validation base URL: ${normalizedValidationBaseUrl}`);
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1084
1268
|
const results = await validateLinksAsync(
|
|
1085
1269
|
allLinks,
|
|
1086
1270
|
normalizedBaseUrl,
|
|
1271
|
+
normalizedValidationBaseUrl,
|
|
1087
1272
|
repoRoot,
|
|
1088
1273
|
parseInt(options.concurrency) || DEFAULT_CONCURRENCY,
|
|
1089
1274
|
options.headless !== false,
|
package/src/utils/helpers.js
CHANGED
|
@@ -8,6 +8,11 @@ import { URL } from 'url';
|
|
|
8
8
|
* "Create resources\nCreate resources" -> "Create resources"
|
|
9
9
|
*/
|
|
10
10
|
export function cleanHeadingText(text) {
|
|
11
|
+
// Remove zero-width characters and other invisible Unicode characters
|
|
12
|
+
// This includes: zero-width space, zero-width non-joiner, zero-width joiner,
|
|
13
|
+
// left-to-right mark, right-to-left mark, etc.
|
|
14
|
+
text = text.replace(/[\u200B-\u200D\u200E-\u200F\uFEFF]/g, '');
|
|
15
|
+
|
|
11
16
|
// Split by newlines and get unique parts while preserving order
|
|
12
17
|
const lines = text
|
|
13
18
|
.split('\n')
|