@writechoice/mint-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1183 @@
1
+ /**
2
+ * MDX Link Validation Tool
3
+ *
4
+ * Validates internal links and anchors in MDX documentation files by testing them
5
+ * against the live website. Uses Playwright for browser automation to handle
6
+ * JavaScript-rendered Mintlify pages.
7
+ */
8
+
9
+ import { readFileSync, writeFileSync, existsSync, readdirSync, statSync } from 'fs';
10
+ import { join, relative, resolve, dirname } from 'path';
11
+ import { fileURLToPath } from 'url';
12
+ import { chromium } from 'playwright';
13
+ import chalk from 'chalk';
14
+ import {
15
+ cleanHeadingText,
16
+ toKebabCase,
17
+ isExternalUrl,
18
+ isAnchorOnly,
19
+ normalizeUrl,
20
+ findLineNumber,
21
+ removeCodeBlocksAndFrontmatter,
22
+ resolvePath as resolvePathUtil,
23
+ } from '../../utils/helpers.js';
24
+
25
// Absolute path of this module file and of the directory containing it.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Configuration
const DEFAULT_BASE_URL = 'https://docs.nebius.com';
// Directory names that are skipped while searching for MDX files.
const EXCLUDED_DIRS = ['snippets'];
// Directories (relative to the repository root) searched when none is given.
const MDX_DIRS = ['.'];
const DEFAULT_TIMEOUT = 30000; // 30 seconds, passed to page.goto()
const DEFAULT_CONCURRENCY = 25; // links validated per batch

// Link extraction patterns.
// All patterns are global so they can be used with String.prototype.matchAll.
const LINK_PATTERNS = {
  // [text](url) -> group 1: text, group 2: url
  markdown: /\[([^\]]+?)\]\(([^)]+?)\)/g,
  // <a href="url">text</a> -> group 1: url, group 2: text
  htmlAnchor: /<a\s+href=["'](.*?)["'][^>]*?>(.*?)<\/a>/gs,
  // <Card href="url" title="text"> -> group 1: url, group 2: title (optional).
  // The lazy attribute run lives INSIDE the optional group: when it sat in
  // front of the group it matched the empty string, the group was skipped and
  // the title was never captured. `\s` keeps `subtitle=`/`data-title=` from
  // being mistaken for `title=`.
  jsxCard: /<Card[^>]+?href=["'](.*?)["'](?:[^>]*?\stitle=["'](.*?)["'])?[^>]*?>/g,
  // <Button href="url">text</Button> -> group 1: url, group 2: text
  jsxButton: /<Button[^>]+?href=["'](.*?)["'][^>]*?>(.*?)<\/Button>/gs,
};
42
+
43
+ // Data Structures
44
/**
 * Where a link was found: the file (as given by the caller), the line number,
 * the visible link text, the href exactly as written, and the syntax kind
 * ('markdown', 'html' or 'jsx' in this file).
 */
class LinkLocation {
  constructor(filePath, lineNumber, linkText, rawHref, linkType) {
    // Object.assign copies the fields in this order, so key order is unchanged.
    Object.assign(this, { filePath, lineNumber, linkText, rawHref, linkType });
  }
}
53
+
54
/**
 * A link queued for validation: its source location, the fully resolved
 * target URL, that URL without the fragment, the fragment itself (or null),
 * and the URL pathname the target is expected to have.
 */
class Link {
  constructor(source, targetUrl, basePath, anchor, expectedSlug) {
    // Object.assign copies the fields in this order, so key order is unchanged.
    Object.assign(this, { source, targetUrl, basePath, anchor, expectedSlug });
  }
}
63
+
64
/**
 * Outcome of validating one link. The first five fields mirror the Link that
 * was checked; `status` is 'success', 'failure' or 'error' in this file; the
 * remaining fields describe what was actually found and default to null
 * (or 0 for the elapsed time in milliseconds).
 */
class ValidationResult {
  constructor(
    source,
    targetUrl,
    basePath,
    anchor,
    expectedSlug,
    status,
    actualUrl = null,
    actualHeading = null,
    actualHeadingKebab = null,
    errorMessage = null,
    validationTimeMs = 0
  ) {
    // Object.assign copies the fields in this order, so key order is unchanged.
    Object.assign(this, {
      source,
      targetUrl,
      basePath,
      anchor,
      expectedSlug,
      status,
      actualUrl,
      actualHeading,
      actualHeadingKebab,
      errorMessage,
      validationTimeMs,
    });
  }
}
91
+
92
+ // Utility Functions
93
+
94
/**
 * Map a documentation URL to the MDX file in the repository that would render it.
 *
 * Lookup order for a page path `p`: `<repoRoot>/p.mdx`, then
 * `<repoRoot>/p/index.mdx`. The site root maps to `<repoRoot>/index.mdx`.
 *
 * @param {string} url - Absolute URL of the page (may carry a query or fragment).
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Directory that contains the MDX sources.
 * @returns {string|null} Path of the matching file. For a non-root page with
 *   no match, the `<repoRoot>/p.mdx` candidate is returned even though it does
 *   not exist (callers check existence themselves). Returns null when the URL
 *   cannot be parsed or when the root page has no `index.mdx`.
 */
function urlToFilePath(url, baseUrl, repoRoot) {
  let pagePath = null;

  if (url.startsWith(baseUrl)) {
    const remainder = url.slice(baseUrl.length);
    // Only accept the prefix match on a URL boundary, so that a base of
    // "https://docs.example.com" does not claim "https://docs.example.community/x".
    if (remainder === '' || baseUrl.endsWith('/') || /^[/?#]/.test(remainder)) {
      pagePath = remainder;
    }
  }

  if (pagePath === null) {
    try {
      pagePath = new URL(url).pathname;
    } catch {
      return null;
    }
  }

  // Queries and fragments never correspond to files; leading and trailing
  // slashes would otherwise produce candidates such as "guide/.mdx".
  pagePath = pagePath
    .replace(/[?#].*$/s, '')
    .replace(/^\/+/, '')
    .replace(/\/+$/, '');

  if (!pagePath) {
    const rootIndex = join(repoRoot, 'index.mdx');
    return existsSync(rootIndex) ? rootIndex : null;
  }

  const mdxPath = join(repoRoot, `${pagePath}.mdx`);
  if (existsSync(mdxPath)) {
    return mdxPath;
  }

  const indexPath = join(repoRoot, pagePath, 'index.mdx');
  if (existsSync(indexPath)) {
    return indexPath;
  }

  // Nothing found: hand back the primary candidate; callers test existence.
  return mdxPath;
}
126
+
127
/**
 * Turn an href found in an MDX file into an absolute URL on the docs site.
 *
 * - External hrefs (per isExternalUrl) yield null.
 * - A bare "#fragment" points at the page generated from the MDX file itself.
 * - An href starting with "/" is appended to the base URL.
 * - Anything else is resolved against the directory of the MDX file; a target
 *   that lands outside the repository root yields null.
 * A trailing ".mdx" is dropped from file-derived paths, and the fragment (if
 * any) is re-attached at the end.
 *
 * @param {string} mdxFilePath - Path of the file containing the link.
 * @param {string} href - The href exactly as written.
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Repository root directory.
 * @returns {string|null} Absolute URL, or null when the link is not checked.
 */
function resolvePath(mdxFilePath, href, baseUrl, repoRoot) {
  if (isExternalUrl(href)) {
    return null;
  }

  const [target, fragment] = href.includes('#') ? href.split('#', 2) : [href, ''];

  // Same-page anchor: the URL is derived from the location of the file itself.
  if (!target && fragment) {
    const ownSlug = relative(repoRoot, mdxFilePath).replace(/\.mdx$/, '');
    const ownUrl = normalizeUrl(`${baseUrl}/${ownSlug}`);
    return `${ownUrl}#${fragment}`;
  }

  let pageUrl;
  if (target.startsWith('/')) {
    pageUrl = normalizeUrl(baseUrl + target);
  } else {
    const relativeTarget = target.startsWith('./') ? target.slice(2) : target;
    const absoluteTarget = resolve(dirname(mdxFilePath), relativeTarget);
    const fromRoot = relative(repoRoot, absoluteTarget);

    // A path that climbs out of the repository is not a page of this site.
    if (fromRoot.startsWith('..')) {
      return null;
    }

    pageUrl = normalizeUrl(`${baseUrl}/${fromRoot.replace(/\.mdx$/, '')}`);
  }

  return fragment ? `${pageUrl}#${fragment}` : pageUrl;
}
175
+
176
+ // Link Extraction Functions
177
+
178
/**
 * Collect the text of every Markdown heading (`#` through `######`) in an MDX
 * file, after removeCodeBlocksAndFrontmatter has cleaned the content.
 * A trailing `{#custom-id}` marker is removed from each heading.
 *
 * @param {string} filePath - MDX file to read.
 * @returns {string[]} Heading texts in document order; an empty array when the
 *   file cannot be read or processed.
 */
function extractMdxHeadings(filePath) {
  try {
    const raw = readFileSync(filePath, 'utf-8');
    const { cleanedContent } = removeCodeBlocksAndFrontmatter(raw);

    return Array.from(cleanedContent.matchAll(/^#{1,6}\s+(.+)$/gm), (found) =>
      // Remove any trailing {#custom-id} syntax if present
      found[1].trim().replace(/\s*\{#[^}]+\}\s*$/, '')
    );
  } catch {
    return [];
  }
}
199
+
200
/**
 * Extract every internal link from one MDX file.
 *
 * Four syntaxes are scanned, in this order: markdown `[text](url)`,
 * `<a href>`, `<Card href>` and `<Button href>`. External hrefs and hrefs that
 * resolvePath cannot map to a site URL are skipped.
 *
 * @param {string} filePath - MDX file to scan.
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Repository root; reported file paths are relative to it.
 * @param {boolean} [verbose=false] - Log the file being processed.
 * @returns {Link[]} Links grouped by syntax, each group in match order. An
 *   empty array is returned (and the error logged) if the file cannot be read.
 */
function extractLinksFromFile(filePath, baseUrl, repoRoot, verbose = false) {
  const relativeFile = relative(repoRoot, filePath);

  if (verbose) {
    console.log(` Extracting links from ${relativeFile}`);
  }

  let content;
  try {
    content = readFileSync(filePath, 'utf-8');
  } catch (error) {
    console.error(`Error reading ${filePath}: ${error.message}`);
    return [];
  }

  const { cleanedContent } = removeCodeBlocksAndFrontmatter(content);

  // One entry per supported syntax: which pattern to run, how the link is
  // labelled, and which capture groups hold the href and the visible text.
  const extractors = [
    { pattern: LINK_PATTERNS.markdown, linkType: 'markdown', read: (m) => ({ href: m[2], linkText: m[1] }) },
    { pattern: LINK_PATTERNS.htmlAnchor, linkType: 'html', read: (m) => ({ href: m[1], linkText: m[2] }) },
    // A Card without a captured title is labelled with its href.
    { pattern: LINK_PATTERNS.jsxCard, linkType: 'jsx', read: (m) => ({ href: m[1], linkText: m[2] || m[1] }) },
    { pattern: LINK_PATTERNS.jsxButton, linkType: 'jsx', read: (m) => ({ href: m[1], linkText: m[2] }) },
  ];

  const links = [];

  for (const { pattern, linkType, read } of extractors) {
    for (const match of cleanedContent.matchAll(pattern)) {
      const { href, linkText } = read(match);

      if (isExternalUrl(href)) continue;

      const targetUrl = resolvePath(filePath, href, baseUrl, repoRoot);
      if (!targetUrl) continue;

      // NOTE(review): match.index is an offset into cleanedContent while the
      // line is looked up in the raw content; this is only accurate if the
      // cleaning helper preserves offsets - confirm against its implementation.
      const location = new LinkLocation(
        relativeFile,
        findLineNumber(content, match.index),
        linkText.trim(),
        href,
        linkType
      );

      const [basePath, anchor = ''] = targetUrl.split('#');
      const expectedSlug = new URL(targetUrl).pathname;

      links.push(new Link(location, targetUrl, basePath, anchor || null, expectedSlug));
    }
  }

  return links;
}
350
+
351
/**
 * Find the MDX files to validate.
 *
 * @param {string} repoRoot - Repository root directory.
 * @param {string|null} [directory=null] - Restrict the search to this
 *   directory (resolved against repoRoot). Defaults to MDX_DIRS.
 * @param {string|null} [file=null] - Validate just this file (resolved against
 *   repoRoot); takes precedence over `directory`.
 * @returns {string[]} Sorted list of file paths. With `file`, a one-element
 *   list, or an empty list when the file does not exist.
 */
function findMdxFiles(repoRoot, directory = null, file = null) {
  if (file) {
    const fullPath = resolve(repoRoot, file);
    return existsSync(fullPath) ? [fullPath] : [];
  }

  const searchDirs = directory
    ? [resolve(repoRoot, directory)]
    : MDX_DIRS.map((d) => join(repoRoot, d));

  const excludedNames = new Set(EXCLUDED_DIRS);
  const files = [];

  function walkDir(dir) {
    if (!existsSync(dir)) return;

    for (const entry of readdirSync(dir)) {
      const fullPath = join(dir, entry);
      // throwIfNoEntry:false turns a dangling symlink into `undefined`
      // instead of an exception that would abort the whole scan.
      const stat = statSync(fullPath, { throwIfNoEntry: false });
      if (!stat) continue;

      if (stat.isDirectory()) {
        // Compare the directory NAME only. A substring test on the full path
        // would also skip everything when the repository itself lives under
        // a path containing an excluded word (e.g. "/home/me/snippets-docs").
        if (!excludedNames.has(entry)) {
          walkDir(fullPath);
        }
      } else if (entry.endsWith('.mdx')) {
        files.push(fullPath);
      }
    }
  }

  for (const searchDir of searchDirs) {
    walkDir(searchDir);
  }

  return files.sort();
}
387
+
388
+ // Playwright Validation Functions
389
+
390
/**
 * Validate a link that carries a `#anchor`.
 *
 * 1. If the target page's MDX file exists locally and one of its headings
 *    kebab-cases to the anchor, the link passes without opening the browser.
 * 2. Otherwise the page is loaded and an element whose id equals the anchor is
 *    looked up. Its text is cleaned, kebab-cased and compared with both the
 *    anchor and the local MDX headings.
 *
 * @param {object} page - Playwright page used for navigation and lookup.
 * @param {Link} link - Link to validate; `link.anchor` must be set.
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Repository root directory.
 * @param {boolean} [verbose=false] - Log progress.
 * @param {string} [progress=''] - Prefix for log lines.
 * @returns {Promise<ValidationResult>} Never rejects; unexpected exceptions
 *   become a result with status 'error'.
 */
async function validateAnchor(page, link, baseUrl, repoRoot, verbose = false, progress = '') {
  const startTime = Date.now();

  // Every outcome shares the link fields and the elapsed time.
  const finish = (status, actualUrl, actualHeading, actualHeadingKebab, errorMessage) =>
    new ValidationResult(
      link.source,
      link.targetUrl,
      link.basePath,
      link.anchor,
      link.expectedSlug,
      status,
      actualUrl,
      actualHeading,
      actualHeadingKebab,
      errorMessage,
      Date.now() - startTime
    );

  try {
    if (verbose) {
      console.log(`${progress} Validating anchor: ${link.anchor}`);
    }

    // OPTIMIZATION: check the local MDX file before touching the network.
    const mdxFilePath = urlToFilePath(link.basePath, baseUrl, repoRoot);
    const hasLocalFile = Boolean(mdxFilePath) && existsSync(mdxFilePath);
    const mdxHeadings = hasLocalFile ? extractMdxHeadings(mdxFilePath) : [];
    const mdxHeadingsKebab = mdxHeadings.map((h) => toKebabCase(h));

    if (hasLocalFile) {
      const localIndex = mdxHeadingsKebab.indexOf(link.anchor);
      if (localIndex !== -1) {
        if (verbose) {
          console.log(`${progress} ✓ Anchor validated locally in MDX file`);
        }
        return finish('success', link.basePath, mdxHeadings[localIndex], link.anchor, null);
      }
      if (verbose) {
        console.log(`${progress} Anchor not found in local MDX, checking online...`);
      }
    }

    // Navigate to base page
    await page.goto(link.basePath, { waitUntil: 'networkidle', timeout: DEFAULT_TIMEOUT });

    // Look the element up by attribute value rather than with `#anchor`:
    // `#1-intro` (leading digit) and anchors with punctuation are invalid CSS
    // id selectors, which made the lookup throw and the link get reported as
    // an error instead of being checked.
    const quotedAnchor = link.anchor.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    const heading = await page.$(`[id="${quotedAnchor}"]`);

    if (!heading) {
      return finish('failure', null, null, null, `Anchor #${link.anchor} not found on page`);
    }

    // Get heading text and clean it
    const actualTextClean = cleanHeadingText(await heading.innerText());
    const actualKebab = toKebabCase(actualTextClean);
    const matchesMdx = mdxHeadingsKebab.includes(actualKebab);

    if (actualKebab === link.anchor) {
      if (matchesMdx) {
        return finish('success', link.basePath, actualTextClean, actualKebab, null);
      }
      return finish(
        'failure',
        null,
        actualTextClean,
        actualKebab,
        `Anchor "#${link.anchor}" matches page heading "${actualTextClean}" but this heading is not found in the MDX file`
      );
    }

    const mismatchMessage = matchesMdx
      ? `Expected anchor "#${link.anchor}" but page heading "${actualTextClean}" should use "#${actualKebab}"`
      : `Expected anchor "#${link.anchor}" but found heading "${actualTextClean}" (#${actualKebab}) which is not in the MDX file`;
    return finish('failure', null, actualTextClean, actualKebab, mismatchMessage);
  } catch (error) {
    return finish('error', null, null, null, `Error validating anchor: ${error.message}`);
  }
}
542
+
543
/**
 * Validate a link that has no anchor.
 *
 * The link passes immediately when the MDX file for the target URL exists
 * locally. Otherwise the URL is opened in the browser: a missing response is
 * an 'error', an HTTP status of 400 or above is a 'failure', anything else is
 * a 'success' carrying the URL the page ended up on.
 *
 * @param {object} page - Playwright page used for navigation.
 * @param {Link} link - Link to validate.
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Repository root directory.
 * @param {boolean} [verbose=false] - Log progress.
 * @param {string} [progress=''] - Prefix for log lines.
 * @returns {Promise<ValidationResult>} Never rejects; exceptions become a
 *   result with status 'error'.
 */
async function validateNormalLink(page, link, baseUrl, repoRoot, verbose = false, progress = '') {
  const startTime = Date.now();

  // Every outcome shares the link fields, null heading data and the elapsed time.
  const outcome = (status, actualUrl, errorMessage) =>
    new ValidationResult(
      link.source,
      link.targetUrl,
      link.basePath,
      link.anchor,
      link.expectedSlug,
      status,
      actualUrl,
      null,
      null,
      errorMessage,
      Date.now() - startTime
    );

  try {
    if (verbose) {
      console.log(`${progress} Validating link: ${link.targetUrl}`);
    }

    // OPTIMIZATION: a target file that exists locally needs no network check.
    const localFile = urlToFilePath(link.targetUrl, baseUrl, repoRoot);
    const foundLocally = localFile && existsSync(localFile);

    if (foundLocally) {
      if (verbose) {
        console.log(`${progress} ✓ Link validated locally (file exists)`);
      }
      return outcome('success', link.targetUrl, null);
    }

    if (verbose) {
      console.log(`${progress} File not found locally, checking online...`);
    }

    // Navigate to the target URL
    const response = await page.goto(link.targetUrl, {
      waitUntil: 'networkidle',
      timeout: DEFAULT_TIMEOUT,
    });

    if (!response) {
      return outcome('error', null, 'No response received');
    }

    const landedOn = page.url();

    return response.status() >= 400
      ? outcome('failure', landedOn, `HTTP ${response.status()}: ${response.statusText()}`)
      : outcome('success', landedOn, null);
  } catch (error) {
    return outcome('error', null, `Error validating link: ${error.message}`);
  }
}
640
+
641
/**
 * Validate one link with the checker that matches it: validateAnchor when the
 * link has an anchor, validateNormalLink otherwise. All arguments are passed
 * through unchanged.
 *
 * @returns {Promise<ValidationResult>} Result produced by the chosen checker.
 */
async function validateLink(page, link, baseUrl, repoRoot, verbose = false, progress = '') {
  const check = link.anchor ? validateAnchor : validateNormalLink;
  return await check(page, link, baseUrl, repoRoot, verbose, progress);
}
648
+
649
/**
 * Validate links in a Chromium browser, `concurrency` links at a time.
 *
 * Each link gets its own browser context, which is closed once the link has
 * been checked. Links are processed in consecutive batches; a batch must
 * finish before the next one starts. The browser is always closed before this
 * function returns or throws.
 *
 * @param {Link[]} links - Links to validate.
 * @param {string} baseUrl - Base URL of the documentation site.
 * @param {string} repoRoot - Repository root directory.
 * @param {number} concurrency - Batch size; anything that is not a positive
 *   integer falls back to DEFAULT_CONCURRENCY.
 * @param {boolean} headless - Passed to chromium.launch.
 * @param {boolean} verbose - Log progress.
 * @returns {Promise<ValidationResult[]>} Results in the same order as `links`.
 * @throws Re-throws browser launch/context errors. When the launch error
 *   looks like a missing browser install, install hints are printed and the
 *   process exits with code 1.
 */
async function validateLinksAsync(links, baseUrl, repoRoot, concurrency, headless, verbose) {
  let browser;
  try {
    browser = await chromium.launch({ headless });
  } catch (error) {
    const message = String(error?.message ?? '');
    if (message.includes('Executable doesn\'t exist') ||
        message.includes('Browser was not installed') ||
        message.includes('browserType.launch')) {
      console.error(chalk.red('\n✗ Playwright browsers are not installed!'));
      console.error(chalk.yellow('\nTo install Playwright browsers, run:'));
      console.error(chalk.cyan(' npx playwright install chromium\n'));
      console.error('Or install all browsers with:');
      console.error(chalk.cyan(' npx playwright install\n'));
      process.exit(1);
    }
    throw error;
  }

  // A zero or negative step would keep the batch loop below from ever ending.
  const batchSize = Number.isInteger(concurrency) && concurrency > 0
    ? concurrency
    : DEFAULT_CONCURRENCY;

  async function validateInFreshContext(link, position) {
    const progress = verbose ? `[${position}/${links.length}] ` : '';
    const context = await browser.newContext();

    try {
      // Page creation is inside the try so the context is released even if it fails.
      const page = await context.newPage();
      return await validateLink(page, link, baseUrl, repoRoot, verbose, progress);
    } finally {
      await context.close();
    }
  }

  const results = [];

  try {
    if (verbose) {
      console.log(`\nValidating ${links.length} links with concurrency=${batchSize}...\n`);
    }

    // Process links with concurrency control
    for (let start = 0; start < links.length; start += batchSize) {
      const batch = links.slice(start, start + batchSize);
      const batchResults = await Promise.all(
        batch.map((link, offset) => validateInFreshContext(link, start + offset + 1))
      );
      results.push(...batchResults);
    }
  } finally {
    // Close the browser on failure as well, so no Chromium process is left behind.
    await browser.close();
  }

  return results;
}
703
+
704
+ // Fix Links in MDX Files
705
+
706
/**
 * Reduce a validation result (live object or parsed report entry) to the data
 * needed to rewrite its anchor, or null when it is not fixable.
 *
 * Reports written by this tool serialise results with camelCase keys;
 * snake_case keys are accepted too so that reports in that format still work.
 * A result is fixable when its status is 'failure' and both the original
 * anchor and the kebab-cased actual heading are present.
 */
function toAnchorFix(result) {
  const source = result?.source ?? {};
  const newAnchor = result?.actualHeadingKebab ?? result?.actual_heading_kebab;

  if (result?.status !== 'failure' || !newAnchor || !result.anchor) {
    return null;
  }

  return {
    newAnchor,
    lineNumber: source.lineNumber ?? source.line_number,
    rawHref: source.rawHref ?? source.raw_href,
    linkType: source.linkType ?? source.link_type,
  };
}

/**
 * Replace the first occurrence of `oldHref` on `line` using the syntax of the
 * given link type. Returns the new line, or null when the href is not on it.
 */
function replaceHrefOnLine(line, linkType, oldHref, newHref) {
  const candidates = [];
  if (linkType === 'markdown') {
    candidates.push([`(${oldHref})`, `(${newHref})`]);
  } else if (linkType === 'html' || linkType === 'jsx') {
    for (const quote of ['"', "'"]) {
      candidates.push([`href=${quote}${oldHref}${quote}`, `href=${quote}${newHref}${quote}`]);
    }
  }

  for (const [oldText, newText] of candidates) {
    if (line.includes(oldText)) {
      // A replacer function keeps "$&"-style sequences in the new href literal.
      return line.replace(oldText, () => newText);
    }
  }
  return null;
}

/**
 * Apply anchor fixes to one file and save it if anything changed.
 * Returns the number of links rewritten (0 when the file is missing,
 * untouched, or an error occurred while processing it).
 */
function applyAnchorFixes(repoRoot, filePath, fixes, verbose) {
  const fullPath = join(repoRoot, filePath);

  if (!existsSync(fullPath)) {
    if (verbose) {
      console.log(`Warning: File not found: ${filePath}`);
    }
    return 0;
  }

  if (fixes.length === 0) return 0;

  try {
    const lines = readFileSync(fullPath, 'utf-8').split('\n');
    let fixesCount = 0;

    // Work from the bottom of the file upwards (copy: do not reorder caller data).
    const ordered = [...fixes].sort((a, b) => b.lineNumber - a.lineNumber);

    for (const fix of ordered) {
      const index = fix.lineNumber - 1;

      if (!Number.isInteger(index) || index < 0 || index >= lines.length) {
        if (verbose) {
          console.log(`Warning: Line ${fix.lineNumber} not found in ${filePath}`);
        }
        continue;
      }

      const oldHref = String(fix.rawHref ?? '');
      const pathPart = oldHref.includes('#') ? oldHref.split('#')[0] : oldHref;
      const newHref = pathPart ? `${pathPart}#${fix.newAnchor}` : `#${fix.newAnchor}`;

      if (oldHref === newHref) {
        if (verbose) {
          console.log(`Skipping ${filePath}:${fix.lineNumber} (no change needed)`);
        }
        continue;
      }

      const updatedLine = replaceHrefOnLine(lines[index], fix.linkType, oldHref, newHref);

      if (updatedLine === null) {
        if (verbose) {
          console.log(`Warning: Could not find href '${oldHref}' on line ${fix.lineNumber} in ${filePath}`);
        }
        continue;
      }

      lines[index] = updatedLine;
      fixesCount++;

      if (verbose) {
        console.log(`Fixed ${filePath}:${fix.lineNumber}`);
        console.log(` Old: ${oldHref}`);
        console.log(` New: ${newHref}`);
      }
    }

    if (fixesCount > 0) {
      writeFileSync(fullPath, lines.join('\n'), 'utf-8');

      if (verbose) {
        console.log(`Saved ${fixesCount} fix(es) to ${filePath}`);
      }
    }

    return fixesCount;
  } catch (error) {
    if (verbose) {
      console.log(`Error fixing ${filePath}: ${error.message}`);
    }
    return 0;
  }
}

/**
 * Rewrite broken anchors in MDX files using a previously saved JSON report.
 *
 * For every fixable failure listed under `results_by_file`, the anchor part of
 * the href on the recorded line is replaced by the kebab-cased heading that
 * was actually found.
 *
 * @param {string} reportPath - Path of the JSON report.
 * @param {string} repoRoot - Directory the report's file paths are relative to.
 * @param {boolean} [verbose=false] - Log each fix, skip and warning.
 * @returns {Object<string, number>} Number of fixes per modified file; empty
 *   when the report is missing, unreadable or contains nothing fixable.
 */
function fixLinksFromReport(reportPath, repoRoot, verbose = false) {
  if (!existsSync(reportPath)) {
    console.error(`Error: Report file not found: ${reportPath}`);
    return {};
  }

  let reportData;
  try {
    reportData = JSON.parse(readFileSync(reportPath, 'utf-8'));
  } catch (error) {
    console.error(`Error reading report file: ${error.message}`);
    return {};
  }

  const resultsByFile = reportData?.results_by_file || {};

  if (Object.keys(resultsByFile).length === 0) {
    if (verbose) {
      console.log('No failures found in report.');
    }
    return {};
  }

  const fixesApplied = {};

  for (const [filePath, failures] of Object.entries(resultsByFile)) {
    const fixes = (Array.isArray(failures) ? failures : [])
      .map(toAnchorFix)
      .filter((fix) => fix !== null);

    const fixesCount = applyAnchorFixes(repoRoot, filePath, fixes, verbose);
    if (fixesCount > 0) {
      fixesApplied[filePath] = fixesCount;
    }
  }

  return fixesApplied;
}

/**
 * Rewrite broken anchors in MDX files using in-memory validation results.
 *
 * Only results with status 'failure' that carry both an anchor and the
 * kebab-cased heading actually found are used; they are grouped by
 * `result.source.filePath` and applied file by file.
 *
 * @param {ValidationResult[]} results - Results of a validation run.
 * @param {string} repoRoot - Directory the results' file paths are relative to.
 * @param {boolean} [verbose=false] - Log each fix, skip and warning.
 * @returns {Object<string, number>} Number of fixes per modified file.
 */
function fixLinks(results, repoRoot, verbose = false) {
  const fixesByFile = {};

  for (const result of results) {
    const fix = toAnchorFix(result);
    if (fix === null) continue;

    const filePath = result.source.filePath;
    if (!fixesByFile[filePath]) {
      fixesByFile[filePath] = [];
    }
    fixesByFile[filePath].push(fix);
  }

  const fixesApplied = {};

  for (const [filePath, fixes] of Object.entries(fixesByFile)) {
    const fixesCount = applyAnchorFixes(repoRoot, filePath, fixes, verbose);
    if (fixesCount > 0) {
      fixesApplied[filePath] = fixesCount;
    }
  }

  return fixesApplied;
}
949
+
950
+ // Report Generation
951
+
952
/**
 * Summarise validation results and write them to a JSON report file.
 *
 * The report contains a timestamp, the given configuration, overall counts
 * per status, per-file counts, and - grouped by source file - every result
 * whose status is not 'success'.
 *
 * @param {ValidationResult[]} results - Results of a validation run.
 * @param {object} config - Stored verbatim under `configuration`.
 * @param {string} outputPath - Where the JSON report is written.
 * @returns {object} The report object that was written.
 */
function generateReport(results, config, outputPath) {
  const totals = { success: 0, failure: 0, error: 0 };
  const summaryByFile = {};
  const resultsByFile = {};

  for (const result of results) {
    const { status } = result;
    const { filePath } = result.source;

    // Only the three known statuses contribute to the overall counts.
    if (status === 'success' || status === 'failure' || status === 'error') {
      totals[status] += 1;
    }

    summaryByFile[filePath] ??= { total: 0, success: 0, failure: 0, error: 0 };
    summaryByFile[filePath].total++;
    summaryByFile[filePath][status]++;

    // Successful links are counted but not listed individually.
    if (status !== 'success') {
      (resultsByFile[filePath] ??= []).push(result);
    }
  }

  const report = {
    timestamp: new Date().toISOString(),
    configuration: config,
    summary: {
      total_links: results.length,
      success: totals.success,
      failure: totals.failure,
      error: totals.error,
    },
    summary_by_file: summaryByFile,
    results_by_file: resultsByFile,
  };

  writeFileSync(outputPath, JSON.stringify(report, null, 2), 'utf-8');

  return report;
}
998
+
999
+ // Main CLI Function
1000
+
1001
/**
 * Turn the --concurrency option into a usable batch size.
 * Anything that does not parse to a positive integer (missing, "abc", 0,
 * negative values) yields DEFAULT_CONCURRENCY.
 */
function resolveConcurrency(rawValue) {
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_CONCURRENCY;
}

/**
 * Print how many links were fixed per file, or `nothingFixedMessage` when no
 * file was changed.
 */
function printFixSummary(fixesApplied, nothingFixedMessage) {
  const fixedFiles = Object.entries(fixesApplied);

  if (fixedFiles.length === 0) {
    console.log(nothingFixedMessage);
    return;
  }

  const totalFixes = fixedFiles.reduce((sum, [, count]) => sum + count, 0);
  console.log(`\nFixed ${totalFixes} link(s) in ${fixedFiles.length} file(s):`);
  for (const [filePath, count] of fixedFiles) {
    console.log(` ${filePath}: ${count} fix(es)`);
  }
  console.log('\nRun validation again to verify the fixes.');
}

/**
 * Print the first `limit` failed/errored results of a report, followed by a
 * pointer to the report file when more issues exist.
 */
function printIssues(report, outputPath, limit = 10) {
  console.log(`\n${'='.repeat(60)}`);
  console.log('ISSUES FOUND');
  console.log('='.repeat(60));

  const issues = Object.values(report.results_by_file).flat();

  for (const result of issues.slice(0, limit)) {
    console.log(`\n${result.source.filePath}:${result.source.lineNumber}`);
    console.log(` Link: ${result.source.linkText}`);
    console.log(` URL: ${result.targetUrl}`);
    console.log(` Error: ${result.errorMessage}`);
  }

  const shown = Math.min(issues.length, limit);
  const issueTotal = report.summary.failure + report.summary.error;
  if (shown < issueTotal) {
    console.log(`\n... and ${issueTotal - shown} more issues. See ${outputPath} for full details.`);
  }
}

/**
 * CLI entry point: validate the internal links of the MDX files under the
 * current working directory against a live documentation site.
 *
 * Modes, selected through `options`:
 * - `fixFromReport` (flag or path): apply fixes from a saved report and return.
 * - `dryRun`: list the extracted links without validating them.
 * - default: validate, optionally apply fixes (`fix`), write the JSON report
 *   (`output`, default 'links_report.json') and print a summary.
 *
 * Other options read here: `dir`, `file`, `concurrency`, `headless`,
 * `verbose`, `quiet`.
 *
 * The process exits with code 1 when no MDX file is found or when any link
 * failed or errored.
 *
 * @param {string} baseUrl - Site base URL; "https://" is assumed when no
 *   scheme is given and trailing slashes are removed.
 * @param {object} [options={}] - Parsed CLI options.
 * @returns {Promise<void>}
 */
export async function validateLinks(baseUrl, options = {}) {
  const repoRoot = process.cwd();
  const verbose = Boolean(options.verbose && !options.quiet);

  // Handle --fix-from-report mode
  if (options.fixFromReport !== undefined) {
    // If flag is passed with a path, use that path; otherwise use default
    const reportPath = typeof options.fixFromReport === 'string' && options.fixFromReport
      ? options.fixFromReport
      : 'links_report.json';

    if (!options.quiet) {
      console.log(`Applying fixes from report: ${reportPath}`);
    }

    const fixesApplied = fixLinksFromReport(reportPath, repoRoot, options.verbose && !options.quiet);

    if (!options.quiet) {
      printFixSummary(fixesApplied, '\nNo fixable issues found in report.');
    }

    return;
  }

  // Normalize base URL - add https:// if not present
  let normalizedBaseUrl = baseUrl;
  if (!normalizedBaseUrl.startsWith('http://') && !normalizedBaseUrl.startsWith('https://')) {
    normalizedBaseUrl = `https://${normalizedBaseUrl}`;
  }
  // Remove trailing slash
  normalizedBaseUrl = normalizedBaseUrl.replace(/\/+$/, '');

  if (verbose) {
    console.log('Finding MDX files...');
  }

  const mdxFiles = findMdxFiles(repoRoot, options.dir, options.file);

  if (mdxFiles.length === 0) {
    console.error('No MDX files found.');
    process.exit(1);
  }

  if (verbose) {
    console.log(`Found ${mdxFiles.length} MDX files\n`);
    console.log('Extracting links...');
  }

  const allLinks = [];
  for (const mdxFile of mdxFiles) {
    allLinks.push(
      ...extractLinksFromFile(mdxFile, normalizedBaseUrl, repoRoot, options.verbose && !options.quiet)
    );
  }

  if (allLinks.length === 0) {
    console.log('No internal links found.');
    return;
  }

  if (!options.quiet) {
    console.log(`\nFound ${allLinks.length} internal links`);
  }

  if (options.dryRun) {
    console.log('\nExtracted links:');
    allLinks.forEach((link, i) => {
      console.log(`\n${i + 1}. ${link.source.filePath}:${link.source.lineNumber}`);
      console.log(` Text: ${link.source.linkText}`);
      console.log(` Raw: ${link.source.rawHref}`);
      console.log(` URL: ${link.targetUrl}`);
      if (link.anchor) {
        console.log(` Anchor: #${link.anchor}`);
      }
    });
    return;
  }

  // A negative value used to be passed straight through and made the batch
  // loop in the validator run forever; it now falls back to the default.
  const concurrency = resolveConcurrency(options.concurrency);
  const outputPath = options.output || 'links_report.json';
  const startTime = Date.now();

  if (!options.quiet) {
    console.log('\nValidating links...');
  }

  const results = await validateLinksAsync(
    allLinks,
    normalizedBaseUrl,
    repoRoot,
    concurrency,
    options.headless !== false,
    options.verbose && !options.quiet
  );

  const executionTime = (Date.now() - startTime) / 1000;

  if (options.fix) {
    if (!options.quiet) {
      console.log('\nApplying fixes...');
    }

    const fixesApplied = fixLinks(results, repoRoot, options.verbose && !options.quiet);

    if (!options.quiet) {
      printFixSummary(fixesApplied, '\nNo fixable issues found.');
    }
  }

  const config = {
    base_url: normalizedBaseUrl,
    scanned_directories: options.dir || options.file ? [options.dir || options.file] : MDX_DIRS,
    excluded_directories: EXCLUDED_DIRS,
    concurrency,
    execution_time_seconds: Math.round(executionTime * 100) / 100,
  };

  // The report reflects the validation run; fixes applied above are not re-checked.
  const report = generateReport(results, config, outputPath);
  const hasIssues = report.summary.failure > 0 || report.summary.error > 0;

  if (!options.quiet) {
    console.log(`\n${'='.repeat(60)}`);
    console.log('VALIDATION SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total links: ${report.summary.total_links}`);
    console.log(`Success: ${chalk.green(report.summary.success + ' ✓')}`);
    console.log(`Failure: ${chalk.red(report.summary.failure + ' ✗')}`);
    console.log(`Error: ${chalk.yellow(report.summary.error + ' ⚠')}`);
    console.log(`Execution time: ${executionTime.toFixed(2)}s`);
    console.log(`\nReport saved to: ${outputPath}`);

    if (hasIssues) {
      printIssues(report, outputPath);
    }
  }

  if (hasIssues) {
    process.exit(1);
  }
}