recker 1.0.30 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +246 -105
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -534,130 +534,271 @@ ${colors.gray('Grade:')} ${gradeColor(colors.bold(report.grade))} ${colors.gray
|
|
|
534
534
|
.command('spider')
|
|
535
535
|
.description('Crawl a website following internal links')
|
|
536
536
|
.argument('<url>', 'Starting URL to crawl')
|
|
537
|
-
.
|
|
538
|
-
.option('-l, --limit <n>', 'Maximum pages to crawl', '100')
|
|
539
|
-
.option('-c, --concurrency <n>', 'Concurrent requests', '5')
|
|
540
|
-
.option('--delay <ms>', 'Delay between requests in ms', '100')
|
|
541
|
-
.option('--format <format>', 'Output format: text (default) or json', 'text')
|
|
542
|
-
.option('-o, --output <file>', 'Write JSON results to file')
|
|
537
|
+
.argument('[args...]', 'Options: depth=N limit=N concurrency=N seo output=file.json')
|
|
543
538
|
.addHelpText('after', `
|
|
544
539
|
${colors.bold(colors.yellow('Examples:'))}
|
|
545
|
-
${colors.green('$ rek spider example.com')}
|
|
546
|
-
${colors.green('$ rek spider example.com
|
|
547
|
-
${colors.green('$ rek spider example.com
|
|
548
|
-
${colors.green('$ rek spider example.com
|
|
540
|
+
${colors.green('$ rek spider example.com')} ${colors.gray('Crawl with defaults')}
|
|
541
|
+
${colors.green('$ rek spider example.com depth=3 limit=50')} ${colors.gray('Depth 3, max 50 pages')}
|
|
542
|
+
${colors.green('$ rek spider example.com seo')} ${colors.gray('Crawl + SEO analysis')}
|
|
543
|
+
${colors.green('$ rek spider example.com seo output=report.json')} ${colors.gray('SEO with JSON export')}
|
|
549
544
|
|
|
550
545
|
${colors.bold(colors.yellow('Options:'))}
|
|
551
|
-
${colors.cyan('
|
|
552
|
-
${colors.cyan('
|
|
553
|
-
${colors.cyan('
|
|
554
|
-
${colors.cyan('
|
|
546
|
+
${colors.cyan('depth=N')} Max link depth to follow (default: 5)
|
|
547
|
+
${colors.cyan('limit=N')} Max pages to crawl (default: 100)
|
|
548
|
+
${colors.cyan('concurrency=N')} Parallel requests (default: 5)
|
|
549
|
+
${colors.cyan('seo')} Enable SEO analysis mode
|
|
550
|
+
${colors.cyan('output=file.json')} Save JSON report to file
|
|
555
551
|
`)
|
|
556
|
-
.action(async (url,
|
|
552
|
+
.action(async (url, args) => {
|
|
553
|
+
let maxDepth = 5;
|
|
554
|
+
let maxPages = 100;
|
|
555
|
+
let concurrency = 5;
|
|
556
|
+
let seoEnabled = false;
|
|
557
|
+
let outputFile = '';
|
|
558
|
+
for (const arg of args) {
|
|
559
|
+
if (arg.startsWith('depth=')) {
|
|
560
|
+
maxDepth = parseInt(arg.split('=')[1]) || 5;
|
|
561
|
+
}
|
|
562
|
+
else if (arg.startsWith('limit=')) {
|
|
563
|
+
maxPages = parseInt(arg.split('=')[1]) || 100;
|
|
564
|
+
}
|
|
565
|
+
else if (arg.startsWith('concurrency=')) {
|
|
566
|
+
concurrency = parseInt(arg.split('=')[1]) || 5;
|
|
567
|
+
}
|
|
568
|
+
else if (arg === 'seo') {
|
|
569
|
+
seoEnabled = true;
|
|
570
|
+
}
|
|
571
|
+
else if (arg.startsWith('output=')) {
|
|
572
|
+
outputFile = arg.split('=')[1] || '';
|
|
573
|
+
}
|
|
574
|
+
}
|
|
557
575
|
if (!url.startsWith('http'))
|
|
558
576
|
url = `https://${url}`;
|
|
559
|
-
const
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
const { Spider } = await import('../scrape/spider.js');
|
|
565
|
-
if (!isJson) {
|
|
566
|
-
console.log(colors.gray(`\nCrawling ${url}...`));
|
|
567
|
-
console.log(colors.gray(` Depth: ${maxDepth}, Limit: ${maxPages}, Concurrency: ${concurrency}\n`));
|
|
577
|
+
const modeLabel = seoEnabled ? colors.magenta(' + SEO') : '';
|
|
578
|
+
console.log(colors.cyan(`\nSpider starting: ${url}`));
|
|
579
|
+
console.log(colors.gray(` Depth: ${maxDepth} | Limit: ${maxPages} | Concurrency: ${concurrency}${modeLabel}`));
|
|
580
|
+
if (outputFile) {
|
|
581
|
+
console.log(colors.gray(` Output: ${outputFile}`));
|
|
568
582
|
}
|
|
583
|
+
console.log('');
|
|
569
584
|
try {
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
585
|
+
if (seoEnabled) {
|
|
586
|
+
const { SeoSpider } = await import('../seo/index.js');
|
|
587
|
+
const seoSpider = new SeoSpider({
|
|
588
|
+
maxDepth,
|
|
589
|
+
maxPages,
|
|
590
|
+
concurrency,
|
|
591
|
+
sameDomain: true,
|
|
592
|
+
delay: 100,
|
|
593
|
+
seo: true,
|
|
594
|
+
output: outputFile || undefined,
|
|
595
|
+
onProgress: (progress) => {
|
|
596
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
597
|
+
},
|
|
598
|
+
});
|
|
599
|
+
const result = await seoSpider.crawl(url);
|
|
600
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
601
|
+
console.log(colors.green(`\n✔ SEO Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
602
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
603
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
604
|
+
console.log(` ${colors.cyan('Avg SEO Score')}: ${result.summary.avgScore}/100`);
|
|
605
|
+
const responseTimes = result.pages.filter(p => p.duration > 0).map(p => p.duration);
|
|
606
|
+
const avgResponseTime = responseTimes.length > 0
|
|
607
|
+
? Math.round(responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length)
|
|
608
|
+
: 0;
|
|
609
|
+
const minResponseTime = responseTimes.length > 0 ? Math.min(...responseTimes) : 0;
|
|
610
|
+
const maxResponseTime = responseTimes.length > 0 ? Math.max(...responseTimes) : 0;
|
|
611
|
+
const reqPerSec = result.duration > 0 ? (result.pages.length / (result.duration / 1000)).toFixed(1) : '0';
|
|
612
|
+
const statusCounts = new Map();
|
|
613
|
+
for (const page of result.pages) {
|
|
614
|
+
const status = page.status || 0;
|
|
615
|
+
statusCounts.set(status, (statusCounts.get(status) || 0) + 1);
|
|
616
|
+
}
|
|
617
|
+
let totalInternalLinks = 0;
|
|
618
|
+
let totalExternalLinks = 0;
|
|
619
|
+
let totalImages = 0;
|
|
620
|
+
let imagesWithoutAlt = 0;
|
|
621
|
+
let pagesWithoutTitle = 0;
|
|
622
|
+
let pagesWithoutDescription = 0;
|
|
623
|
+
for (const page of result.pages) {
|
|
624
|
+
if (page.seoReport) {
|
|
625
|
+
totalInternalLinks += page.seoReport.links?.internal || 0;
|
|
626
|
+
totalExternalLinks += page.seoReport.links?.external || 0;
|
|
627
|
+
totalImages += page.seoReport.images?.total || 0;
|
|
628
|
+
imagesWithoutAlt += page.seoReport.images?.withoutAlt || 0;
|
|
629
|
+
if (!page.seoReport.title?.text)
|
|
630
|
+
pagesWithoutTitle++;
|
|
631
|
+
if (!page.seoReport.metaDescription?.text)
|
|
632
|
+
pagesWithoutDescription++;
|
|
579
633
|
}
|
|
580
|
-
}
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
634
|
+
}
|
|
635
|
+
console.log(colors.bold('\n Performance:'));
|
|
636
|
+
console.log(` ${colors.gray('Avg Response:')} ${avgResponseTime}ms`);
|
|
637
|
+
console.log(` ${colors.gray('Min/Max:')} ${minResponseTime}ms / ${maxResponseTime}ms`);
|
|
638
|
+
console.log(` ${colors.gray('Throughput:')} ${reqPerSec} req/s`);
|
|
639
|
+
console.log(colors.bold('\n HTTP Status:'));
|
|
640
|
+
const sortedStatuses = Array.from(statusCounts.entries()).sort((a, b) => b[1] - a[1]);
|
|
641
|
+
for (const [status, count] of sortedStatuses.slice(0, 5)) {
|
|
642
|
+
const statusLabel = status === 0 ? 'Error' : status.toString();
|
|
643
|
+
const statusColor = status >= 400 || status === 0 ? colors.red :
|
|
644
|
+
status >= 300 ? colors.yellow : colors.green;
|
|
645
|
+
const pct = ((count / result.pages.length) * 100).toFixed(0);
|
|
646
|
+
console.log(` ${statusColor(statusLabel.padEnd(5))} ${count.toString().padStart(3)} (${pct}%)`);
|
|
647
|
+
}
|
|
648
|
+
console.log(colors.bold('\n Content:'));
|
|
649
|
+
console.log(` ${colors.gray('Internal links:')} ${totalInternalLinks.toLocaleString()}`);
|
|
650
|
+
console.log(` ${colors.gray('External links:')} ${totalExternalLinks.toLocaleString()}`);
|
|
651
|
+
console.log(` ${colors.gray('Images:')} ${totalImages.toLocaleString()} (${imagesWithoutAlt} missing alt)`);
|
|
652
|
+
console.log(` ${colors.gray('Missing title:')} ${pagesWithoutTitle}`);
|
|
653
|
+
console.log(` ${colors.gray('Missing desc:')} ${pagesWithoutDescription}`);
|
|
654
|
+
console.log(colors.bold('\n SEO Summary:'));
|
|
655
|
+
const { summary } = result;
|
|
656
|
+
console.log(` ${colors.red('✗')} Pages with errors: ${summary.pagesWithErrors}`);
|
|
657
|
+
console.log(` ${colors.yellow('⚠')} Pages with warnings: ${summary.pagesWithWarnings}`);
|
|
658
|
+
console.log(` ${colors.magenta('⚐')} Duplicate titles: ${summary.duplicateTitles}`);
|
|
659
|
+
console.log(` ${colors.magenta('⚐')} Duplicate descriptions:${summary.duplicateDescriptions}`);
|
|
660
|
+
console.log(` ${colors.magenta('⚐')} Duplicate H1s: ${summary.duplicateH1s}`);
|
|
661
|
+
console.log(` ${colors.gray('○')} Orphan pages: ${summary.orphanPages}`);
|
|
662
|
+
if (result.siteWideIssues.length > 0) {
|
|
663
|
+
console.log(colors.bold('\n Site-Wide Issues:'));
|
|
664
|
+
for (const issue of result.siteWideIssues.slice(0, 10)) {
|
|
665
|
+
const icon = issue.severity === 'error' ? colors.red('✗') :
|
|
666
|
+
issue.severity === 'warning' ? colors.yellow('⚠') : colors.gray('○');
|
|
667
|
+
console.log(` ${icon} ${issue.message}`);
|
|
668
|
+
if (issue.value) {
|
|
669
|
+
const truncatedValue = issue.value.length > 50 ? issue.value.slice(0, 47) + '...' : issue.value;
|
|
670
|
+
console.log(` ${colors.gray(`"${truncatedValue}"`)}`);
|
|
671
|
+
}
|
|
672
|
+
const uniquePaths = [...new Set(issue.affectedUrls.map(u => new URL(u).pathname))];
|
|
673
|
+
if (uniquePaths.length <= 3) {
|
|
674
|
+
for (const path of uniquePaths) {
|
|
675
|
+
console.log(` ${colors.gray('→')} ${path}`);
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
else {
|
|
679
|
+
console.log(` ${colors.gray(`→ ${uniquePaths.length} pages affected`)}`);
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
if (result.siteWideIssues.length > 10) {
|
|
683
|
+
console.log(colors.gray(` ... and ${result.siteWideIssues.length - 10} more issues`));
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
const pagesWithScores = result.pages
|
|
687
|
+
.filter(p => p.seoReport)
|
|
688
|
+
.sort((a, b) => (a.seoReport?.score || 0) - (b.seoReport?.score || 0));
|
|
689
|
+
const seenPaths = new Set();
|
|
690
|
+
const uniquePages = pagesWithScores.filter(page => {
|
|
691
|
+
const path = new URL(page.url).pathname;
|
|
692
|
+
if (seenPaths.has(path))
|
|
693
|
+
return false;
|
|
694
|
+
seenPaths.add(path);
|
|
695
|
+
return true;
|
|
696
|
+
});
|
|
697
|
+
if (uniquePages.length > 0) {
|
|
698
|
+
console.log(colors.bold('\n Pages by SEO Score:'));
|
|
699
|
+
const worstPages = uniquePages.slice(0, 5);
|
|
700
|
+
for (const page of worstPages) {
|
|
701
|
+
const score = page.seoReport?.score || 0;
|
|
702
|
+
const grade = page.seoReport?.grade || '?';
|
|
703
|
+
const path = new URL(page.url).pathname;
|
|
704
|
+
const scoreColor = score >= 80 ? colors.green : score >= 60 ? colors.yellow : colors.red;
|
|
705
|
+
console.log(` ${scoreColor(`${score.toString().padStart(3)}`)} ${colors.gray(`[${grade}]`)} ${path.slice(0, 50)}`);
|
|
706
|
+
}
|
|
707
|
+
if (uniquePages.length > 5) {
|
|
708
|
+
console.log(colors.gray(` ... and ${uniquePages.length - 5} more pages`));
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
if (outputFile) {
|
|
712
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
713
|
+
}
|
|
586
714
|
}
|
|
587
|
-
|
|
588
|
-
const
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
715
|
+
else {
|
|
716
|
+
const { Spider } = await import('../scrape/spider.js');
|
|
717
|
+
const spider = new Spider({
|
|
718
|
+
maxDepth,
|
|
719
|
+
maxPages,
|
|
720
|
+
concurrency,
|
|
721
|
+
sameDomain: true,
|
|
722
|
+
delay: 100,
|
|
723
|
+
onProgress: (progress) => {
|
|
724
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
597
725
|
},
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
};
|
|
609
|
-
if (options.output) {
|
|
610
|
-
await fs.writeFile(options.output, JSON.stringify(jsonOutput, null, 2));
|
|
611
|
-
console.log(colors.green(`✓ Results saved to ${options.output}`));
|
|
726
|
+
});
|
|
727
|
+
const result = await spider.crawl(url);
|
|
728
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
729
|
+
console.log(colors.green(`\n✔ Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
730
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
731
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
732
|
+
console.log(` ${colors.cyan('Errors')}: ${result.errors.length}`);
|
|
733
|
+
const byDepth = new Map();
|
|
734
|
+
for (const page of result.pages) {
|
|
735
|
+
byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
|
|
612
736
|
}
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
737
|
+
console.log(colors.bold('\n Pages by depth:'));
|
|
738
|
+
for (const [depth, count] of Array.from(byDepth.entries()).sort((a, b) => a[0] - b[0])) {
|
|
739
|
+
const bar = '█'.repeat(Math.min(count, 40));
|
|
740
|
+
console.log(` ${colors.gray(`d${depth}:`)} ${bar} ${count}`);
|
|
616
741
|
}
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
console.log('');
|
|
628
|
-
const byDepth = new Map();
|
|
629
|
-
for (const page of result.pages) {
|
|
630
|
-
byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
|
|
631
|
-
}
|
|
632
|
-
console.log(`${colors.bold('Pages by Depth:')}`);
|
|
633
|
-
for (const [depth, count] of [...byDepth.entries()].sort((a, b) => a[0] - b[0])) {
|
|
634
|
-
console.log(` ${colors.gray(`Depth ${depth}:`)} ${count} pages`);
|
|
635
|
-
}
|
|
636
|
-
console.log('');
|
|
637
|
-
if (result.errors.length > 0) {
|
|
638
|
-
console.log(`${colors.bold(colors.red('Errors:'))}`);
|
|
639
|
-
for (const err of result.errors.slice(0, 10)) {
|
|
640
|
-
const shortUrl = err.url.replace(url, '');
|
|
641
|
-
const statusMatch = err.error.match(/status code (\d+)/);
|
|
642
|
-
const errorMsg = statusMatch ? `HTTP ${statusMatch[1]}` : err.error.slice(0, 50);
|
|
643
|
-
console.log(` ${colors.red('✗')} ${colors.gray(shortUrl || '/')} → ${errorMsg}`);
|
|
742
|
+
const topPages = [...result.pages]
|
|
743
|
+
.filter(p => !p.error)
|
|
744
|
+
.sort((a, b) => b.links.length - a.links.length)
|
|
745
|
+
.slice(0, 10);
|
|
746
|
+
if (topPages.length > 0) {
|
|
747
|
+
console.log(colors.bold('\n Top pages by outgoing links:'));
|
|
748
|
+
for (const page of topPages) {
|
|
749
|
+
const title = page.title.slice(0, 40) || new URL(page.url).pathname;
|
|
750
|
+
console.log(` ${colors.cyan(page.links.length.toString().padStart(3))} ${title}`);
|
|
751
|
+
}
|
|
644
752
|
}
|
|
645
|
-
|
|
646
|
-
|
|
753
|
+
/**
 * Condense a crawl error into a short label for the CLI error list.
 *
 * - If the text contains "status code NNN" (case-insensitive), returns `HTTP NNN`.
 * - Otherwise returns the text itself, shortened to at most 50 characters
 *   (47 characters plus "...") when it is longer than 50.
 *
 * @param {unknown} error - Error description. The call sites pass `err.error`;
 *   NOTE(review): that is presumably always a string, but non-string values
 *   are tolerated so one odd entry cannot abort the whole report.
 * @returns {string} Short, single-line error label.
 */
const formatError = (error) => {
    // Normalize first: calling .match()/.length on an Error object, null or
    // undefined would throw and be caught by the surrounding try, skipping the
    // remaining output (including the JSON report write).
    const text = typeof error === 'string'
        ? error
        : String(error?.message ?? error ?? 'Unknown error');
    const statusMatch = text.match(/status code (\d{3})/i);
    if (statusMatch) {
        return `HTTP ${statusMatch[1]}`;
    }
    // Keep the column narrow: 47 chars + ellipsis = 50.
    return text.length > 50 ? `${text.slice(0, 47)}...` : text;
};
|
|
760
|
+
if (result.errors.length > 0 && result.errors.length <= 10) {
|
|
761
|
+
console.log(colors.bold('\n Errors:'));
|
|
762
|
+
for (const err of result.errors) {
|
|
763
|
+
const path = new URL(err.url).pathname;
|
|
764
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(30)} → ${formatError(err.error)}`);
|
|
765
|
+
}
|
|
647
766
|
}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
console.log(` ${colors.green('✓')} ${colors.gray((shortUrl || '/').padEnd(40))} ${page.title.slice(0, 40)}`);
|
|
767
|
+
else if (result.errors.length > 10) {
|
|
768
|
+
console.log(colors.bold('\n Errors:'));
|
|
769
|
+
for (const err of result.errors.slice(0, 5)) {
|
|
770
|
+
const path = new URL(err.url).pathname;
|
|
771
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(30)} → ${formatError(err.error)}`);
|
|
772
|
+
}
|
|
773
|
+
console.log(colors.gray(` ... and ${result.errors.length - 5} more errors`));
|
|
656
774
|
}
|
|
657
|
-
if (
|
|
658
|
-
|
|
775
|
+
if (outputFile) {
|
|
776
|
+
const jsonOutput = {
|
|
777
|
+
startUrl: result.startUrl,
|
|
778
|
+
crawledAt: new Date().toISOString(),
|
|
779
|
+
duration: result.duration,
|
|
780
|
+
summary: {
|
|
781
|
+
totalPages: result.pages.length,
|
|
782
|
+
successCount: result.pages.filter(p => !p.error).length,
|
|
783
|
+
errorCount: result.errors.length,
|
|
784
|
+
uniqueUrls: result.visited.size,
|
|
785
|
+
},
|
|
786
|
+
pages: result.pages.map(p => ({
|
|
787
|
+
url: p.url,
|
|
788
|
+
status: p.status,
|
|
789
|
+
title: p.title,
|
|
790
|
+
depth: p.depth,
|
|
791
|
+
linksCount: p.links.length,
|
|
792
|
+
duration: p.duration,
|
|
793
|
+
error: p.error,
|
|
794
|
+
})),
|
|
795
|
+
errors: result.errors,
|
|
796
|
+
};
|
|
797
|
+
await fs.writeFile(outputFile, JSON.stringify(jsonOutput, null, 2));
|
|
798
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
659
799
|
}
|
|
660
800
|
}
|
|
801
|
+
console.log('');
|
|
661
802
|
}
|
|
662
803
|
catch (error) {
|
|
663
804
|
console.error(colors.red(`\nSpider failed: ${error.message}`));
|