recker 1.0.28-next.4354f8c → 1.0.28-next.857660a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/tui/shell.js +276 -70
- package/dist/scrape/spider.d.ts +2 -0
- package/dist/scrape/spider.js +64 -23
- package/dist/seo/analyzer.js +15 -0
- package/dist/seo/index.d.ts +3 -1
- package/dist/seo/index.js +1 -0
- package/dist/seo/seo-spider.d.ts +47 -0
- package/dist/seo/seo-spider.js +362 -0
- package/dist/seo/types.d.ts +24 -0
- package/package.json +1 -1
package/dist/cli/tui/shell.js
CHANGED
|
@@ -15,7 +15,7 @@ import colors from '../../utils/colors.js';
|
|
|
15
15
|
import { getShellSearch } from './shell-search.js';
|
|
16
16
|
import { openSearchPanel } from './search-panel.js';
|
|
17
17
|
import { ScrollBuffer, parseScrollKey, parseMouseScroll, disableMouseReporting } from './scroll-buffer.js';
|
|
18
|
-
import { analyzeSeo } from '../../seo/index.js';
|
|
18
|
+
import { analyzeSeo, SeoSpider } from '../../seo/index.js';
|
|
19
19
|
let highlight;
|
|
20
20
|
async function initDependencies() {
|
|
21
21
|
if (!highlight) {
|
|
@@ -972,15 +972,21 @@ ${colors.bold('Details:')}`);
|
|
|
972
972
|
}
|
|
973
973
|
const startTime = performance.now();
|
|
974
974
|
try {
|
|
975
|
+
const ttfbStart = performance.now();
|
|
975
976
|
const res = await this.client.get(url);
|
|
977
|
+
const ttfb = Math.round(performance.now() - ttfbStart);
|
|
976
978
|
const html = await res.text();
|
|
977
979
|
const duration = Math.round(performance.now() - startTime);
|
|
978
980
|
const report = await analyzeSeo(html, { baseUrl: url });
|
|
981
|
+
report.timing = {
|
|
982
|
+
ttfb,
|
|
983
|
+
total: duration,
|
|
984
|
+
};
|
|
979
985
|
if (jsonOutput) {
|
|
980
986
|
const jsonResult = {
|
|
981
987
|
url,
|
|
982
988
|
analyzedAt: new Date().toISOString(),
|
|
983
|
-
|
|
989
|
+
timing: report.timing,
|
|
984
990
|
score: report.score,
|
|
985
991
|
grade: report.grade,
|
|
986
992
|
title: report.title,
|
|
@@ -989,8 +995,9 @@ ${colors.bold('Details:')}`);
|
|
|
989
995
|
headings: report.headings,
|
|
990
996
|
links: report.links,
|
|
991
997
|
images: report.images,
|
|
992
|
-
openGraph: report.
|
|
993
|
-
twitterCard: report.
|
|
998
|
+
openGraph: report.openGraph,
|
|
999
|
+
twitterCard: report.twitterCard,
|
|
1000
|
+
social: report.social,
|
|
994
1001
|
jsonLd: report.jsonLd,
|
|
995
1002
|
technical: report.technical,
|
|
996
1003
|
checks: report.checks,
|
|
@@ -1028,6 +1035,36 @@ Grade: ${gradeColor(colors.bold(report.grade))} (${report.score}/100)
|
|
|
1028
1035
|
: report.metaDescription.text;
|
|
1029
1036
|
console.log(colors.bold('Description:') + ` ${desc} ` + colors.gray(`(${report.metaDescription.length} chars)`));
|
|
1030
1037
|
}
|
|
1038
|
+
if (report.openGraph && Object.values(report.openGraph).some(v => v)) {
|
|
1039
|
+
console.log('');
|
|
1040
|
+
console.log(colors.bold(colors.cyan('OpenGraph:')));
|
|
1041
|
+
if (report.openGraph.title) {
|
|
1042
|
+
const ogTitle = report.openGraph.title.length > 60
|
|
1043
|
+
? report.openGraph.title.slice(0, 57) + '...'
|
|
1044
|
+
: report.openGraph.title;
|
|
1045
|
+
console.log(` ${colors.gray('og:title:')} ${ogTitle}`);
|
|
1046
|
+
}
|
|
1047
|
+
if (report.openGraph.description) {
|
|
1048
|
+
const ogDesc = report.openGraph.description.length > 60
|
|
1049
|
+
? report.openGraph.description.slice(0, 57) + '...'
|
|
1050
|
+
: report.openGraph.description;
|
|
1051
|
+
console.log(` ${colors.gray('og:description:')} ${ogDesc}`);
|
|
1052
|
+
}
|
|
1053
|
+
if (report.openGraph.image) {
|
|
1054
|
+
const ogImg = report.openGraph.image.length > 50
|
|
1055
|
+
? '...' + report.openGraph.image.slice(-47)
|
|
1056
|
+
: report.openGraph.image;
|
|
1057
|
+
console.log(` ${colors.gray('og:image:')} ${colors.blue(ogImg)}`);
|
|
1058
|
+
}
|
|
1059
|
+
if (report.openGraph.type) {
|
|
1060
|
+
console.log(` ${colors.gray('og:type:')} ${report.openGraph.type}`);
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
if (report.timing?.ttfb !== undefined) {
|
|
1064
|
+
console.log('');
|
|
1065
|
+
console.log(colors.bold('Timing:') + ` TTFB ${report.timing.ttfb}ms` +
|
|
1066
|
+
(report.timing.total ? `, Total ${report.timing.total}ms` : ''));
|
|
1067
|
+
}
|
|
1031
1068
|
if (report.content) {
|
|
1032
1069
|
console.log(colors.bold('Content:') + ` ${report.content.wordCount} words, ${report.content.paragraphCount} paragraphs, ~${report.content.readingTimeMinutes} min read`);
|
|
1033
1070
|
}
|
|
@@ -1443,18 +1480,26 @@ ${colors.bold('Network:')}
|
|
|
1443
1480
|
let maxDepth = 3;
|
|
1444
1481
|
let maxPages = 100;
|
|
1445
1482
|
let concurrency = 5;
|
|
1483
|
+
let seoEnabled = false;
|
|
1484
|
+
let outputFile = '';
|
|
1446
1485
|
for (let i = 0; i < args.length; i++) {
|
|
1447
1486
|
const arg = args[i];
|
|
1448
|
-
if (arg.startsWith('
|
|
1449
|
-
maxDepth = parseInt(arg.split('=')[1]) ||
|
|
1487
|
+
if (arg.startsWith('depth=')) {
|
|
1488
|
+
maxDepth = parseInt(arg.split('=')[1]) || 4;
|
|
1450
1489
|
}
|
|
1451
|
-
else if (arg.startsWith('
|
|
1490
|
+
else if (arg.startsWith('limit=')) {
|
|
1452
1491
|
maxPages = parseInt(arg.split('=')[1]) || 100;
|
|
1453
1492
|
}
|
|
1454
|
-
else if (arg.startsWith('
|
|
1493
|
+
else if (arg.startsWith('concurrency=')) {
|
|
1455
1494
|
concurrency = parseInt(arg.split('=')[1]) || 5;
|
|
1456
1495
|
}
|
|
1457
|
-
else if (
|
|
1496
|
+
else if (arg === 'seo') {
|
|
1497
|
+
seoEnabled = true;
|
|
1498
|
+
}
|
|
1499
|
+
else if (arg.startsWith('output=')) {
|
|
1500
|
+
outputFile = arg.split('=')[1] || '';
|
|
1501
|
+
}
|
|
1502
|
+
else if (!arg.includes('=')) {
|
|
1458
1503
|
url = arg;
|
|
1459
1504
|
}
|
|
1460
1505
|
}
|
|
@@ -1462,12 +1507,15 @@ ${colors.bold('Network:')}
|
|
|
1462
1507
|
if (!this.baseUrl) {
|
|
1463
1508
|
console.log(colors.yellow('Usage: spider <url> [options]'));
|
|
1464
1509
|
console.log(colors.gray(' Options:'));
|
|
1465
|
-
console.log(colors.gray('
|
|
1466
|
-
console.log(colors.gray('
|
|
1467
|
-
console.log(colors.gray('
|
|
1510
|
+
console.log(colors.gray(' depth=4 Max crawl depth'));
|
|
1511
|
+
console.log(colors.gray(' limit=100 Max pages to crawl'));
|
|
1512
|
+
console.log(colors.gray(' concurrency=5 Concurrent requests'));
|
|
1513
|
+
console.log(colors.gray(' seo Enable SEO analysis'));
|
|
1514
|
+
console.log(colors.gray(' output=file.json Save JSON report'));
|
|
1468
1515
|
console.log(colors.gray(' Examples:'));
|
|
1469
|
-
console.log(colors.gray(' spider
|
|
1470
|
-
console.log(colors.gray(' spider
|
|
1516
|
+
console.log(colors.gray(' spider example.com'));
|
|
1517
|
+
console.log(colors.gray(' spider example.com depth=2 limit=50'));
|
|
1518
|
+
console.log(colors.gray(' spider example.com seo output=seo-report.json'));
|
|
1471
1519
|
return;
|
|
1472
1520
|
}
|
|
1473
1521
|
url = this.baseUrl;
|
|
@@ -1476,64 +1524,222 @@ ${colors.bold('Network:')}
|
|
|
1476
1524
|
url = `https://${url}`;
|
|
1477
1525
|
}
|
|
1478
1526
|
console.log(colors.cyan(`\nSpider starting: ${url}`));
|
|
1479
|
-
|
|
1527
|
+
const modeLabel = seoEnabled ? colors.magenta(' + SEO') : '';
|
|
1528
|
+
console.log(colors.gray(` Depth: ${maxDepth} | Limit: ${maxPages} | Concurrency: ${concurrency}${modeLabel}`));
|
|
1529
|
+
if (outputFile) {
|
|
1530
|
+
console.log(colors.gray(` Output: ${outputFile}`));
|
|
1531
|
+
}
|
|
1480
1532
|
console.log('');
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1533
|
+
if (seoEnabled) {
|
|
1534
|
+
const seoSpider = new SeoSpider({
|
|
1535
|
+
maxDepth,
|
|
1536
|
+
maxPages,
|
|
1537
|
+
concurrency,
|
|
1538
|
+
sameDomain: true,
|
|
1539
|
+
delay: 100,
|
|
1540
|
+
seo: true,
|
|
1541
|
+
output: outputFile || undefined,
|
|
1542
|
+
onProgress: (progress) => {
|
|
1543
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
1544
|
+
},
|
|
1545
|
+
});
|
|
1546
|
+
try {
|
|
1547
|
+
const result = await seoSpider.crawl(url);
|
|
1548
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
1549
|
+
console.log(colors.green(`\n✔ SEO Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
1550
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
1551
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
1552
|
+
console.log(` ${colors.cyan('Avg SEO Score')}: ${result.summary.avgScore}/100`);
|
|
1553
|
+
const responseTimes = result.pages.filter(p => p.duration > 0).map(p => p.duration);
|
|
1554
|
+
const avgResponseTime = responseTimes.length > 0
|
|
1555
|
+
? Math.round(responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length)
|
|
1556
|
+
: 0;
|
|
1557
|
+
const minResponseTime = responseTimes.length > 0 ? Math.min(...responseTimes) : 0;
|
|
1558
|
+
const maxResponseTime = responseTimes.length > 0 ? Math.max(...responseTimes) : 0;
|
|
1559
|
+
const reqPerSec = result.duration > 0 ? (result.pages.length / (result.duration / 1000)).toFixed(1) : '0';
|
|
1560
|
+
const statusCounts = new Map();
|
|
1561
|
+
for (const page of result.pages) {
|
|
1562
|
+
const status = page.status || 0;
|
|
1563
|
+
statusCounts.set(status, (statusCounts.get(status) || 0) + 1);
|
|
1564
|
+
}
|
|
1565
|
+
let totalInternalLinks = 0;
|
|
1566
|
+
let totalExternalLinks = 0;
|
|
1567
|
+
let totalImages = 0;
|
|
1568
|
+
let imagesWithoutAlt = 0;
|
|
1569
|
+
let pagesWithoutTitle = 0;
|
|
1570
|
+
let pagesWithoutDescription = 0;
|
|
1571
|
+
for (const page of result.pages) {
|
|
1572
|
+
if (page.seoReport) {
|
|
1573
|
+
totalInternalLinks += page.seoReport.links?.internal || 0;
|
|
1574
|
+
totalExternalLinks += page.seoReport.links?.external || 0;
|
|
1575
|
+
totalImages += page.seoReport.images?.total || 0;
|
|
1576
|
+
imagesWithoutAlt += page.seoReport.images?.withoutAlt || 0;
|
|
1577
|
+
if (!page.seoReport.title?.text)
|
|
1578
|
+
pagesWithoutTitle++;
|
|
1579
|
+
if (!page.seoReport.metaDescription?.text)
|
|
1580
|
+
pagesWithoutDescription++;
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
console.log(colors.bold('\n Performance:'));
|
|
1584
|
+
console.log(` ${colors.gray('Avg Response:')} ${avgResponseTime}ms`);
|
|
1585
|
+
console.log(` ${colors.gray('Min/Max:')} ${minResponseTime}ms / ${maxResponseTime}ms`);
|
|
1586
|
+
console.log(` ${colors.gray('Throughput:')} ${reqPerSec} req/s`);
|
|
1587
|
+
console.log(colors.bold('\n HTTP Status:'));
|
|
1588
|
+
const sortedStatuses = Array.from(statusCounts.entries()).sort((a, b) => b[1] - a[1]);
|
|
1589
|
+
for (const [status, count] of sortedStatuses.slice(0, 5)) {
|
|
1590
|
+
const statusLabel = status === 0 ? 'Error' : status.toString();
|
|
1591
|
+
const statusColor = status >= 400 || status === 0 ? colors.red :
|
|
1592
|
+
status >= 300 ? colors.yellow : colors.green;
|
|
1593
|
+
const pct = ((count / result.pages.length) * 100).toFixed(0);
|
|
1594
|
+
console.log(` ${statusColor(statusLabel.padEnd(5))} ${count.toString().padStart(3)} (${pct}%)`);
|
|
1595
|
+
}
|
|
1596
|
+
console.log(colors.bold('\n Content:'));
|
|
1597
|
+
console.log(` ${colors.gray('Internal links:')} ${totalInternalLinks.toLocaleString()}`);
|
|
1598
|
+
console.log(` ${colors.gray('External links:')} ${totalExternalLinks.toLocaleString()}`);
|
|
1599
|
+
console.log(` ${colors.gray('Images:')} ${totalImages.toLocaleString()} (${imagesWithoutAlt} missing alt)`);
|
|
1600
|
+
console.log(` ${colors.gray('Missing title:')} ${pagesWithoutTitle}`);
|
|
1601
|
+
console.log(` ${colors.gray('Missing desc:')} ${pagesWithoutDescription}`);
|
|
1602
|
+
console.log(colors.bold('\n SEO Summary:'));
|
|
1603
|
+
const { summary } = result;
|
|
1604
|
+
console.log(` ${colors.red('✗')} Pages with errors: ${summary.pagesWithErrors}`);
|
|
1605
|
+
console.log(` ${colors.yellow('⚠')} Pages with warnings: ${summary.pagesWithWarnings}`);
|
|
1606
|
+
console.log(` ${colors.magenta('⚐')} Duplicate titles: ${summary.duplicateTitles}`);
|
|
1607
|
+
console.log(` ${colors.magenta('⚐')} Duplicate descriptions:${summary.duplicateDescriptions}`);
|
|
1608
|
+
console.log(` ${colors.magenta('⚐')} Duplicate H1s: ${summary.duplicateH1s}`);
|
|
1609
|
+
console.log(` ${colors.gray('○')} Orphan pages: ${summary.orphanPages}`);
|
|
1610
|
+
if (result.siteWideIssues.length > 0) {
|
|
1611
|
+
console.log(colors.bold('\n Site-Wide Issues:'));
|
|
1612
|
+
for (const issue of result.siteWideIssues.slice(0, 10)) {
|
|
1613
|
+
const icon = issue.severity === 'error' ? colors.red('✗') :
|
|
1614
|
+
issue.severity === 'warning' ? colors.yellow('⚠') : colors.gray('○');
|
|
1615
|
+
console.log(` ${icon} ${issue.message}`);
|
|
1616
|
+
if (issue.value) {
|
|
1617
|
+
const truncatedValue = issue.value.length > 50 ? issue.value.slice(0, 47) + '...' : issue.value;
|
|
1618
|
+
console.log(` ${colors.gray(`"${truncatedValue}"`)}`);
|
|
1619
|
+
}
|
|
1620
|
+
const uniquePaths = [...new Set(issue.affectedUrls.map(u => new URL(u).pathname))];
|
|
1621
|
+
if (uniquePaths.length <= 3) {
|
|
1622
|
+
for (const path of uniquePaths) {
|
|
1623
|
+
console.log(` ${colors.gray('→')} ${path}`);
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
else {
|
|
1627
|
+
console.log(` ${colors.gray(`→ ${uniquePaths.length} pages affected`)}`);
|
|
1628
|
+
}
|
|
1629
|
+
}
|
|
1630
|
+
if (result.siteWideIssues.length > 10) {
|
|
1631
|
+
console.log(colors.gray(` ... and ${result.siteWideIssues.length - 10} more issues`));
|
|
1632
|
+
}
|
|
1530
1633
|
}
|
|
1634
|
+
const pagesWithScores = result.pages
|
|
1635
|
+
.filter(p => p.seoReport)
|
|
1636
|
+
.sort((a, b) => (a.seoReport?.score || 0) - (b.seoReport?.score || 0));
|
|
1637
|
+
const seenPaths = new Set();
|
|
1638
|
+
const uniquePages = pagesWithScores.filter(page => {
|
|
1639
|
+
const path = new URL(page.url).pathname;
|
|
1640
|
+
if (seenPaths.has(path))
|
|
1641
|
+
return false;
|
|
1642
|
+
seenPaths.add(path);
|
|
1643
|
+
return true;
|
|
1644
|
+
});
|
|
1645
|
+
if (uniquePages.length > 0) {
|
|
1646
|
+
console.log(colors.bold('\n Pages by SEO Score:'));
|
|
1647
|
+
const worstPages = uniquePages.slice(0, 5);
|
|
1648
|
+
for (const page of worstPages) {
|
|
1649
|
+
const score = page.seoReport?.score || 0;
|
|
1650
|
+
const grade = page.seoReport?.grade || '?';
|
|
1651
|
+
const path = new URL(page.url).pathname;
|
|
1652
|
+
const scoreColor = score >= 80 ? colors.green : score >= 60 ? colors.yellow : colors.red;
|
|
1653
|
+
console.log(` ${scoreColor(`${score.toString().padStart(3)}`)} ${colors.gray(`[${grade}]`)} ${path.slice(0, 50)}`);
|
|
1654
|
+
}
|
|
1655
|
+
if (uniquePages.length > 5) {
|
|
1656
|
+
console.log(colors.gray(` ... and ${uniquePages.length - 5} more pages`));
|
|
1657
|
+
}
|
|
1658
|
+
}
|
|
1659
|
+
if (outputFile) {
|
|
1660
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
1661
|
+
}
|
|
1662
|
+
this.lastResponse = result;
|
|
1663
|
+
console.log(colors.gray('\n Result stored in lastResponse.'));
|
|
1664
|
+
}
|
|
1665
|
+
catch (error) {
|
|
1666
|
+
console.error(colors.red(`SEO Spider failed: ${error.message}`));
|
|
1531
1667
|
}
|
|
1532
|
-
this.lastResponse = result;
|
|
1533
|
-
console.log(colors.gray('\n Result stored in lastResponse. Use $links to explore.'));
|
|
1534
1668
|
}
|
|
1535
|
-
|
|
1536
|
-
|
|
1669
|
+
else {
|
|
1670
|
+
const spider = new Spider({
|
|
1671
|
+
maxDepth,
|
|
1672
|
+
maxPages,
|
|
1673
|
+
concurrency,
|
|
1674
|
+
sameDomain: true,
|
|
1675
|
+
delay: 100,
|
|
1676
|
+
onProgress: (progress) => {
|
|
1677
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
1678
|
+
},
|
|
1679
|
+
});
|
|
1680
|
+
try {
|
|
1681
|
+
const result = await spider.crawl(url);
|
|
1682
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
1683
|
+
console.log(colors.green(`\n✔ Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
1684
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
1685
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
1686
|
+
console.log(` ${colors.cyan('Errors')}: ${result.errors.length}`);
|
|
1687
|
+
const byDepth = new Map();
|
|
1688
|
+
for (const page of result.pages) {
|
|
1689
|
+
byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
|
|
1690
|
+
}
|
|
1691
|
+
console.log(colors.bold('\n Pages by depth:'));
|
|
1692
|
+
for (const [depth, count] of Array.from(byDepth.entries()).sort((a, b) => a[0] - b[0])) {
|
|
1693
|
+
const bar = '█'.repeat(Math.min(count, 40));
|
|
1694
|
+
console.log(` ${colors.gray(`d${depth}:`)} ${bar} ${count}`);
|
|
1695
|
+
}
|
|
1696
|
+
const topPages = [...result.pages]
|
|
1697
|
+
.filter(p => !p.error)
|
|
1698
|
+
.sort((a, b) => b.links.length - a.links.length)
|
|
1699
|
+
.slice(0, 10);
|
|
1700
|
+
if (topPages.length > 0) {
|
|
1701
|
+
console.log(colors.bold('\n Top pages by outgoing links:'));
|
|
1702
|
+
for (const page of topPages) {
|
|
1703
|
+
const title = page.title.slice(0, 40) || new URL(page.url).pathname;
|
|
1704
|
+
console.log(` ${colors.cyan(page.links.length.toString().padStart(3))} ${title}`);
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
const formatError = (error) => {
|
|
1708
|
+
const statusMatch = error.match(/status code (\d{3})/i);
|
|
1709
|
+
if (statusMatch) {
|
|
1710
|
+
return `HTTP ${statusMatch[1]}`;
|
|
1711
|
+
}
|
|
1712
|
+
return error.length > 50 ? error.slice(0, 47) + '...' : error;
|
|
1713
|
+
};
|
|
1714
|
+
if (result.errors.length > 0 && result.errors.length <= 10) {
|
|
1715
|
+
console.log(colors.bold('\n Errors:'));
|
|
1716
|
+
for (const err of result.errors) {
|
|
1717
|
+
const path = new URL(err.url).pathname;
|
|
1718
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(25)} ${colors.gray('→')} ${formatError(err.error)}`);
|
|
1719
|
+
}
|
|
1720
|
+
}
|
|
1721
|
+
else if (result.errors.length > 10) {
|
|
1722
|
+
console.log(colors.yellow(`\n ${result.errors.length} errors (showing first 10):`));
|
|
1723
|
+
for (const err of result.errors.slice(0, 10)) {
|
|
1724
|
+
const path = new URL(err.url).pathname;
|
|
1725
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(25)} ${colors.gray('→')} ${formatError(err.error)}`);
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
if (outputFile) {
|
|
1729
|
+
const reportData = {
|
|
1730
|
+
...result,
|
|
1731
|
+
visited: Array.from(result.visited),
|
|
1732
|
+
generatedAt: new Date().toISOString(),
|
|
1733
|
+
};
|
|
1734
|
+
await fs.writeFile(outputFile, JSON.stringify(reportData, null, 2), 'utf-8');
|
|
1735
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
1736
|
+
}
|
|
1737
|
+
this.lastResponse = result;
|
|
1738
|
+
console.log(colors.gray('\n Result stored in lastResponse. Use $links to explore.'));
|
|
1739
|
+
}
|
|
1740
|
+
catch (error) {
|
|
1741
|
+
console.error(colors.red(`Spider failed: ${error.message}`));
|
|
1742
|
+
}
|
|
1537
1743
|
}
|
|
1538
1744
|
console.log('');
|
|
1539
1745
|
}
|
|
@@ -2464,7 +2670,7 @@ ${colors.bold('Network:')}
|
|
|
2464
2670
|
${colors.bold('Web Crawler:')}
|
|
2465
2671
|
${colors.green('spider <url>')} Crawl website following internal links.
|
|
2466
2672
|
${colors.gray('Options:')}
|
|
2467
|
-
${colors.white('--depth=
|
|
2673
|
+
${colors.white('--depth=4')} ${colors.gray('Maximum depth to crawl')}
|
|
2468
2674
|
${colors.white('--limit=100')} ${colors.gray('Maximum pages to crawl')}
|
|
2469
2675
|
${colors.white('--concurrency=5')} ${colors.gray('Parallel requests')}
|
|
2470
2676
|
|
|
@@ -2485,7 +2691,7 @@ ${colors.bold('Network:')}
|
|
|
2485
2691
|
› post /post name="Neo" active:=true role:Admin
|
|
2486
2692
|
› load /heavy-endpoint users=100 mode=stress
|
|
2487
2693
|
› chat openai gpt-5.1
|
|
2488
|
-
› spider
|
|
2694
|
+
› spider example.com depth=2 limit=50
|
|
2489
2695
|
`);
|
|
2490
2696
|
}
|
|
2491
2697
|
}
|
package/dist/scrape/spider.d.ts
CHANGED
|
@@ -42,6 +42,7 @@ export interface SpiderResult {
|
|
|
42
42
|
export declare class Spider {
|
|
43
43
|
private options;
|
|
44
44
|
private client;
|
|
45
|
+
private pool;
|
|
45
46
|
private visited;
|
|
46
47
|
private queue;
|
|
47
48
|
private results;
|
|
@@ -49,6 +50,7 @@ export declare class Spider {
|
|
|
49
50
|
private baseHost;
|
|
50
51
|
private running;
|
|
51
52
|
private aborted;
|
|
53
|
+
private pendingCount;
|
|
52
54
|
constructor(options?: SpiderOptions);
|
|
53
55
|
crawl(startUrl: string): Promise<SpiderResult>;
|
|
54
56
|
private crawlPage;
|
package/dist/scrape/spider.js
CHANGED
|
@@ -1,9 +1,30 @@
|
|
|
1
1
|
import { createClient } from '../core/client.js';
|
|
2
2
|
import { ScrapeDocument } from './document.js';
|
|
3
|
+
import { RequestPool } from '../utils/request-pool.js';
|
|
4
|
+
const TRACKING_PARAMS = new Set([
|
|
5
|
+
'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
|
|
6
|
+
'gclid', 'gclsrc', 'dclid',
|
|
7
|
+
'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
|
|
8
|
+
'msclkid',
|
|
9
|
+
'twclid',
|
|
10
|
+
'ref', 'referer', 'referrer', 'source',
|
|
11
|
+
'_ga', '_gl', '_hsenc', '_hsmi',
|
|
12
|
+
'mc_cid', 'mc_eid',
|
|
13
|
+
'yclid', 'ymclid',
|
|
14
|
+
'igshid',
|
|
15
|
+
'_t', 't', 'timestamp', 'ts', 'nocache', 'cache',
|
|
16
|
+
]);
|
|
3
17
|
function normalizeUrl(urlStr) {
|
|
4
18
|
try {
|
|
5
19
|
const url = new URL(urlStr);
|
|
6
20
|
url.hash = '';
|
|
21
|
+
const paramsToDelete = [];
|
|
22
|
+
url.searchParams.forEach((_, key) => {
|
|
23
|
+
if (TRACKING_PARAMS.has(key.toLowerCase())) {
|
|
24
|
+
paramsToDelete.push(key);
|
|
25
|
+
}
|
|
26
|
+
});
|
|
27
|
+
paramsToDelete.forEach(key => url.searchParams.delete(key));
|
|
7
28
|
url.searchParams.sort();
|
|
8
29
|
if (url.pathname !== '/' && url.pathname.endsWith('/')) {
|
|
9
30
|
url.pathname = url.pathname.slice(0, -1);
|
|
@@ -54,6 +75,7 @@ function sleep(ms) {
|
|
|
54
75
|
export class Spider {
|
|
55
76
|
options;
|
|
56
77
|
client;
|
|
78
|
+
pool;
|
|
57
79
|
visited = new Set();
|
|
58
80
|
queue = [];
|
|
59
81
|
results = [];
|
|
@@ -61,9 +83,10 @@ export class Spider {
|
|
|
61
83
|
baseHost = '';
|
|
62
84
|
running = false;
|
|
63
85
|
aborted = false;
|
|
86
|
+
pendingCount = 0;
|
|
64
87
|
constructor(options = {}) {
|
|
65
88
|
this.options = {
|
|
66
|
-
maxDepth: options.maxDepth ??
|
|
89
|
+
maxDepth: options.maxDepth ?? 4,
|
|
67
90
|
maxPages: options.maxPages ?? 100,
|
|
68
91
|
sameDomain: options.sameDomain ?? true,
|
|
69
92
|
concurrency: options.concurrency ?? 5,
|
|
@@ -83,42 +106,60 @@ export class Spider {
|
|
|
83
106
|
'User-Agent': this.options.userAgent,
|
|
84
107
|
},
|
|
85
108
|
});
|
|
109
|
+
this.pool = new RequestPool({
|
|
110
|
+
concurrency: this.options.concurrency,
|
|
111
|
+
...(this.options.delay > 0 ? {
|
|
112
|
+
requestsPerInterval: 1,
|
|
113
|
+
interval: this.options.delay,
|
|
114
|
+
} : {}),
|
|
115
|
+
});
|
|
86
116
|
}
|
|
87
117
|
async crawl(startUrl) {
|
|
88
118
|
const startTime = performance.now();
|
|
89
119
|
const normalizedStart = normalizeUrl(startUrl);
|
|
90
120
|
this.baseHost = new URL(normalizedStart).hostname;
|
|
91
121
|
this.visited.clear();
|
|
92
|
-
this.queue = [
|
|
122
|
+
this.queue = [];
|
|
93
123
|
this.results = [];
|
|
94
124
|
this.errors = [];
|
|
95
125
|
this.running = true;
|
|
96
126
|
this.aborted = false;
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
127
|
+
this.pendingCount = 0;
|
|
128
|
+
const pending = new Map();
|
|
129
|
+
const scheduleUrl = (item) => {
|
|
130
|
+
const normalized = normalizeUrl(item.url);
|
|
131
|
+
if (this.visited.has(normalized))
|
|
132
|
+
return;
|
|
133
|
+
if (pending.has(normalized))
|
|
134
|
+
return;
|
|
135
|
+
if (item.depth > this.options.maxDepth)
|
|
136
|
+
return;
|
|
137
|
+
if (this.results.length + pending.size >= this.options.maxPages)
|
|
138
|
+
return;
|
|
139
|
+
this.visited.add(normalized);
|
|
140
|
+
this.pendingCount++;
|
|
141
|
+
const promise = this.pool.run(() => this.crawlPage({ ...item, url: normalized }))
|
|
142
|
+
.finally(() => {
|
|
143
|
+
pending.delete(normalized);
|
|
144
|
+
this.pendingCount--;
|
|
145
|
+
});
|
|
146
|
+
pending.set(normalized, promise);
|
|
147
|
+
};
|
|
148
|
+
scheduleUrl({ url: normalizedStart, depth: 0 });
|
|
149
|
+
while ((pending.size > 0 || this.queue.length > 0) && !this.aborted) {
|
|
150
|
+
while (this.queue.length > 0 && !this.aborted) {
|
|
103
151
|
const item = this.queue.shift();
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
108
|
-
if (item.depth > this.options.maxDepth) {
|
|
109
|
-
continue;
|
|
110
|
-
}
|
|
111
|
-
this.visited.add(normalized);
|
|
112
|
-
batch.push({ ...item, url: normalized });
|
|
113
|
-
}
|
|
114
|
-
if (batch.length === 0) {
|
|
115
|
-
continue;
|
|
152
|
+
if (this.results.length + pending.size >= this.options.maxPages)
|
|
153
|
+
break;
|
|
154
|
+
scheduleUrl(item);
|
|
116
155
|
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
await sleep(this.options.delay);
|
|
156
|
+
if (pending.size > 0) {
|
|
157
|
+
await Promise.race(pending.values());
|
|
120
158
|
}
|
|
121
159
|
}
|
|
160
|
+
if (pending.size > 0) {
|
|
161
|
+
await Promise.all(pending.values());
|
|
162
|
+
}
|
|
122
163
|
this.running = false;
|
|
123
164
|
return {
|
|
124
165
|
startUrl: normalizedStart,
|
package/dist/seo/analyzer.js
CHANGED
|
@@ -57,6 +57,21 @@ export class SeoAnalyzer {
|
|
|
57
57
|
checks,
|
|
58
58
|
title: meta.title ? { text: meta.title, length: meta.title.length } : undefined,
|
|
59
59
|
metaDescription: meta.description ? { text: meta.description, length: meta.description.length } : undefined,
|
|
60
|
+
openGraph: Object.keys(og).length > 0 ? {
|
|
61
|
+
title: og.title,
|
|
62
|
+
description: og.description,
|
|
63
|
+
image: Array.isArray(og.image) ? og.image[0] : og.image,
|
|
64
|
+
url: og.url,
|
|
65
|
+
type: og.type,
|
|
66
|
+
siteName: og.siteName,
|
|
67
|
+
} : undefined,
|
|
68
|
+
twitterCard: Object.keys(twitter).length > 0 ? {
|
|
69
|
+
card: twitter.card,
|
|
70
|
+
title: twitter.title,
|
|
71
|
+
description: twitter.description,
|
|
72
|
+
image: Array.isArray(twitter.image) ? twitter.image[0] : twitter.image,
|
|
73
|
+
site: twitter.site,
|
|
74
|
+
} : undefined,
|
|
60
75
|
headings: headings,
|
|
61
76
|
content,
|
|
62
77
|
links: linkAnalysis,
|
package/dist/seo/index.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
export { SeoAnalyzer, analyzeSeo } from './analyzer.js';
|
|
2
|
+
export { SeoSpider, seoSpider } from './seo-spider.js';
|
|
3
|
+
export type { SeoSpiderOptions, SeoPageResult, SiteWideIssue, SeoSpiderResult, } from './seo-spider.js';
|
|
2
4
|
export { SeoRulesEngine, createRulesEngine, SEO_THRESHOLDS, ALL_SEO_RULES, } from './rules/index.js';
|
|
3
|
-
export type { SeoReport, SeoCheckResult, SeoStatus, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
|
|
5
|
+
export type { SeoReport, SeoCheckResult, SeoStatus, SeoTiming, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
|
|
4
6
|
export type { SeoRule, RuleContext, RuleResult, RuleEvidence, RuleCategory, RuleSeverity, RulesEngineOptions, } from './rules/index.js';
|
|
5
7
|
export type { SeoAnalyzerFullOptions } from './analyzer.js';
|
package/dist/seo/index.js
CHANGED
|
package/dist/seo/seo-spider.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { SpiderOptions, SpiderResult, SpiderPageResult } from '../scrape/spider.js';
|
|
2
|
+
import type { SeoReport } from './types.js';
|
|
3
|
+
export interface SeoSpiderOptions extends SpiderOptions {
|
|
4
|
+
seo?: boolean;
|
|
5
|
+
output?: string;
|
|
6
|
+
onSeoAnalysis?: (result: SeoPageResult) => void;
|
|
7
|
+
}
|
|
8
|
+
export interface SeoPageResult extends SpiderPageResult {
|
|
9
|
+
seoReport?: SeoReport;
|
|
10
|
+
}
|
|
11
|
+
export interface SiteWideIssue {
|
|
12
|
+
type: 'duplicate-title' | 'duplicate-description' | 'duplicate-h1' | 'missing-canonical' | 'orphan-page';
|
|
13
|
+
severity: 'error' | 'warning' | 'info';
|
|
14
|
+
message: string;
|
|
15
|
+
affectedUrls: string[];
|
|
16
|
+
value?: string;
|
|
17
|
+
}
|
|
18
|
+
export interface SeoSpiderResult extends Omit<SpiderResult, 'pages'> {
|
|
19
|
+
pages: SeoPageResult[];
|
|
20
|
+
siteWideIssues: SiteWideIssue[];
|
|
21
|
+
summary: {
|
|
22
|
+
totalPages: number;
|
|
23
|
+
pagesWithErrors: number;
|
|
24
|
+
pagesWithWarnings: number;
|
|
25
|
+
avgScore: number;
|
|
26
|
+
duplicateTitles: number;
|
|
27
|
+
duplicateDescriptions: number;
|
|
28
|
+
duplicateH1s: number;
|
|
29
|
+
orphanPages: number;
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
export declare class SeoSpider {
|
|
33
|
+
private spider;
|
|
34
|
+
private options;
|
|
35
|
+
private seoResults;
|
|
36
|
+
constructor(options?: SeoSpiderOptions);
|
|
37
|
+
crawl(startUrl: string): Promise<SeoSpiderResult>;
|
|
38
|
+
private analyzePages;
|
|
39
|
+
private createReportFromPageData;
|
|
40
|
+
private detectSiteWideIssues;
|
|
41
|
+
private calculateSummary;
|
|
42
|
+
private scoreToGrade;
|
|
43
|
+
private saveReport;
|
|
44
|
+
abort(): void;
|
|
45
|
+
isRunning(): boolean;
|
|
46
|
+
}
|
|
47
|
+
export declare function seoSpider(url: string, options?: SeoSpiderOptions): Promise<SeoSpiderResult>;
|
|
package/dist/seo/seo-spider.js
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
import { Spider } from '../scrape/spider.js';
|
|
2
|
+
import { analyzeSeo } from './analyzer.js';
|
|
3
|
+
import { createClient } from '../core/client.js';
|
|
4
|
+
import * as fs from 'fs/promises';
|
|
5
|
+
/**
 * Wraps a `Spider` crawl and, when `options.seo` is truthy, augments the crawl
 * result with a per-page SEO report, site-wide issues (duplicate titles,
 * descriptions and H1s, orphan pages) and an aggregated summary.
 */
export class SeoSpider {
    spider;
    options;
    /** Map of page URL -> SEO report for every page analyzed by this instance. */
    seoResults = new Map();

    /**
     * @param {object} [options] Options shared with the underlying `Spider`; this
     *   class additionally reads `seo`, `output`, `timeout`, `userAgent` and
     *   `onSeoAnalysis`.
     */
    constructor(options = {}) {
        this.options = options;
        this.spider = new Spider(options);
    }

    /**
     * Crawls the site starting at `startUrl`.
     *
     * When `options.seo` is falsy the crawl result is returned with an empty
     * `siteWideIssues` list and a zeroed summary. Otherwise every page is
     * analyzed and, if `options.output` is set, the result is written to disk.
     *
     * @param {string} startUrl URL at which the crawl starts.
     * @returns {Promise<object>} Crawl result extended with `siteWideIssues` and `summary`.
     */
    async crawl(startUrl) {
        const result = await this.spider.crawl(startUrl);
        if (!this.options.seo) {
            return {
                ...result,
                pages: result.pages,
                siteWideIssues: [],
                summary: {
                    totalPages: result.pages.length,
                    pagesWithErrors: 0,
                    pagesWithWarnings: 0,
                    avgScore: 0,
                    duplicateTitles: 0,
                    duplicateDescriptions: 0,
                    duplicateH1s: 0,
                    orphanPages: 0,
                },
            };
        }
        const seoPages = await this.analyzePages(result.pages);
        const siteWideIssues = this.detectSiteWideIssues(seoPages);
        const summary = this.calculateSummary(seoPages, siteWideIssues);
        const seoResult = {
            ...result,
            pages: seoPages,
            siteWideIssues,
            summary,
        };
        if (this.options.output) {
            await this.saveReport(seoResult);
        }
        return seoResult;
    }

    /**
     * Fetches each successfully crawled page and runs `analyzeSeo` on its HTML.
     *
     * Pages that failed during the crawl (an `error`, or `status >= 400`) and
     * pages whose fetch/analysis throws are returned with `seoReport: undefined`.
     * Exactly one entry is produced per input page, in input order.
     *
     * @param {object[]} pages Page records produced by the crawl.
     * @returns {Promise<object[]>} The same pages, each with a `seoReport` property.
     */
    async analyzePages(pages) {
        const results = [];
        const client = createClient({
            timeout: this.options.timeout || 10000,
            headers: {
                'User-Agent': this.options.userAgent || 'Recker Spider/1.0',
            },
        });
        for (const page of pages) {
            if (page.error || page.status >= 400) {
                results.push({ ...page, seoReport: undefined });
                continue;
            }
            let seoReport;
            let analyzed = false;
            try {
                const response = await client.get(page.url);
                const html = await response.text();
                seoReport = await analyzeSeo(html, { baseUrl: page.url });
                analyzed = true;
            }
            catch {
                // Best effort: a page that cannot be fetched or analyzed is kept
                // in the result without a report instead of failing the crawl.
                seoReport = undefined;
            }
            const seoPage = { ...page, seoReport };
            results.push(seoPage);
            if (!analyzed) {
                continue;
            }
            this.seoResults.set(page.url, seoReport);
            try {
                // Kept outside the analysis try-block: a throwing callback must
                // not cause the page to be recorded a second time.
                this.options.onSeoAnalysis?.(seoPage);
            }
            catch {
                // Errors thrown by the user callback are ignored, as before.
            }
        }
        return results;
    }

    /**
     * Builds a minimal SEO report from crawl data alone (title and links),
     * without fetching or parsing the page HTML. Sections that cannot be
     * derived from the page record are filled with zero/empty values.
     *
     * @param {object} page Page record; reads `url`, `title` and `links`.
     * @returns {object} Report containing `score`, `grade`, `checks` and metric sections.
     */
    createReportFromPageData(page) {
        // A page record without a links array is treated as having no links.
        const links = page.links ?? [];
        const checks = [];
        if (page.title) {
            const titleLength = page.title.length;
            if (titleLength < 30) {
                checks.push({
                    name: 'Title Length',
                    status: 'warn',
                    message: `Title is too short (${titleLength} chars)`,
                    value: titleLength,
                    recommendation: 'Title should be 50-60 characters',
                });
            }
            else if (titleLength > 60) {
                checks.push({
                    name: 'Title Length',
                    status: 'warn',
                    message: `Title is too long (${titleLength} chars)`,
                    value: titleLength,
                    recommendation: 'Title should be 50-60 characters',
                });
            }
            else {
                checks.push({
                    name: 'Title Length',
                    status: 'pass',
                    message: `Good title length (${titleLength} chars)`,
                    value: titleLength,
                });
            }
        }
        else {
            checks.push({
                name: 'Title',
                status: 'fail',
                message: 'Page has no title',
                recommendation: 'Add a descriptive <title> tag',
            });
        }
        const internalLinks = links.filter((l) => l.type === 'internal').length;
        const externalLinks = links.filter((l) => l.type === 'external').length;
        if (internalLinks === 0) {
            checks.push({
                name: 'Internal Links',
                status: 'warn',
                message: 'No internal links found',
                recommendation: 'Add internal links to improve site structure',
            });
        }
        else {
            checks.push({
                name: 'Internal Links',
                status: 'pass',
                message: `${internalLinks} internal links found`,
                value: internalLinks,
            });
        }
        // pass = 100 points, warn = 50, fail = 0; the score is the mean over all checks.
        const scoreSum = checks.reduce((sum, c) => {
            if (c.status === 'pass')
                return sum + 100;
            if (c.status === 'warn')
                return sum + 50;
            return sum;
        }, 0);
        const score = checks.length > 0 ? Math.round(scoreSum / checks.length) : 0;
        return {
            url: page.url,
            timestamp: new Date(),
            grade: this.scoreToGrade(score),
            score,
            checks,
            title: page.title ? { text: page.title, length: page.title.length } : undefined,
            headings: {
                structure: [],
                h1Count: 0,
                hasProperHierarchy: false,
                issues: [],
            },
            content: {
                wordCount: 0,
                characterCount: 0,
                sentenceCount: 0,
                paragraphCount: 0,
                readingTimeMinutes: 0,
                avgWordsPerSentence: 0,
                avgParagraphLength: 0,
                listCount: 0,
                strongTagCount: 0,
                emTagCount: 0,
            },
            links: {
                total: links.length,
                internal: internalLinks,
                external: externalLinks,
                nofollow: 0,
                broken: 0,
                withoutText: links.filter((l) => !l.text?.trim()).length,
                sponsoredLinks: 0,
                ugcLinks: 0,
            },
            images: {
                total: 0,
                withAlt: 0,
                withoutAlt: 0,
                lazy: 0,
                missingDimensions: 0,
                modernFormats: 0,
                altTextLengths: [],
                imageFilenames: [],
                imagesWithAsyncDecoding: 0,
            },
            social: {
                openGraph: {
                    present: false,
                    hasTitle: false,
                    hasDescription: false,
                    hasImage: false,
                    hasUrl: false,
                    issues: [],
                },
                twitterCard: {
                    present: false,
                    hasCard: false,
                    hasTitle: false,
                    hasDescription: false,
                    hasImage: false,
                    issues: [],
                },
            },
            technical: {
                hasCanonical: false,
                hasRobotsMeta: false,
                hasViewport: false,
                hasCharset: false,
                hasLang: false,
            },
            jsonLd: {
                count: 0,
                types: [],
            },
        };
    }

    /**
     * Detects issues that only show up across pages: titles, meta descriptions
     * and first H1 headings shared by more than one page, and pages below the
     * start depth that no crawled page links to.
     *
     * @param {object[]} pages Pages as returned by `analyzePages`.
     * @returns {object[]} Issues with `type`, `severity`, `message`, `affectedUrls`
     *   and, for duplicates, the duplicated `value`.
     */
    detectSiteWideIssues(pages) {
        const issues = [];
        const titleGroups = new Map();
        const descriptionGroups = new Map();
        const h1Groups = new Map();
        const addToGroup = (groups, key, url) => {
            if (!key)
                return;
            const urls = groups.get(key);
            if (urls) {
                urls.push(url);
            }
            else {
                groups.set(key, [url]);
            }
        };
        for (const page of pages) {
            const report = page.seoReport;
            if (!report)
                continue;
            addToGroup(titleGroups, report.title?.text?.trim(), page.url);
            addToGroup(descriptionGroups, report.metaDescription?.text?.trim(), page.url);
            addToGroup(h1Groups, report.headings?.structure?.find((h) => h.level === 1)?.text?.trim(), page.url);
        }
        const reportDuplicates = (groups, type, severity, label) => {
            for (const [value, urls] of groups) {
                if (urls.length > 1) {
                    issues.push({
                        type,
                        severity,
                        message: `${urls.length} pages share the same ${label}`,
                        affectedUrls: urls,
                        value,
                    });
                }
            }
        };
        reportDuplicates(titleGroups, 'duplicate-title', 'error', 'title');
        reportDuplicates(descriptionGroups, 'duplicate-description', 'warning', 'meta description');
        reportDuplicates(h1Groups, 'duplicate-h1', 'warning', 'H1 heading');
        // Compare URLs without fragment and trailing slash so that a link to
        // "/a/#top" counts as a link to the crawled page "/a".
        const normalize = (url) => {
            const withoutHash = String(url).split('#', 1)[0];
            return withoutHash.length > 1 && withoutHash.endsWith('/')
                ? withoutHash.slice(0, -1)
                : withoutHash;
        };
        const linkedUrls = new Set();
        for (const page of pages) {
            // A page record without a links array contributes no links.
            for (const link of page.links ?? []) {
                if (link.type === 'internal' && link.href) {
                    linkedUrls.add(normalize(link.href));
                }
            }
        }
        // Depth-0 pages are excluded from the check by the original rule.
        const orphanPages = pages
            .filter((p) => p.depth > 0 && !linkedUrls.has(normalize(p.url)))
            .map((p) => p.url);
        if (orphanPages.length > 0) {
            issues.push({
                type: 'orphan-page',
                severity: 'warning',
                message: `${orphanPages.length} page(s) have no internal links pointing to them`,
                affectedUrls: orphanPages,
            });
        }
        return issues;
    }

    /**
     * Aggregates per-page reports and site-wide issues into summary counters.
     *
     * `duplicateTitles`, `duplicateDescriptions` and `duplicateH1s` count issue
     * groups (one per duplicated value); `orphanPages` counts affected URLs.
     *
     * @param {object[]} pages Pages as returned by `analyzePages`.
     * @param {object[]} issues Issues as returned by `detectSiteWideIssues`.
     * @returns {object} Summary counters and the rounded average score.
     */
    calculateSummary(pages, issues) {
        const reports = pages.filter((p) => p.seoReport).map((p) => p.seoReport);
        const countIssues = (type) => issues.filter((i) => i.type === type).length;
        const hasCheckWithStatus = (status) => (report) => report.checks.some((c) => c.status === status);
        const avgScore = reports.length > 0
            ? Math.round(reports.reduce((sum, r) => sum + r.score, 0) / reports.length)
            : 0;
        const orphanPages = issues
            .filter((i) => i.type === 'orphan-page')
            .reduce((sum, i) => sum + i.affectedUrls.length, 0);
        return {
            totalPages: pages.length,
            pagesWithErrors: reports.filter(hasCheckWithStatus('fail')).length,
            pagesWithWarnings: reports.filter(hasCheckWithStatus('warn')).length,
            avgScore,
            duplicateTitles: countIssues('duplicate-title'),
            duplicateDescriptions: countIssues('duplicate-description'),
            duplicateH1s: countIssues('duplicate-h1'),
            orphanPages,
        };
    }

    /**
     * Maps a numeric score to a letter grade: A >= 90, B >= 80, C >= 70,
     * D >= 60, otherwise F.
     *
     * @param {number} score
     * @returns {string}
     */
    scoreToGrade(score) {
        if (score >= 90)
            return 'A';
        if (score >= 80)
            return 'B';
        if (score >= 70)
            return 'C';
        if (score >= 60)
            return 'D';
        return 'F';
    }

    /**
     * Writes the crawl result as pretty-printed JSON to `options.output`.
     * Does nothing when no output path is configured.
     *
     * @param {object} result Result object built by `crawl`.
     * @returns {Promise<void>}
     */
    async saveReport(result) {
        if (!this.options.output)
            return;
        const reportData = {
            ...result,
            // `visited` is converted to an array for serialization; a missing
            // value is written as an empty list instead of throwing.
            visited: Array.from(result.visited ?? []),
            generatedAt: new Date().toISOString(),
        };
        await fs.writeFile(this.options.output, JSON.stringify(reportData, null, 2), 'utf-8');
    }

    /** Aborts the underlying spider's crawl. */
    abort() {
        this.spider.abort();
    }

    /**
     * @returns {boolean} Whatever the underlying spider's `isRunning()` reports.
     */
    isRunning() {
        return this.spider.isRunning();
    }
}
|
|
359
|
+
/**
 * Convenience wrapper that builds a one-off `SeoSpider` from `options` and
 * crawls `url` with it.
 *
 * @param {string} url Start URL for the crawl.
 * @param {object} [options] Options forwarded to the `SeoSpider` constructor.
 * @returns {Promise<object>} Resolves with the result of `SeoSpider#crawl`.
 */
export async function seoSpider(url, options) {
    return new SeoSpider(options).crawl(url);
}
|
package/dist/seo/types.d.ts
CHANGED
|
@@ -86,11 +86,20 @@ export interface TechnicalSeo {
|
|
|
86
86
|
hasLang: boolean;
|
|
87
87
|
langValue?: string;
|
|
88
88
|
}
|
|
89
|
+
/**
 * Optional timing measurements attached to an `SeoReport` through its
 * `timing` field. All fields are optional numbers.
 *
 * - `ttfb` / `total`: the CLI shell fills these with rounded
 *   `performance.now()` differences, i.e. milliseconds. `ttfb` covers the
 *   awaited `client.get(url)` call; `total` also includes reading the body.
 * - `dns`, `tcp`, `tls`, `download`: presumably per-phase durations in the
 *   same unit. NOTE(review): not populated by any code visible here; confirm.
 */
export interface SeoTiming {
|
|
90
|
+
ttfb?: number;
|
|
91
|
+
total?: number;
|
|
92
|
+
dns?: number;
|
|
93
|
+
tcp?: number;
|
|
94
|
+
tls?: number;
|
|
95
|
+
download?: number;
|
|
96
|
+
}
|
|
89
97
|
export interface SeoReport {
|
|
90
98
|
url: string;
|
|
91
99
|
timestamp: Date;
|
|
92
100
|
grade: string;
|
|
93
101
|
score: number;
|
|
102
|
+
timing?: SeoTiming;
|
|
94
103
|
checks: SeoCheckResult[];
|
|
95
104
|
title?: {
|
|
96
105
|
text: string;
|
|
@@ -100,6 +109,21 @@ export interface SeoReport {
|
|
|
100
109
|
text: string;
|
|
101
110
|
length: number;
|
|
102
111
|
};
|
|
112
|
+
openGraph?: {
|
|
113
|
+
title?: string;
|
|
114
|
+
description?: string;
|
|
115
|
+
image?: string;
|
|
116
|
+
url?: string;
|
|
117
|
+
type?: string;
|
|
118
|
+
siteName?: string;
|
|
119
|
+
};
|
|
120
|
+
twitterCard?: {
|
|
121
|
+
card?: string;
|
|
122
|
+
title?: string;
|
|
123
|
+
description?: string;
|
|
124
|
+
image?: string;
|
|
125
|
+
site?: string;
|
|
126
|
+
};
|
|
103
127
|
headings: HeadingAnalysis;
|
|
104
128
|
content: ContentMetrics;
|
|
105
129
|
links: LinkAnalysis;
|