recker 1.0.28 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/tui/shell.d.ts +1 -0
- package/dist/cli/tui/shell.js +339 -5
- package/dist/scrape/index.d.ts +2 -0
- package/dist/scrape/index.js +1 -0
- package/dist/scrape/spider.d.ts +61 -0
- package/dist/scrape/spider.js +250 -0
- package/dist/seo/analyzer.js +27 -0
- package/dist/seo/index.d.ts +3 -1
- package/dist/seo/index.js +1 -0
- package/dist/seo/rules/accessibility.js +620 -54
- package/dist/seo/rules/best-practices.d.ts +2 -0
- package/dist/seo/rules/best-practices.js +188 -0
- package/dist/seo/rules/crawl.d.ts +2 -0
- package/dist/seo/rules/crawl.js +307 -0
- package/dist/seo/rules/cwv.d.ts +2 -0
- package/dist/seo/rules/cwv.js +337 -0
- package/dist/seo/rules/ecommerce.d.ts +2 -0
- package/dist/seo/rules/ecommerce.js +252 -0
- package/dist/seo/rules/i18n.d.ts +2 -0
- package/dist/seo/rules/i18n.js +222 -0
- package/dist/seo/rules/index.d.ts +32 -0
- package/dist/seo/rules/index.js +71 -0
- package/dist/seo/rules/internal-linking.d.ts +2 -0
- package/dist/seo/rules/internal-linking.js +375 -0
- package/dist/seo/rules/local.d.ts +2 -0
- package/dist/seo/rules/local.js +265 -0
- package/dist/seo/rules/pwa.d.ts +2 -0
- package/dist/seo/rules/pwa.js +302 -0
- package/dist/seo/rules/readability.d.ts +2 -0
- package/dist/seo/rules/readability.js +255 -0
- package/dist/seo/rules/security.js +406 -28
- package/dist/seo/rules/social.d.ts +2 -0
- package/dist/seo/rules/social.js +373 -0
- package/dist/seo/rules/types.d.ts +155 -0
- package/dist/seo/seo-spider.d.ts +47 -0
- package/dist/seo/seo-spider.js +362 -0
- package/dist/seo/types.d.ts +24 -0
- package/package.json +1 -1
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { createResult } from './types.js';
|
|
2
|
+
/**
 * Builds one entry of `bestPracticesRules`.
 *
 * The rule's identity (id, name, category, severity) is declared once; a fresh
 * copy of it is passed to `createResult` for every reported outcome, so the
 * metadata on the rule and on its results cannot drift apart.
 *
 * @param {{id: string, name: string, category: string, severity: string}} meta
 *   Identity fields copied onto the rule and forwarded to `createResult`.
 * @param {string} description Human-readable statement of what the rule expects.
 * @param {(ctx: object, report: Function) => (object|null)} evaluate
 *   Rule logic. `report(status, message, details?)` forwards its arguments to
 *   `createResult` after the rule metadata.
 * @returns {{id: string, name: string, category: string, severity: string,
 *   description: string, check: (ctx: object) => (object|null)}}
 */
const defineBestPracticeRule = (meta, description, evaluate) => {
    const report = (...args) => createResult({ ...meta }, ...args);
    return { ...meta, description, check: (ctx) => evaluate(ctx, report) };
};

/** Charset labels accepted as UTF-8 (`utf8` is a standard alias of `utf-8`). */
const BEST_PRACTICE_UTF8_LABELS = new Set(['utf-8', 'utf8']);

/**
 * Splits robots meta content into individual lowercase directives.
 *
 * Whole-token matching matters here: a substring test for `none` would also
 * match valid directives such as `max-image-preview:none`.
 *
 * @param {Array<*>} entries Robots meta values; non-string entries are ignored.
 * @returns {string[]} Lowercase directive tokens.
 */
const splitBestPracticeRobotsDirectives = (entries) => entries
    .filter((entry) => typeof entry === 'string')
    .flatMap((entry) => entry.toLowerCase().split(/[\s,]+/))
    .filter(Boolean);

/**
 * General best-practice SEO rules.
 *
 * Every `check(ctx)` returns `null` when the context field the rule depends on
 * is absent, otherwise the value produced by `createResult`.
 */
export const bestPracticesRules = [
    defineBestPracticeRule(
        { id: 'bp-doctype', name: 'HTML Doctype', category: 'technical', severity: 'error' },
        'Page must have the HTML doctype',
        (ctx, report) => {
            if (ctx.hasDoctype === undefined)
                return null;
            if (ctx.hasDoctype)
                return report('pass', 'Page has HTML doctype');
            return report('fail', 'Page is missing HTML doctype', {
                recommendation: 'Add <!DOCTYPE html> at the start of the document',
                evidence: {
                    expected: '<!DOCTYPE html>',
                    impact: 'Missing doctype triggers quirks mode in browsers',
                    learnMore: 'https://developer.mozilla.org/en-US/docs/Glossary/Doctype',
                },
            });
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-charset', name: 'Character Encoding', category: 'technical', severity: 'error' },
        'Page should properly define charset',
        (ctx, report) => {
            if (ctx.hasCharset === undefined)
                return null;
            if (!ctx.hasCharset) {
                return report('fail', 'Page is missing character encoding declaration', {
                    recommendation: 'Add <meta charset="UTF-8"> in the <head>',
                    evidence: {
                        expected: '<meta charset="UTF-8">',
                        impact: 'Characters may render incorrectly without charset declaration',
                    },
                });
            }
            // Compare on a trimmed, lowercased label so "utf8" or " UTF-8 " is
            // not reported as a non-UTF-8 encoding.
            const isUtf8 = !ctx.charset
                || BEST_PRACTICE_UTF8_LABELS.has(String(ctx.charset).trim().toLowerCase());
            if (!isUtf8) {
                return report('warn', `Non-UTF-8 charset: ${ctx.charset}`, {
                    value: ctx.charset,
                    recommendation: 'Use UTF-8 encoding for maximum compatibility',
                    evidence: {
                        found: ctx.charset,
                        expected: 'UTF-8',
                    },
                });
            }
            return report('pass', `Charset: ${ctx.charset || 'UTF-8'}`);
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-http-status', name: 'HTTP Status Code', category: 'technical', severity: 'error' },
        'Page should have successful HTTP status code',
        (ctx, report) => {
            if (ctx.httpStatusCode === undefined)
                return null;
            const status = ctx.httpStatusCode;
            // Order matters: 4xx/5xx is tested first, so the second test only sees 3xx.
            if (status >= 400) {
                return report('fail', `HTTP ${status} error response`, {
                    value: status,
                    recommendation: 'Fix server configuration to return successful status codes',
                    evidence: {
                        found: status,
                        expected: '200-299',
                        impact: 'Error pages will not be indexed by search engines',
                    },
                });
            }
            if (status >= 300) {
                return report('info', `HTTP ${status} redirect response`, {
                    value: status,
                    recommendation: 'Redirects add latency; consider updating links to final URLs',
                });
            }
            return report('pass', `HTTP ${status} OK`);
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-indexable', name: 'Page Indexable', category: 'technical', severity: 'error' },
        'Page should not be blocked from indexing',
        (ctx, report) => {
            // `== null` also skips an explicit null, which previously crashed on
            // `null.toLowerCase()`.
            if (ctx.metaRobots == null)
                return null;
            const robots = Array.isArray(ctx.metaRobots) ? ctx.metaRobots : [ctx.metaRobots];
            const directives = splitBestPracticeRobotsDirectives(robots);
            // `none` is shorthand for "noindex, nofollow".
            const blocked = directives.includes('noindex') || directives.includes('none');
            if (blocked) {
                return report('fail', 'Page is blocked from indexing', {
                    recommendation: 'Remove noindex directive if this page should be searchable',
                    evidence: {
                        found: robots.join(', '),
                        expected: 'No noindex directive',
                        impact: 'Page will not appear in search results',
                    },
                });
            }
            return report('pass', 'Page is indexable');
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-links-crawlable', name: 'Crawlable Links', category: 'links', severity: 'warning' },
        'Links should be crawlable by search engines',
        (ctx, report) => {
            if (ctx.uncrawlableLinksCount === undefined)
                return null;
            const count = ctx.uncrawlableLinksCount;
            if (count > 0) {
                return report('warn', `${count} link(s) are not crawlable`, {
                    value: count,
                    recommendation: 'Use <a href> tags instead of JavaScript navigation for important links',
                    evidence: {
                        found: count,
                        expected: 0,
                        impact: 'Search engines cannot discover linked content',
                        learnMore: 'https://developers.google.com/search/docs/crawling-indexing/links-crawlable',
                    },
                });
            }
            return report('pass', 'All links are crawlable');
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-robots-txt', name: 'Valid robots.txt', category: 'technical', severity: 'info' },
        'robots.txt should be valid and accessible',
        (ctx, report) => {
            if (ctx.robotsTxtValid === undefined)
                return null;
            if (ctx.robotsTxtValid)
                return report('pass', 'robots.txt is valid');
            return report('warn', 'robots.txt is invalid or inaccessible', {
                recommendation: 'Ensure robots.txt is valid and returns 200 status',
                evidence: {
                    found: ctx.robotsTxtError || 'Invalid',
                    expected: 'Valid robots.txt',
                    impact: 'Invalid robots.txt may cause crawling issues',
                    learnMore: 'https://developers.google.com/search/docs/crawling-indexing/robots/intro',
                },
            });
        },
    ),
    defineBestPracticeRule(
        { id: 'bp-structured-data', name: 'Structured Data', category: 'structured-data', severity: 'info' },
        'Page should have valid structured data',
        (ctx, report) => {
            if (ctx.structuredDataErrors === undefined)
                return null;
            const errors = ctx.structuredDataErrors;
            // Validation errors take precedence over the presence check below.
            if (errors > 0) {
                return report('warn', `${errors} structured data error(s) found`, {
                    value: errors,
                    recommendation: 'Validate structured data using Google Rich Results Test',
                    evidence: {
                        found: errors,
                        expected: 0,
                        impact: 'Invalid structured data may not generate rich results',
                        learnMore: 'https://search.google.com/test/rich-results',
                    },
                });
            }
            if (ctx.jsonLdCount && ctx.jsonLdCount > 0) {
                return report('pass', `${ctx.jsonLdCount} structured data block(s) found`);
            }
            return report('info', 'No structured data found', {
                recommendation: 'Add schema.org structured data for rich search results',
            });
        },
    ),
];
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
import { createResult } from './types.js';
|
|
2
|
+
/**
 * Builds one entry of `crawlRules`.
 *
 * The rule's identity (id, name, category, severity) is declared once; a fresh
 * copy of it is passed to `createResult` for every reported outcome.
 *
 * @param {{id: string, name: string, category: string, severity: string}} meta
 *   Identity fields copied onto the rule and forwarded to `createResult`.
 * @param {string} description Human-readable statement of what the rule checks.
 * @param {(ctx: object, report: Function) => (object|null)} evaluate
 *   Rule logic. `report(status, message, details?)` forwards its arguments to
 *   `createResult` after the rule metadata.
 * @returns {{id: string, name: string, category: string, severity: string,
 *   description: string, check: (ctx: object) => (object|null)}}
 */
const defineCrawlRule = (meta, description, evaluate) => {
    const report = (...args) => createResult({ ...meta }, ...args);
    return { ...meta, description, check: (ctx) => evaluate(ctx, report) };
};

/**
 * Normalizes `ctx.metaRobots` to an array so a single string is accepted too.
 *
 * @param {string|Array<*>} metaRobots
 * @returns {Array<*>}
 */
const toCrawlRobotsList = (metaRobots) => (Array.isArray(metaRobots) ? metaRobots : [metaRobots]);

/**
 * Collects the individual lowercase directives found in robots meta content.
 *
 * Directives are matched as whole tokens: a substring test cannot tell
 * `index` from `noindex`, or `none` from `max-image-preview:none`.
 *
 * @param {string|Array<*>} metaRobots Non-string entries are ignored.
 * @returns {Set<string>}
 */
const crawlRobotsDirectives = (metaRobots) => new Set(
    toCrawlRobotsList(metaRobots)
        .filter((entry) => typeof entry === 'string')
        .flatMap((entry) => entry.toLowerCase().split(/[\s,]+/))
        .filter(Boolean),
);

/**
 * Reduces a URL to origin + path (no trailing slash) for self-reference checks.
 *
 * The URL parser already lowercases scheme and host; the path keeps its case
 * because URL paths are case-sensitive. Query string and fragment are dropped.
 *
 * @param {string} raw URL to normalize; may be relative when `base` is given.
 * @param {string} [base] Absolute URL used to resolve a relative `raw`.
 * @returns {string}
 */
const normalizeCrawlUrl = (raw, base) => {
    try {
        const parsed = new URL(raw, base);
        return parsed.origin + parsed.pathname.replace(/\/$/, '');
    }
    catch {
        // Unparseable input: fall back to a plain string comparison key.
        return raw.toLowerCase().replace(/\/$/, '');
    }
};

/** Query parameters treated as tracking noise by `crawl-url-parameters`. */
const CRAWL_TRACKING_PARAMS = ['utm_source', 'utm_medium', 'utm_campaign', 'fbclid', 'gclid', 'ref'];

/** Display-limiting robots directives and the label reported for each. */
const CRAWL_DISPLAY_RESTRICTIONS = [
    ['noarchive', 'noarchive (no cached version)'],
    ['nocache', 'nocache (no cache)'],
    ['nosnippet', 'nosnippet (no search snippet)'],
];

/**
 * Crawlability and indexability rules (sitemap, robots directives,
 * X-Robots-Tag, canonical URLs, tracking parameters, pagination).
 *
 * Every `check(ctx)` returns either `null` (nothing to report) or the value
 * produced by `createResult`.
 */
export const crawlRules = [
    defineCrawlRule(
        { id: 'crawl-sitemap-reference', name: 'Sitemap Reference', category: 'technical', severity: 'info' },
        'Page should reference sitemap location',
        (ctx, report) => {
            if (ctx.hasSitemapLink) {
                return report('pass', `Sitemap link found: ${ctx.sitemapUrl || 'referenced'}`);
            }
            if (ctx.robotsHasSitemap) {
                return report('pass', 'Sitemap referenced in robots.txt');
            }
            return report('info', 'No sitemap reference found', {
                recommendation: 'Add sitemap reference in robots.txt or HTML',
                evidence: {
                    expected: 'Sitemap: https://example.com/sitemap.xml in robots.txt',
                    example: '<link rel="sitemap" type="application/xml" href="/sitemap.xml">',
                    impact: 'Helps search engines discover all pages',
                    learnMore: 'https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-robots-noindex', name: 'Robots Noindex', category: 'technical', severity: 'error' },
        'Check if page is blocked from indexing',
        (ctx, report) => {
            if (!ctx.metaRobots)
                return null;
            const directives = crawlRobotsDirectives(ctx.metaRobots);
            // `none` is shorthand for "noindex, nofollow".
            if (!directives.has('noindex') && !directives.has('none'))
                return null;
            return report('fail', 'Page is set to noindex', {
                evidence: {
                    found: toCrawlRobotsList(ctx.metaRobots).join(', '),
                    issue: 'This page will NOT appear in search results',
                    impact: 'Remove noindex if this page should be indexed',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-robots-nofollow', name: 'Robots Nofollow', category: 'technical', severity: 'warning' },
        'Check if page links are blocked from following',
        (ctx, report) => {
            if (!ctx.metaRobots)
                return null;
            const directives = crawlRobotsDirectives(ctx.metaRobots);
            // `none` is shorthand for "noindex, nofollow".
            if (!directives.has('nofollow') && !directives.has('none'))
                return null;
            return report('warn', 'Page has nofollow directive', {
                evidence: {
                    found: toCrawlRobotsList(ctx.metaRobots).join(', '),
                    issue: 'Links on this page will not pass PageRank',
                    impact: 'Internal pages should usually not have nofollow',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-robots-combined', name: 'Robots Directives', category: 'technical', severity: 'info' },
        'Review all robots meta directives',
        (ctx, report) => {
            if (!ctx.metaRobots || ctx.metaRobots.length === 0) {
                return report('info', 'No robots meta tag (defaults to index, follow)', {
                    recommendation: 'Explicitly set robots directives if needed',
                    evidence: {
                        expected: '<meta name="robots" content="index, follow">',
                        impact: 'Default behavior allows full indexing and following',
                    },
                });
            }
            const robots = toCrawlRobotsList(ctx.metaRobots);
            const directives = crawlRobotsDirectives(robots);
            // Whole-token test: with substring matching "index" is always
            // present whenever "noindex" is, so a conflict was never detectable.
            if (directives.has('index') && directives.has('noindex')) {
                return report('warn', 'Conflicting robots directives detected', {
                    evidence: {
                        found: robots.join(', '),
                        issue: 'Both index and noindex specified',
                    },
                });
            }
            return report('pass', `Robots: ${robots.join(', ')}`);
        },
    ),
    defineCrawlRule(
        { id: 'crawl-x-robots-tag', name: 'X-Robots-Tag Header', category: 'technical', severity: 'warning' },
        'Check X-Robots-Tag HTTP header for indexing directives',
        (ctx, report) => {
            const headers = ctx.responseHeaders;
            if (!headers)
                return null;
            // HTTP header names are case-insensitive; try the two common
            // spellings first, then any other casing present on the object.
            const otherCasing = Object.keys(headers).find((key) => key.toLowerCase() === 'x-robots-tag');
            const xRobotsTag = headers['x-robots-tag']
                || headers['X-Robots-Tag']
                || (otherCasing === undefined ? undefined : headers[otherCasing]);
            if (!xRobotsTag)
                return null;
            // A header value may be an array when the header was sent more than once.
            const tagValue = Array.isArray(xRobotsTag) ? xRobotsTag.join(', ') : xRobotsTag;
            if (tagValue.toLowerCase().includes('noindex')) {
                return report('fail', 'X-Robots-Tag contains noindex', {
                    evidence: {
                        found: tagValue,
                        issue: 'HTTP header is blocking indexing',
                        impact: 'Page will not appear in search results',
                    },
                });
            }
            return report('info', `X-Robots-Tag: ${tagValue}`);
        },
    ),
    defineCrawlRule(
        { id: 'crawl-canonical-present', name: 'Canonical URL', category: 'technical', severity: 'warning' },
        'Pages should have a canonical URL to prevent duplicate content',
        (ctx, report) => {
            if (ctx.hasCanonical)
                return report('pass', `Canonical: ${ctx.canonicalUrl}`);
            return report('warn', 'Missing canonical URL', {
                recommendation: 'Add a canonical link to prevent duplicate content issues',
                evidence: {
                    expected: '<link rel="canonical" href="https://example.com/page">',
                    impact: 'Without canonical, search engines may index multiple versions',
                    learnMore: 'https://developers.google.com/search/docs/crawling-indexing/canonicalization',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-canonical-self', name: 'Canonical Self-Reference', category: 'technical', severity: 'info' },
        'Canonical should point to the current page or explicit alternate',
        (ctx, report) => {
            if (!ctx.hasCanonical || !ctx.canonicalUrl || !ctx.url)
                return null;
            const currentNorm = normalizeCrawlUrl(ctx.url);
            // Resolve against the page URL so a relative canonical such as
            // "/page" is compared as the absolute URL it denotes.
            const canonicalNorm = normalizeCrawlUrl(ctx.canonicalUrl, ctx.url);
            if (currentNorm === canonicalNorm)
                return null;
            return report('info', 'Canonical points to different URL', {
                evidence: {
                    found: [`Current: ${ctx.url}`, `Canonical: ${ctx.canonicalUrl}`],
                    issue: 'This page canonicalizes to a different URL',
                    impact: 'Ensure this is intentional (e.g., www vs non-www consolidation)',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-canonical-absolute', name: 'Canonical Absolute URL', category: 'technical', severity: 'warning' },
        'Canonical URL should be absolute, not relative',
        (ctx, report) => {
            if (!ctx.canonicalUrl)
                return null;
            // URL schemes are case-insensitive ("HTTPS://..." is still absolute).
            if (/^https?:\/\//i.test(ctx.canonicalUrl))
                return null;
            return report('warn', 'Canonical URL is relative', {
                evidence: {
                    found: ctx.canonicalUrl,
                    expected: 'Absolute URL starting with https://',
                    impact: 'Relative canonicals may be misinterpreted',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-canonical-https', name: 'Canonical HTTPS', category: 'technical', severity: 'warning' },
        'Canonical URL should use HTTPS',
        (ctx, report) => {
            if (!ctx.canonicalUrl)
                return null;
            // URL schemes are case-insensitive.
            if (!/^http:\/\//i.test(ctx.canonicalUrl))
                return null;
            return report('warn', 'Canonical URL uses HTTP instead of HTTPS', {
                evidence: {
                    found: ctx.canonicalUrl,
                    expected: 'HTTPS canonical URL',
                    impact: 'Google prefers HTTPS URLs for ranking',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-url-parameters', name: 'URL Parameters', category: 'technical', severity: 'info' },
        'URLs with tracking parameters should have proper canonical',
        (ctx, report) => {
            if (!ctx.url)
                return null;
            let url;
            try {
                url = new URL(ctx.url);
            }
            catch {
                // Deliberate best-effort: an unparseable URL has no query
                // parameters to inspect, so the rule reports nothing.
                return null;
            }
            const hasTracking = CRAWL_TRACKING_PARAMS.some((param) => url.searchParams.has(param));
            if (!hasTracking || ctx.hasCanonical)
                return null;
            return report('warn', 'URL has tracking parameters but no canonical', {
                recommendation: 'Add canonical pointing to clean URL without parameters',
                evidence: {
                    found: url.search,
                    expected: 'Canonical to base URL without tracking params',
                    impact: 'Tracking parameters can cause duplicate content',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-pagination-rel', name: 'Pagination Links', category: 'technical', severity: 'info' },
        'Paginated content should use proper rel attributes',
        (ctx, report) => {
            if (!ctx.isPaginatedPage)
                return null;
            // Either direction is enough: first and last pages only have one neighbour.
            if (ctx.hasRelPrev || ctx.hasRelNext)
                return report('pass', 'Pagination links present');
            return report('info', 'Paginated page missing rel="prev/next" (deprecated but still useful)', {
                recommendation: 'Consider using rel="prev" and rel="next" for pagination',
                evidence: {
                    example: '<link rel="prev" href="/page/1">\n<link rel="next" href="/page/3">',
                    impact: 'Helps search engines understand pagination structure',
                    learnMore: 'https://developers.google.com/search/docs/specialty/ecommerce/pagination-and-incremental-page-loading',
                },
            });
        },
    ),
    defineCrawlRule(
        { id: 'crawl-noarchive', name: 'Cache Directives', category: 'technical', severity: 'info' },
        'Check for noarchive and nocache directives',
        (ctx, report) => {
            if (!ctx.metaRobots)
                return null;
            const directives = crawlRobotsDirectives(ctx.metaRobots);
            const restrictions = CRAWL_DISPLAY_RESTRICTIONS
                .filter(([directive]) => directives.has(directive))
                .map(([, label]) => label);
            if (restrictions.length === 0)
                return null;
            return report('info', `Search restrictions: ${restrictions.join(', ')}`, {
                evidence: {
                    found: restrictions,
                    impact: 'These directives limit how search engines display your page',
                },
            });
        },
    ),
];
|