@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* robots.txt Syntax Validation
|
|
3
|
+
*
|
|
4
|
+
* Validates robots.txt according to Google's specification:
|
|
5
|
+
* https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
|
|
6
|
+
*
|
|
7
|
+
* Valid directives:
|
|
8
|
+
* - User-agent: <bot-name>
|
|
9
|
+
* - Disallow: <path>
|
|
10
|
+
* - Allow: <path>
|
|
11
|
+
* - Sitemap: <url>
|
|
12
|
+
*
|
|
13
|
+
* Common extensions (non-standard but widely supported):
|
|
14
|
+
* - Crawl-delay: <seconds>
|
|
15
|
+
* - Host: <domain> (Yandex)
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { httpGet } from '../../utils/http.js';
|
|
19
|
+
import type { AuditIssue } from '../types.js';
|
|
20
|
+
|
|
21
|
+
/**
 * Outcome of fetching and validating a site's robots.txt.
 */
export interface RobotsValidationData {
  /** False when robots.txt was reported missing or could not be fetched. */
  exists: boolean;
  /** True when no syntax errors were recorded (also true for a missing file). */
  isValid: boolean;
  /** Number of newline-separated lines in the fetched file; 0 when absent. */
  lineCount: number;
  /** Problems that make a line syntactically invalid. */
  errors: RobotsError[];
  /** Non-fatal observations such as non-standard directives. */
  warnings: RobotsWarning[];
  /** Summary of the directives encountered while parsing. */
  directives: {
    /** Distinct User-agent values in order of first appearance. */
    userAgents: string[];
    /** Sitemap URLs that passed URL validation. */
    sitemaps: string[];
    /** True when a `User-agent: *` group was seen. */
    hasWildcardAgent: boolean;
    /** True when any Crawl-delay directive was seen. */
    hasCrawlDelay: boolean;
  };
}
|
|
34
|
+
|
|
35
|
+
/** A syntax error found on a single robots.txt line. */
interface RobotsError {
  /** 1-based line number within robots.txt. */
  line: number;
  /** The offending line as it appeared in the file. */
  content: string;
  /** Human-readable explanation of the problem. */
  message: string;
}
|
|
40
|
+
|
|
41
|
+
/** A non-fatal observation about a single robots.txt line. */
interface RobotsWarning {
  /** 1-based line number within robots.txt. */
  line: number;
  /** The line as it appeared in the file. */
  content: string;
  /** Human-readable explanation of the concern. */
  message: string;
}
|
|
46
|
+
|
|
47
|
+
// Directives from the robots.txt standard; parsed names are lower-cased
// before being compared against this list.
const STANDARD_DIRECTIVES = ['user-agent', 'disallow', 'allow', 'sitemap'];

// Non-standard extensions that are nevertheless commonly accepted
const EXTENDED_DIRECTIVES = [
  'crawl-delay',
  'host',
  'clean-param', // Yandex
  'request-rate', // Some crawlers
];

// Every directive the validator recognizes: standard first, then extensions
const ALL_DIRECTIVES = STANDARD_DIRECTIVES.concat(EXTENDED_DIRECTIVES);
|
|
65
|
+
|
|
66
|
+
/**
 * Fetch `/robots.txt` from the origin of `url` and validate its syntax.
 *
 * @param url Any absolute URL on the site; only its origin is used.
 * @returns Audit issues (one aggregate issue for errors, one for warnings)
 *   plus the structured validation data.
 * @throws TypeError when `url` is not a valid absolute URL (thrown by `new URL`).
 */
export async function validateRobotsTxt(
  url: string
): Promise<{ issues: AuditIssue[]; data: RobotsValidationData }> {
  const issues: AuditIssue[] = [];
  const parsedUrl = new URL(url);
  const robotsUrl = new URL('/robots.txt', parsedUrl.origin).href;

  // Result returned whenever no usable robots.txt body could be retrieved.
  const missingResult = (): { issues: AuditIssue[]; data: RobotsValidationData } => ({
    issues,
    data: {
      exists: false,
      isValid: true, // Missing is not invalid
      lineCount: 0,
      errors: [],
      warnings: [],
      directives: {
        userAgents: [],
        sitemaps: [],
        hasWildcardAgent: false,
        hasCrawlDelay: false,
      },
    },
  });

  try {
    const response = await httpGet<string>(robotsUrl, {
      timeout: 10000,
      validateStatus: () => true,
    });

    // Only a successful response carries a robots.txt body. Error responses
    // (404, 403, 5xx, ...) usually return an HTML error page; parsing that
    // page would report bogus "syntax errors" for a file that does not exist.
    const isSuccess = response.status >= 200 && response.status < 300;
    if (!isSuccess || typeof response.data !== 'string') {
      return missingResult();
    }

    const content: string = response.data;
    const validation = parseAndValidate(content);

    // Generate issues based on validation
    if (validation.errors.length > 0) {
      issues.push({
        code: 'ROBOTS_TXT_INVALID_SYNTAX',
        severity: 'warning',
        category: 'crawlability',
        title: 'robots.txt contains syntax errors',
        description: `Found ${validation.errors.length} syntax error(s) in robots.txt. Invalid syntax may cause crawlers to misinterpret your rules.`,
        impact: 'Search engines may not properly understand your crawling rules, potentially blocking or allowing unintended pages.',
        howToFix: 'Fix the syntax errors in robots.txt. Each directive should be on its own line with format "Directive: value".',
        affectedUrls: [robotsUrl],
        details: {
          errors: validation.errors.map((e) => ({
            line: e.line,
            content: e.content,
            message: e.message,
          })),
        },
      });
    }

    if (validation.warnings.length > 0) {
      issues.push({
        code: 'ROBOTS_TXT_WARNINGS',
        severity: 'notice',
        category: 'crawlability',
        title: 'robots.txt has potential issues',
        description: `Found ${validation.warnings.length} warning(s) in robots.txt.`,
        impact: 'Some crawlers may not recognize non-standard directives.',
        howToFix: 'Review the warnings and update if needed.',
        affectedUrls: [robotsUrl],
        details: {
          warnings: validation.warnings.map((w) => ({
            line: w.line,
            content: w.content,
            message: w.message,
          })),
        },
      });
    }

    return {
      issues,
      data: {
        exists: true,
        isValid: validation.errors.length === 0,
        lineCount: content.split('\n').length,
        errors: validation.errors,
        warnings: validation.warnings,
        directives: validation.directives,
      },
    };
  } catch {
    // Best effort: a failed fetch (network error, timeout) is reported the
    // same way as a missing file rather than failing the whole audit.
    return missingResult();
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Parse robots.txt text line by line and collect syntax errors, warnings,
 * and a summary of the directives found.
 *
 * Blank lines and `#` comments (whole-line or trailing) are ignored. Every
 * remaining line must have the form `Directive: value`, where the directive
 * is one of the recognized names (matched case-insensitively).
 *
 * @param content Raw robots.txt body.
 * @returns The collected errors and warnings (with 1-based line numbers) and
 *   the directive summary.
 */
function parseAndValidate(content: string): {
  errors: RobotsError[];
  warnings: RobotsWarning[];
  directives: RobotsValidationData['directives'];
} {
  const errors: RobotsError[] = [];
  const warnings: RobotsWarning[] = [];
  const userAgents: string[] = [];
  const sitemaps: string[] = [];
  let hasWildcardAgent = false;
  let hasCrawlDelay = false;

  // Accept both LF and CRLF line endings so reported `content` never carries
  // a stray carriage return.
  const lines = content.split(/\r?\n/);
  let currentUserAgent: string | null = null;

  for (const [index, rawLine] of lines.entries()) {
    const lineNum = index + 1;
    const line = rawLine.trim();

    // Skip empty lines and whole-line comments
    if (line === '' || line.startsWith('#')) {
      continue;
    }

    // Strip a trailing inline comment
    const commentIndex = line.indexOf('#');
    const effectiveLine = commentIndex > 0 ? line.substring(0, commentIndex).trim() : line;

    if (effectiveLine === '') {
      continue;
    }

    // Check for colon (required separator)
    const colonIndex = effectiveLine.indexOf(':');
    if (colonIndex === -1) {
      errors.push({
        line: lineNum,
        content: rawLine,
        message: 'Missing colon separator. Format should be "Directive: value"',
      });
      continue;
    }

    const directive = effectiveLine.substring(0, colonIndex).trim().toLowerCase();
    const value = effectiveLine.substring(colonIndex + 1).trim();

    // Check if directive is recognized
    if (!ALL_DIRECTIVES.includes(directive)) {
      errors.push({
        line: lineNum,
        content: rawLine,
        message: `Unknown directive "${directive}". Valid directives: User-agent, Disallow, Allow, Sitemap`,
      });
      continue;
    }

    // Check for non-standard directives
    if (EXTENDED_DIRECTIVES.includes(directive) && !STANDARD_DIRECTIVES.includes(directive)) {
      warnings.push({
        line: lineNum,
        content: rawLine,
        message: `"${directive}" is a non-standard directive. Not all crawlers support it.`,
      });
    }

    // Directive-specific validation
    switch (directive) {
      case 'user-agent':
        if (value === '') {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: 'User-agent value cannot be empty',
          });
        } else {
          currentUserAgent = value;
          if (!userAgents.includes(value)) {
            userAgents.push(value);
          }
          if (value === '*') {
            hasWildcardAgent = true;
          }
        }
        break;

      case 'disallow':
      case 'allow':
        if (currentUserAgent === null) {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: `${directive} must come after a User-agent directive`,
          });
        } else if (value && !isValidPath(value)) {
          // Value can be empty for Disallow (means allow all), so only
          // non-empty paths are checked for unusual characters.
          warnings.push({
            line: lineNum,
            content: rawLine,
            message: `Path "${value}" contains unusual characters`,
          });
        }
        break;

      case 'sitemap':
        // Sitemap can appear anywhere
        if (value === '') {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: 'Sitemap URL cannot be empty',
          });
        } else if (!isValidUrl(value)) {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: `Invalid sitemap URL: "${value}"`,
          });
        } else {
          sitemaps.push(value);
        }
        break;

      case 'crawl-delay':
        hasCrawlDelay = true;
        if (currentUserAgent === null) {
          warnings.push({
            line: lineNum,
            content: rawLine,
            message: 'Crawl-delay should come after a User-agent directive',
          });
        }
        // Number() rejects trailing garbage such as "10abc", which
        // parseFloat() would silently accept; isFinite rules out "Infinity".
        if (value === '' || !Number.isFinite(Number(value))) {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: 'Crawl-delay must be a number',
          });
        }
        break;

      case 'host':
        // Yandex-specific, should be a valid domain
        if (value === '') {
          errors.push({
            line: lineNum,
            content: rawLine,
            message: 'Host value cannot be empty',
          });
        }
        break;
    }
  }

  return {
    errors,
    warnings,
    directives: {
      userAgents,
      sitemaps,
      hasWildcardAgent,
      hasCrawlDelay,
    },
  };
}
|
|
349
|
+
|
|
350
|
+
/**
 * Loose sanity check for an Allow/Disallow path value.
 *
 * Accepts the empty string, a bare `*` or `$`, and anything that begins with
 * `/` or `*` and contains no ASCII control characters.
 */
function isValidPath(path: string): boolean {
  // Trivial values are accepted without further inspection
  const alwaysAccepted = ['', '*', '$'];
  if (alwaysAccepted.includes(path)) {
    return true;
  }

  // Anything else must begin with a slash or a wildcard
  const firstChar = path.charAt(0);
  const hasValidStart = firstChar === '/' || firstChar === '*';

  // Control characters are never expected inside a path
  const hasControlChars = /[\x00-\x1f]/.test(path);

  return hasValidStart && !hasControlChars;
}
|
|
365
|
+
|
|
366
|
+
/**
 * Report whether `url` can be parsed as an absolute URL by the `URL`
 * constructor. Relative references are rejected.
 */
function isValidUrl(url: string): boolean {
  let parsesCleanly = true;
  try {
    // The constructor throws on anything it cannot parse as an absolute URL
    void new URL(url);
  } catch {
    parsesCleanly = false;
  }
  return parsesCleanly;
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Security Headers Checks
|
|
2
|
+
// Checks for CSP, X-Frame-Options, HSTS, and other security headers
|
|
3
|
+
|
|
4
|
+
import { httpGet } from '../../utils/http.js';
|
|
5
|
+
import type { AuditIssue } from '../types.js';
|
|
6
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Security-related response headers observed for a URL.
 */
export interface SecurityHeadersData {
  /** Whether the audited URL uses the https scheme. */
  https: boolean;
  /** Raw header values; `null` when the header was not present. */
  headers: {
    /** Strict-Transport-Security */
    hsts: string | null;
    /** Content-Security-Policy */
    csp: string | null;
    /** X-Frame-Options */
    xFrameOptions: string | null;
    /** X-Content-Type-Options */
    xContentTypeOptions: string | null;
    /** Referrer-Policy */
    referrerPolicy: string | null;
    /** Permissions-Policy, or the older Feature-Policy when that is absent */
    permissionsPolicy: string | null;
  };
  /** `max-age` parsed from the HSTS header; `null` when unavailable. */
  hstsMaxAge: number | null;
  /** True when the HSTS header carries `includeSubDomains`. */
  hstsIncludesSubdomains: boolean;
  /** True when the HSTS header carries `preload`. */
  hstsPreload: boolean;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Parse a Strict-Transport-Security header value.
 *
 * Directive names are matched case-insensitively. The `max-age` value may be
 * a bare token (`max-age=31536000`) or a quoted string
 * (`max-age="31536000"`), with optional whitespace around `=`; RFC 6797
 * permits both forms.
 *
 * @param header Raw header value, or `null` when the header is absent.
 * @returns The parsed `max-age` in seconds (`null` when absent or not
 *   numeric) and whether `includeSubDomains` / `preload` appear in the value.
 */
function parseHSTS(header: string | null): {
  maxAge: number | null;
  includeSubdomains: boolean;
  preload: boolean;
} {
  if (!header) {
    return { maxAge: null, includeSubdomains: false, preload: false };
  }

  const result = { maxAge: null as number | null, includeSubdomains: false, preload: false };

  // Allow optional whitespace around "=" and an optionally quoted value.
  const maxAgeMatch = header.match(/max-age\s*=\s*"?(\d+)"?/i);
  if (maxAgeMatch) {
    result.maxAge = parseInt(maxAgeMatch[1], 10);
  }

  result.includeSubdomains = /includesubdomains/i.test(header);
  result.preload = /preload/i.test(header);

  return result;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Fetch `url` and report which security headers are missing.
 *
 * Checks HTTPS usage, HSTS (only for HTTPS URLs), Content-Security-Policy,
 * X-Frame-Options, X-Content-Type-Options, Referrer-Policy and
 * Permissions-Policy (falling back to the legacy Feature-Policy header).
 *
 * @param url Page URL to request.
 * @returns Audit issues for each missing protection plus the raw header
 *   values. If the request fails, no issues are reported and every header is
 *   `null`.
 */
export async function analyzeSecurityHeaders(url: string): Promise<{ issues: AuditIssue[]; data: SecurityHeadersData }> {
  const issues: AuditIssue[] = [];

  // URL schemes are case-insensitive, so "HTTPS://..." must count as HTTPS too.
  const isHttps = /^https:\/\//i.test(url);

  try {
    const response = await httpGet<string>(url, {
      timeout: 15000,
      validateStatus: () => true,
      maxRedirects: 5,
    });

    const headers = response.headers;

    // Extract security headers (case-insensitive). A header sent more than
    // once may be exposed as an array by the HTTP layer (assumption - the
    // header type of httpGet is not visible here); join it so callers always
    // receive a string and string methods never throw.
    const getHeader = (name: string): string | null => {
      const wanted = name.toLowerCase();
      const key = Object.keys(headers).find((k) => k.toLowerCase() === wanted);
      if (key === undefined) {
        return null;
      }
      const raw: unknown = headers[key];
      if (raw === undefined || raw === null) {
        return null;
      }
      return Array.isArray(raw) ? raw.join(', ') : String(raw);
    };

    const hsts = getHeader('strict-transport-security');
    const csp = getHeader('content-security-policy');
    const xFrameOptions = getHeader('x-frame-options');
    const xContentTypeOptions = getHeader('x-content-type-options');
    const referrerPolicy = getHeader('referrer-policy');
    const permissionsPolicy = getHeader('permissions-policy') || getHeader('feature-policy');

    // Parse HSTS
    const hstsData = parseHSTS(hsts);

    // Check for HTTPS
    if (!isHttps) {
      issues.push({
        ...ISSUE_DEFINITIONS.NOT_HTTPS,
        affectedUrls: [url],
      });
    }

    // Check for HSTS (only reported for HTTPS URLs)
    if (isHttps && !hsts) {
      issues.push({
        ...ISSUE_DEFINITIONS.HSTS_MISSING,
        affectedUrls: [url],
      });
    }

    // Check for CSP
    if (!csp) {
      issues.push({
        ...ISSUE_DEFINITIONS.CSP_MISSING,
        affectedUrls: [url],
      });
    }

    // Check for X-Frame-Options
    if (!xFrameOptions) {
      issues.push({
        ...ISSUE_DEFINITIONS.X_FRAME_OPTIONS_MISSING,
        affectedUrls: [url],
      });
    }

    // Check for X-Content-Type-Options
    if (!xContentTypeOptions) {
      issues.push({
        ...ISSUE_DEFINITIONS.X_CONTENT_TYPE_OPTIONS_MISSING,
        affectedUrls: [url],
      });
    }

    // Check for Referrer-Policy
    if (!referrerPolicy) {
      issues.push({
        ...ISSUE_DEFINITIONS.REFERRER_POLICY_MISSING,
        affectedUrls: [url],
      });
    }

    // Check for Permissions-Policy
    if (!permissionsPolicy) {
      issues.push({
        ...ISSUE_DEFINITIONS.PERMISSIONS_POLICY_MISSING,
        affectedUrls: [url],
      });
    }

    return {
      issues,
      data: {
        https: isHttps,
        headers: {
          hsts,
          csp,
          xFrameOptions,
          xContentTypeOptions,
          referrerPolicy,
          permissionsPolicy,
        },
        hstsMaxAge: hstsData.maxAge,
        hstsIncludesSubdomains: hstsData.includeSubdomains,
        hstsPreload: hstsData.preload,
      },
    };
  } catch {
    // Best effort: when the request fails nothing can be said about the
    // headers, so return an empty result instead of failing the audit.
    return {
      issues,
      data: {
        https: isHttps,
        headers: {
          hsts: null,
          csp: null,
          xFrameOptions: null,
          xContentTypeOptions: null,
          referrerPolicy: null,
          permissionsPolicy: null,
        },
        hstsMaxAge: null,
        hstsIncludesSubdomains: false,
        hstsPreload: false,
      },
    };
  }
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import type { AuditIssue } from '../types.js';
|
|
3
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Page-level security findings derived from the HTML, URL and response headers.
 */
export interface SecurityData {
  /** Whether the page URL uses the https scheme. */
  isHttps: boolean;
  /** True when at least one insecure (http://) sub-resource was found. */
  hasMixedContent: boolean;
  /** The insecure resource URLs that were found. */
  mixedContentUrls: string[];
  /** Whether a Strict-Transport-Security header was present. */
  hasHsts: boolean;
  /** `max-age` parsed from the HSTS header, when available. */
  hstsMaxAge?: number;
  /** Certificate expiry date, when known. */
  certificateExpiry?: Date;
  /** Certificate issuer, when known. */
  certificateIssuer?: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Inspect an already-fetched page for basic transport-security problems.
 *
 * Reports a non-HTTPS page, mixed content (scripts, stylesheets, images and
 * iframes loaded over `http://` from an HTTPS page) and a missing HSTS header
 * on an HTTPS page.
 *
 * @param html Page markup.
 * @param url Absolute URL the markup was fetched from.
 * @param headers Response headers; the HSTS header name is matched
 *   case-insensitively.
 * @returns Audit issues plus the structured findings.
 * @throws TypeError when `url` is not a valid absolute URL (thrown by `new URL`).
 */
export async function analyzeSecurity(
  html: string,
  url: string,
  headers: Record<string, string>
): Promise<{ issues: AuditIssue[]; data: SecurityData }> {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const parsedUrl = new URL(url);
  const isHttps = parsedUrl.protocol === 'https:';

  // Check for HTTPS
  if (!isHttps) {
    issues.push({
      ...ISSUE_DEFINITIONS.NOT_HTTPS,
      affectedUrls: [url],
    });
  }

  // Check for mixed content (HTTP resources on HTTPS page)
  const mixedContentUrls: string[] = [];
  if (isHttps) {
    // Element/attribute pairs to inspect, in reporting order.
    const resourceSources = [
      { selector: 'script[src]', attribute: 'src' },
      { selector: 'link[rel="stylesheet"][href]', attribute: 'href' },
      { selector: 'img[src]', attribute: 'src' },
      { selector: 'iframe[src]', attribute: 'src' },
    ];

    // URL schemes are case-insensitive and leading whitespace in a URL
    // attribute is ignored by browsers, so match accordingly.
    const insecureScheme = /^\s*http:\/\//i;

    for (const { selector, attribute } of resourceSources) {
      $(selector).each((_, el) => {
        const resourceUrl = $(el).attr(attribute);
        if (resourceUrl !== undefined && insecureScheme.test(resourceUrl)) {
          mixedContentUrls.push(resourceUrl);
        }
      });
    }

    if (mixedContentUrls.length > 0) {
      issues.push({
        ...ISSUE_DEFINITIONS.MIXED_CONTENT,
        affectedUrls: [url],
        details: {
          count: mixedContentUrls.length,
          resources: mixedContentUrls.slice(0, 5),
        },
      });
    }
  }

  // Check HSTS header. Header names are case-insensitive, so do not rely on
  // the caller having lower-cased the keys.
  const hstsKey = Object.keys(headers).find(
    (k) => k.toLowerCase() === 'strict-transport-security'
  );
  const hstsHeader = hstsKey === undefined ? undefined : headers[hstsKey];
  const hasHsts = !!hstsHeader;
  let hstsMaxAge: number | undefined;

  if (hstsHeader) {
    // Directive names are case-insensitive; the value may be quoted.
    const maxAgeMatch = hstsHeader.match(/max-age\s*=\s*"?(\d+)"?/i);
    if (maxAgeMatch) {
      hstsMaxAge = parseInt(maxAgeMatch[1], 10);
    }
  } else if (isHttps) {
    issues.push({
      ...ISSUE_DEFINITIONS.HSTS_MISSING,
      affectedUrls: [url],
    });
  }

  const data: SecurityData = {
    isHttps,
    hasMixedContent: mixedContentUrls.length > 0,
    mixedContentUrls,
    hasHsts,
    hstsMaxAge,
  };

  return { issues, data };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Check SSL certificate.
 *
 * Note: Using fetch for isomorphic support - detailed cert info (expiry,
 * issuer) is not available via the fetch API, so `expiresAt`, `issuer` and
 * `daysUntilExpiry` are never populated. This only verifies that an HTTPS
 * request to `url` succeeds within 5 seconds.
 *
 * @param url Absolute URL to probe with a HEAD request.
 * @returns `valid: true` when the request completed; `valid: false` for
 *   non-HTTPS URLs and for any failure (certificate problem, network error
 *   or timeout - these cannot be told apart here). `issues` is always empty.
 * @throws TypeError when `url` is not a valid absolute URL (thrown by `new URL`).
 */
export async function checkCertificate(url: string): Promise<{
  valid: boolean;
  expiresAt?: Date;
  issuer?: string;
  daysUntilExpiry?: number;
  issues: AuditIssue[];
}> {
  const issues: AuditIssue[] = [];
  const parsedUrl = new URL(url);

  if (parsedUrl.protocol !== 'https:') {
    return { valid: false, issues };
  }

  // Abort the request if it has not completed after 5 seconds.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 5000);

  try {
    // Use fetch to verify the certificate is valid
    // This will throw if the certificate is invalid/expired
    await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
    });

    // Certificate is valid if we got here
    return { valid: true, issues };
  } catch {
    // Connection failed - could be cert issue or network error
    return { valid: false, issues };
  } finally {
    // Always release the timer, including when fetch rejects; otherwise it
    // stays pending for the rest of the 5 seconds.
    clearTimeout(timeoutId);
  }
}
|