recker 1.0.31 → 1.0.32-next.e0741bf

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (46)
  1. package/dist/cli/index.js +2350 -43
  2. package/dist/cli/tui/shell-search.js +10 -8
  3. package/dist/cli/tui/shell.d.ts +29 -0
  4. package/dist/cli/tui/shell.js +1733 -9
  5. package/dist/mcp/search/hybrid-search.js +4 -2
  6. package/dist/seo/analyzer.d.ts +7 -0
  7. package/dist/seo/analyzer.js +200 -4
  8. package/dist/seo/rules/ai-search.d.ts +2 -0
  9. package/dist/seo/rules/ai-search.js +423 -0
  10. package/dist/seo/rules/canonical.d.ts +12 -0
  11. package/dist/seo/rules/canonical.js +249 -0
  12. package/dist/seo/rules/crawl.js +113 -0
  13. package/dist/seo/rules/cwv.js +0 -95
  14. package/dist/seo/rules/i18n.js +27 -0
  15. package/dist/seo/rules/images.js +23 -27
  16. package/dist/seo/rules/index.js +14 -0
  17. package/dist/seo/rules/internal-linking.js +6 -6
  18. package/dist/seo/rules/links.js +321 -0
  19. package/dist/seo/rules/meta.js +24 -0
  20. package/dist/seo/rules/mobile.js +0 -20
  21. package/dist/seo/rules/performance.js +124 -0
  22. package/dist/seo/rules/redirects.d.ts +16 -0
  23. package/dist/seo/rules/redirects.js +193 -0
  24. package/dist/seo/rules/resources.d.ts +2 -0
  25. package/dist/seo/rules/resources.js +373 -0
  26. package/dist/seo/rules/security.js +290 -0
  27. package/dist/seo/rules/technical-advanced.d.ts +10 -0
  28. package/dist/seo/rules/technical-advanced.js +283 -0
  29. package/dist/seo/rules/technical.js +74 -18
  30. package/dist/seo/rules/types.d.ts +103 -3
  31. package/dist/seo/seo-spider.d.ts +2 -0
  32. package/dist/seo/seo-spider.js +47 -2
  33. package/dist/seo/types.d.ts +48 -28
  34. package/dist/seo/utils/index.d.ts +1 -0
  35. package/dist/seo/utils/index.js +1 -0
  36. package/dist/seo/utils/similarity.d.ts +47 -0
  37. package/dist/seo/utils/similarity.js +273 -0
  38. package/dist/seo/validators/index.d.ts +3 -0
  39. package/dist/seo/validators/index.js +3 -0
  40. package/dist/seo/validators/llms-txt.d.ts +57 -0
  41. package/dist/seo/validators/llms-txt.js +317 -0
  42. package/dist/seo/validators/robots.d.ts +54 -0
  43. package/dist/seo/validators/robots.js +382 -0
  44. package/dist/seo/validators/sitemap.d.ts +69 -0
  45. package/dist/seo/validators/sitemap.js +424 -0
  46. package/package.json +1 -1
package/dist/seo/validators/robots.d.ts
@@ -0,0 +1,54 @@
+ export interface RobotsDirective {
+     type: 'user-agent' | 'allow' | 'disallow' | 'sitemap' | 'crawl-delay' | 'host' | 'clean-param';
+     value: string;
+     line: number;
+ }
+ export interface RobotsUserAgentBlock {
+     userAgents: string[];
+     rules: Array<{
+         type: 'allow' | 'disallow';
+         path: string;
+         line: number;
+     }>;
+     crawlDelay?: number;
+ }
+ export interface RobotsParseResult {
+     valid: boolean;
+     errors: Array<{
+         line: number;
+         message: string;
+     }>;
+     warnings: Array<{
+         line: number;
+         message: string;
+     }>;
+     directives: RobotsDirective[];
+     userAgentBlocks: RobotsUserAgentBlock[];
+     sitemaps: string[];
+     host?: string;
+     blocksAllRobots: boolean;
+     blocksImportantPaths: boolean;
+     size: number;
+ }
+ export interface RobotsValidationIssue {
+     type: 'error' | 'warning' | 'info';
+     code: string;
+     message: string;
+     line?: number;
+     recommendation?: string;
+ }
+ export interface RobotsValidationResult {
+     valid: boolean;
+     issues: RobotsValidationIssue[];
+     parseResult: RobotsParseResult;
+ }
+ export declare function parseRobotsTxt(content: string): RobotsParseResult;
+ export declare function validateRobotsTxt(content: string, baseUrl?: string): RobotsValidationResult;
+ export declare function isPathAllowed(parseResult: RobotsParseResult, path: string, userAgent?: string): boolean;
+ export declare function fetchAndValidateRobotsTxt(url: string, fetcher?: (url: string) => Promise<{
+     status: number;
+     text: string;
+ }>): Promise<RobotsValidationResult & {
+     exists: boolean;
+     status?: number;
+ }>;
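
The declarations above define the new robots.txt validator API. A minimal usage sketch, assuming the functions are reachable via a deep import into dist (the import specifier below is an assumption inferred from the file layout; the package's actual export map may expose them differently):

import { parseRobotsTxt, validateRobotsTxt, isPathAllowed } from 'recker/dist/seo/validators/robots.js'; // hypothetical import path

const robots = [
    'User-agent: *',
    'Disallow: /admin/',
    'Sitemap: https://example.com/sitemap.xml',
].join('\n');

// Parse once, then query individual paths against the parsed rules.
const parsed = parseRobotsTxt(robots);
console.log(parsed.sitemaps);                        // ['https://example.com/sitemap.xml']
console.log(isPathAllowed(parsed, '/admin/panel'));  // false (blocked by Disallow: /admin/)
console.log(isPathAllowed(parsed, '/blog/post'));    // true

// Full validation returns coded issues with optional recommendations.
const { valid, issues } = validateRobotsTxt(robots, 'https://example.com');
console.log(valid, issues.map(i => i.code));
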
package/dist/seo/validators/robots.js
@@ -0,0 +1,382 @@
+ const IMPORTANT_PATHS = [
+     '/',
+     '/sitemap.xml',
+     '/sitemap',
+     '/.well-known/',
+     '/robots.txt',
+ ];
+ const SEO_CRITICAL_PATHS = [
+     '/css',
+     '/js',
+     '/images',
+     '/assets',
+     '/static',
+     '/fonts',
+ ];
+ export function parseRobotsTxt(content) {
+     const lines = content.split(/\r?\n/);
+     const directives = [];
+     const errors = [];
+     const warnings = [];
+     const sitemaps = [];
+     const userAgentBlocks = [];
+     let currentBlock = null;
+     let host;
+     let blocksAllRobots = false;
+     let blocksImportantPaths = false;
+     for (let i = 0; i < lines.length; i++) {
+         const lineNum = i + 1;
+         let line = lines[i].trim();
+         const commentIndex = line.indexOf('#');
+         if (commentIndex !== -1) {
+             line = line.substring(0, commentIndex).trim();
+         }
+         if (!line)
+             continue;
+         const colonIndex = line.indexOf(':');
+         if (colonIndex === -1) {
+             errors.push({ line: lineNum, message: `Invalid syntax: missing colon` });
+             continue;
+         }
+         const directiveType = line.substring(0, colonIndex).trim().toLowerCase();
+         const value = line.substring(colonIndex + 1).trim();
+         switch (directiveType) {
+             case 'user-agent':
+                 if (!value) {
+                     errors.push({ line: lineNum, message: 'Empty user-agent value' });
+                     continue;
+                 }
+                 if (!currentBlock || currentBlock.rules.length > 0) {
+                     currentBlock = { userAgents: [value], rules: [] };
+                     userAgentBlocks.push(currentBlock);
+                 }
+                 else {
+                     currentBlock.userAgents.push(value);
+                 }
+                 directives.push({ type: 'user-agent', value, line: lineNum });
+                 break;
+             case 'disallow':
+                 if (!currentBlock) {
+                     warnings.push({ line: lineNum, message: 'Disallow without preceding User-agent' });
+                     currentBlock = { userAgents: ['*'], rules: [] };
+                     userAgentBlocks.push(currentBlock);
+                 }
+                 currentBlock.rules.push({ type: 'disallow', path: value, line: lineNum });
+                 directives.push({ type: 'disallow', value, line: lineNum });
+                 if (value === '/' && currentBlock.userAgents.includes('*')) {
+                     blocksAllRobots = true;
+                 }
+                 for (const importantPath of IMPORTANT_PATHS) {
+                     if (value === importantPath || (value.endsWith('/') && importantPath.startsWith(value))) {
+                         blocksImportantPaths = true;
+                     }
+                 }
+                 break;
+             case 'allow':
+                 if (!currentBlock) {
+                     warnings.push({ line: lineNum, message: 'Allow without preceding User-agent' });
+                     currentBlock = { userAgents: ['*'], rules: [] };
+                     userAgentBlocks.push(currentBlock);
+                 }
+                 currentBlock.rules.push({ type: 'allow', path: value, line: lineNum });
+                 directives.push({ type: 'allow', value, line: lineNum });
+                 break;
+             case 'sitemap':
+                 if (!value) {
+                     errors.push({ line: lineNum, message: 'Empty sitemap URL' });
+                     continue;
+                 }
+                 try {
+                     new URL(value);
+                     sitemaps.push(value);
+                     directives.push({ type: 'sitemap', value, line: lineNum });
+                 }
+                 catch {
+                     errors.push({ line: lineNum, message: `Invalid sitemap URL: ${value}` });
+                 }
+                 break;
+             case 'crawl-delay':
+                 if (!currentBlock) {
+                     warnings.push({ line: lineNum, message: 'Crawl-delay without preceding User-agent' });
+                 }
+                 else {
+                     const delay = parseFloat(value);
+                     if (isNaN(delay) || delay < 0) {
+                         errors.push({ line: lineNum, message: `Invalid crawl-delay value: ${value}` });
+                     }
+                     else {
+                         currentBlock.crawlDelay = delay;
+                         if (delay > 10) {
+                             warnings.push({ line: lineNum, message: `High crawl-delay (${delay}s) may slow indexing` });
+                         }
+                     }
+                 }
+                 directives.push({ type: 'crawl-delay', value, line: lineNum });
+                 break;
+             case 'host':
+                 host = value;
+                 directives.push({ type: 'host', value, line: lineNum });
+                 break;
+             case 'clean-param':
+                 directives.push({ type: 'clean-param', value, line: lineNum });
+                 break;
+             default:
+                 warnings.push({ line: lineNum, message: `Unknown directive: ${directiveType}` });
+         }
+     }
+     return {
+         valid: errors.length === 0,
+         errors,
+         warnings,
+         directives,
+         userAgentBlocks,
+         sitemaps,
+         host,
+         blocksAllRobots,
+         blocksImportantPaths,
+         size: content.length,
+     };
+ }
+ export function validateRobotsTxt(content, baseUrl) {
+     const parseResult = parseRobotsTxt(content);
+     const issues = [];
+     for (const error of parseResult.errors) {
+         issues.push({
+             type: 'error',
+             code: 'PARSE_ERROR',
+             message: error.message,
+             line: error.line,
+         });
+     }
+     for (const warning of parseResult.warnings) {
+         issues.push({
+             type: 'warning',
+             code: 'PARSE_WARNING',
+             message: warning.message,
+             line: warning.line,
+         });
+     }
+     if (content.trim().length === 0) {
+         issues.push({
+             type: 'warning',
+             code: 'EMPTY_FILE',
+             message: 'robots.txt is empty',
+             recommendation: 'Add at least User-agent: * and basic Allow/Disallow rules',
+         });
+     }
+     if (parseResult.blocksAllRobots) {
+         issues.push({
+             type: 'error',
+             code: 'BLOCKS_ALL_ROBOTS',
+             message: 'robots.txt blocks all search engines (Disallow: /)',
+             recommendation: 'Remove "Disallow: /" or add specific Allow rules for indexable content',
+         });
+     }
+     if (parseResult.sitemaps.length === 0) {
+         issues.push({
+             type: 'warning',
+             code: 'NO_SITEMAP',
+             message: 'No Sitemap directive found',
+             recommendation: 'Add Sitemap: https://example.com/sitemap.xml to help search engines discover your content',
+         });
+     }
+     if (baseUrl) {
+         const baseHost = new URL(baseUrl).hostname;
+         for (const sitemap of parseResult.sitemaps) {
+             try {
+                 const sitemapHost = new URL(sitemap).hostname;
+                 if (sitemapHost !== baseHost) {
+                     issues.push({
+                         type: 'warning',
+                         code: 'SITEMAP_CROSS_DOMAIN',
+                         message: `Sitemap points to different domain: ${sitemap}`,
+                         recommendation: 'Ensure sitemap URLs are on the same domain',
+                     });
+                 }
+             }
+             catch {
+             }
+         }
+     }
+     for (const block of parseResult.userAgentBlocks) {
+         if (block.userAgents.includes('*') || block.userAgents.includes('Googlebot')) {
+             for (const rule of block.rules) {
+                 if (rule.type === 'disallow') {
+                     for (const criticalPath of SEO_CRITICAL_PATHS) {
+                         if (rule.path.includes(criticalPath)) {
+                             issues.push({
+                                 type: 'warning',
+                                 code: 'BLOCKS_RESOURCES',
+                                 message: `Blocking ${rule.path} may prevent proper rendering`,
+                                 line: rule.line,
+                                 recommendation: 'Allow CSS, JS, and image files for proper page rendering',
+                             });
+                         }
+                     }
+                 }
+             }
+         }
+     }
+     if (parseResult.size > 500 * 1024) {
+         issues.push({
+             type: 'warning',
+             code: 'FILE_TOO_LARGE',
+             message: `robots.txt is ${Math.round(parseResult.size / 1024)}KB (recommended max: 500KB)`,
+             recommendation: 'Simplify robots.txt to reduce file size',
+         });
+     }
+     if (parseResult.userAgentBlocks.length === 0 && content.trim().length > 0) {
+         issues.push({
+             type: 'warning',
+             code: 'NO_USER_AGENT',
+             message: 'No User-agent directive found',
+             recommendation: 'Add at least User-agent: * for universal rules',
+         });
+     }
+     for (const block of parseResult.userAgentBlocks) {
+         if (block.crawlDelay && block.crawlDelay > 30) {
+             issues.push({
+                 type: 'error',
+                 code: 'EXCESSIVE_CRAWL_DELAY',
+                 message: `Crawl-delay of ${block.crawlDelay}s is excessive for ${block.userAgents.join(', ')}`,
+                 recommendation: 'Crawl-delay over 30 seconds may significantly slow indexing',
+             });
+         }
+     }
+     return {
+         valid: issues.filter(i => i.type === 'error').length === 0,
+         issues,
+         parseResult,
+     };
+ }
+ export function isPathAllowed(parseResult, path, userAgent = '*') {
+     let matchingBlock;
+     for (const block of parseResult.userAgentBlocks) {
+         if (block.userAgents.some(ua => ua.toLowerCase() === userAgent.toLowerCase() ||
+             userAgent.toLowerCase().includes(ua.toLowerCase()))) {
+             matchingBlock = block;
+             break;
+         }
+     }
+     if (!matchingBlock) {
+         matchingBlock = parseResult.userAgentBlocks.find(b => b.userAgents.includes('*'));
+     }
+     if (!matchingBlock) {
+         return true;
+     }
+     let isAllowed = true;
+     let longestMatch = -1;
+     for (const rule of matchingBlock.rules) {
+         const pattern = rule.path;
+         let regex;
+         try {
+             const escapedPattern = pattern
+                 .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
+                 .replace(/\*/g, '.*')
+                 .replace(/\\\$/g, '$');
+             regex = new RegExp(`^${escapedPattern}`);
+         }
+         catch {
+             continue;
+         }
+         if (regex.test(path)) {
+             const matchLength = pattern.replace(/\*/g, '').length;
+             if (matchLength > longestMatch) {
+                 longestMatch = matchLength;
+                 isAllowed = rule.type === 'allow';
+             }
+         }
+     }
+     return isAllowed;
+ }
+ export async function fetchAndValidateRobotsTxt(url, fetcher) {
+     const robotsUrl = new URL('/robots.txt', url).href;
+     try {
+         let response;
+         if (fetcher) {
+             response = await fetcher(robotsUrl);
+         }
+         else {
+             const fetchResponse = await fetch(robotsUrl);
+             response = {
+                 status: fetchResponse.status,
+                 text: await fetchResponse.text(),
+             };
+         }
+         if (response.status === 404) {
+             return {
+                 exists: false,
+                 status: 404,
+                 valid: false,
+                 issues: [{
+                     type: 'warning',
+                     code: 'NOT_FOUND',
+                     message: 'robots.txt not found (404)',
+                     recommendation: 'Create a robots.txt file to control search engine crawling',
+                 }],
+                 parseResult: {
+                     valid: false,
+                     errors: [],
+                     warnings: [],
+                     directives: [],
+                     userAgentBlocks: [],
+                     sitemaps: [],
+                     blocksAllRobots: false,
+                     blocksImportantPaths: false,
+                     size: 0,
+                 },
+             };
+         }
+         if (response.status >= 400) {
+             return {
+                 exists: false,
+                 status: response.status,
+                 valid: false,
+                 issues: [{
+                     type: 'error',
+                     code: 'FETCH_ERROR',
+                     message: `Failed to fetch robots.txt (HTTP ${response.status})`,
+                 }],
+                 parseResult: {
+                     valid: false,
+                     errors: [],
+                     warnings: [],
+                     directives: [],
+                     userAgentBlocks: [],
+                     sitemaps: [],
+                     blocksAllRobots: false,
+                     blocksImportantPaths: false,
+                     size: 0,
+                 },
+             };
+         }
+         const validation = validateRobotsTxt(response.text, url);
+         return {
+             ...validation,
+             exists: true,
+             status: response.status,
+         };
+     }
+     catch (error) {
+         return {
+             exists: false,
+             valid: false,
+             issues: [{
+                 type: 'error',
+                 code: 'FETCH_ERROR',
+                 message: `Failed to fetch robots.txt: ${error instanceof Error ? error.message : 'Unknown error'}`,
+             }],
+             parseResult: {
+                 valid: false,
+                 errors: [],
+                 warnings: [],
+                 directives: [],
+                 userAgentBlocks: [],
+                 sitemaps: [],
+                 blocksAllRobots: false,
+                 blocksImportantPaths: false,
+                 size: 0,
+             },
+         };
+     }
+ }
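
A note on isPathAllowed above: each Allow/Disallow path is compiled into an anchored regular expression ('*' becomes '.*', a trailing '$' is preserved as an end anchor), and the matching rule with the longest literal pattern wins, which approximates the longest-match precedence used by major crawlers. A hypothetical illustration (same assumed import path as before; not taken from the package's tests):

import { parseRobotsTxt, isPathAllowed } from 'recker/dist/seo/validators/robots.js'; // hypothetical import path

const parsed = parseRobotsTxt([
    'User-agent: *',
    'Disallow: /shop/',
    'Allow: /shop/public/',
    'Disallow: /*.pdf$',
].join('\n'));

// 'Allow: /shop/public/' (13 literal chars) outranks 'Disallow: /shop/' (6 chars).
console.log(isPathAllowed(parsed, '/shop/public/catalog'));    // true
console.log(isPathAllowed(parsed, '/shop/cart'));              // false
// The trailing '$' survives escaping as an end anchor, so only URLs ending in .pdf are blocked.
console.log(isPathAllowed(parsed, '/files/report.pdf'));       // false
console.log(isPathAllowed(parsed, '/files/report.pdf.html'));  // true
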
package/dist/seo/validators/sitemap.d.ts
@@ -0,0 +1,69 @@
+ export interface SitemapUrl {
+     loc: string;
+     lastmod?: string;
+     changefreq?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never';
+     priority?: number;
+     images?: Array<{
+         loc: string;
+         caption?: string;
+         title?: string;
+     }>;
+     videos?: Array<{
+         thumbnailLoc: string;
+         title: string;
+         description: string;
+         contentLoc?: string;
+         playerLoc?: string;
+     }>;
+     news?: {
+         publicationName: string;
+         publicationLanguage: string;
+         publicationDate: string;
+         title: string;
+     };
+     alternates?: Array<{
+         hreflang: string;
+         href: string;
+     }>;
+ }
+ export interface SitemapIndex {
+     loc: string;
+     lastmod?: string;
+ }
+ export interface SitemapParseResult {
+     type: 'urlset' | 'sitemapindex' | 'unknown';
+     valid: boolean;
+     errors: string[];
+     warnings: string[];
+     urls: SitemapUrl[];
+     sitemaps: SitemapIndex[];
+     urlCount: number;
+     size: number;
+     compressed: boolean;
+ }
+ export interface SitemapValidationIssue {
+     type: 'error' | 'warning' | 'info';
+     code: string;
+     message: string;
+     url?: string;
+     recommendation?: string;
+ }
+ export interface SitemapValidationResult {
+     valid: boolean;
+     issues: SitemapValidationIssue[];
+     parseResult: SitemapParseResult;
+ }
+ export declare function parseSitemap(content: string, compressed?: boolean): SitemapParseResult;
+ export declare function validateSitemap(content: string, baseUrl?: string): SitemapValidationResult;
+ export declare function discoverSitemaps(baseUrl: string, robotsTxtContent?: string, fetcher?: (url: string) => Promise<{
+     status: number;
+     text: string;
+ }>): Promise<string[]>;
+ export declare function fetchAndValidateSitemap(url: string, fetcher?: (url: string) => Promise<{
+     status: number;
+     text: string;
+     headers?: Record<string, string>;
+ }>): Promise<SitemapValidationResult & {
+     exists: boolean;
+     status?: number;
+ }>;
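
The sitemap validator mirrors the robots.txt one: discovery plus fetch-and-validate helpers that accept an optional injectable fetcher matching the { status, text } shape declared above. A usage sketch based only on these declarations (the import path is an assumption, and the runtime behavior of discoverSitemaps is inferred, not verified):

import { discoverSitemaps, fetchAndValidateSitemap } from 'recker/dist/seo/validators/sitemap.js'; // hypothetical import path

async function auditSitemaps(baseUrl: string): Promise<void> {
    // Presumably discovers sitemap URLs from robots.txt and/or default locations.
    const sitemapUrls = await discoverSitemaps(baseUrl);
    for (const sitemapUrl of sitemapUrls) {
        const result = await fetchAndValidateSitemap(sitemapUrl);
        if (!result.exists) {
            console.log(`${sitemapUrl}: missing (HTTP ${result.status ?? 'n/a'})`);
            continue;
        }
        console.log(`${sitemapUrl}: ${result.parseResult.urlCount} URLs, valid=${result.valid}`);
        for (const issue of result.issues) {
            console.log(`  ${issue.type} ${issue.code}: ${issue.message}`);
        }
    }
}

auditSitemaps('https://example.com').catch(console.error);
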