recker 1.0.32 → 1.0.33-next.bbc56eb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/cli/index.js +2354 -39
  2. package/dist/cli/tui/shell-search.js +10 -8
  3. package/dist/cli/tui/shell.d.ts +29 -0
  4. package/dist/cli/tui/shell.js +1733 -9
  5. package/dist/mcp/search/hybrid-search.js +4 -2
  6. package/dist/seo/analyzer.d.ts +7 -0
  7. package/dist/seo/analyzer.js +200 -4
  8. package/dist/seo/rules/ai-search.d.ts +2 -0
  9. package/dist/seo/rules/ai-search.js +423 -0
  10. package/dist/seo/rules/canonical.d.ts +12 -0
  11. package/dist/seo/rules/canonical.js +249 -0
  12. package/dist/seo/rules/crawl.js +113 -0
  13. package/dist/seo/rules/cwv.js +0 -95
  14. package/dist/seo/rules/i18n.js +27 -0
  15. package/dist/seo/rules/images.js +23 -27
  16. package/dist/seo/rules/index.js +14 -0
  17. package/dist/seo/rules/internal-linking.js +6 -6
  18. package/dist/seo/rules/links.js +321 -0
  19. package/dist/seo/rules/meta.js +24 -0
  20. package/dist/seo/rules/mobile.js +0 -20
  21. package/dist/seo/rules/performance.js +124 -0
  22. package/dist/seo/rules/redirects.d.ts +16 -0
  23. package/dist/seo/rules/redirects.js +193 -0
  24. package/dist/seo/rules/resources.d.ts +2 -0
  25. package/dist/seo/rules/resources.js +373 -0
  26. package/dist/seo/rules/security.js +290 -0
  27. package/dist/seo/rules/technical-advanced.d.ts +10 -0
  28. package/dist/seo/rules/technical-advanced.js +283 -0
  29. package/dist/seo/rules/technical.js +74 -18
  30. package/dist/seo/rules/types.d.ts +103 -3
  31. package/dist/seo/seo-spider.d.ts +2 -0
  32. package/dist/seo/seo-spider.js +47 -2
  33. package/dist/seo/types.d.ts +48 -28
  34. package/dist/seo/utils/index.d.ts +1 -0
  35. package/dist/seo/utils/index.js +1 -0
  36. package/dist/seo/utils/similarity.d.ts +47 -0
  37. package/dist/seo/utils/similarity.js +273 -0
  38. package/dist/seo/validators/index.d.ts +3 -0
  39. package/dist/seo/validators/index.js +3 -0
  40. package/dist/seo/validators/llms-txt.d.ts +57 -0
  41. package/dist/seo/validators/llms-txt.js +317 -0
  42. package/dist/seo/validators/robots.d.ts +54 -0
  43. package/dist/seo/validators/robots.js +382 -0
  44. package/dist/seo/validators/sitemap.d.ts +69 -0
  45. package/dist/seo/validators/sitemap.js +424 -0
  46. package/package.json +1 -1
@@ -0,0 +1,424 @@
1
+ import * as cheerio from 'cheerio';
2
/**
 * Accepted `<changefreq>` values. parseSitemap lower-cases the element text
 * before checking membership, so the entries here are lower case.
 * Frozen so that no caller can alter the shared list at runtime.
 */
const VALID_CHANGEFREQ = Object.freeze(['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never']);
/** Maximum number of URL entries in one sitemap before TOO_MANY_URLS is reported. */
const MAX_URLS_PER_SITEMAP = 50000;
/** Maximum sitemap size (50 * 1024 * 1024) before FILE_TOO_LARGE is reported. */
const MAX_SITEMAP_SIZE = 50 * 1024 * 1024;
5
/**
 * Parse sitemap XML (a `<urlset>` or a `<sitemapindex>`) into a plain result object.
 *
 * Problems that make an entry unusable (missing or non-absolute `<loc>`) are
 * collected in `errors` and the entry is skipped; problems with optional fields
 * (`lastmod`, `changefreq`, `priority`) are collected in `warnings` and only the
 * offending field is dropped.
 *
 * @param {string} content - Sitemap XML text.
 * @param {boolean} [compressed=false] - Copied verbatim into the result; not otherwise used here.
 * @returns {{type: string, valid: boolean, errors: string[], warnings: string[], urls: object[], sitemaps: object[], urlCount: number, size: number, compressed: boolean}}
 *   `type` is 'urlset', 'sitemapindex' or 'unknown'; `valid` is true when no errors were
 *   collected; `urlCount` is the number of URLs (urlset) or child sitemaps (index);
 *   `size` is the UTF-8 byte length of `content`.
 */
export function parseSitemap(content, compressed = false) {
    const errors = [];
    const warnings = [];
    const urls = [];
    const sitemaps = [];
    let type = 'unknown';
    // Trimmed text of the first descendant of $node matching selector ('' when absent).
    const textOf = ($node, selector) => $node.find(selector).first().text().trim();
    // True when value parses as an absolute URL.
    const isAbsoluteUrl = (value) => {
        try {
            new URL(value);
            return true;
        }
        catch {
            return false;
        }
    };
    try {
        const $ = cheerio.load(content, { xmlMode: true });
        if ($('urlset').length > 0) {
            type = 'urlset';
            $('url').each((_, elem) => {
                const $url = $(elem);
                const loc = textOf($url, 'loc');
                if (!loc) {
                    errors.push('URL entry missing <loc> element');
                    return;
                }
                if (!isAbsoluteUrl(loc)) {
                    errors.push(`Invalid URL: ${loc}`);
                    return;
                }
                const url = { loc };
                const lastmod = textOf($url, 'lastmod');
                if (lastmod) {
                    if (isValidDate(lastmod)) {
                        url.lastmod = lastmod;
                    }
                    else {
                        warnings.push(`Invalid lastmod date for ${loc}: ${lastmod}`);
                    }
                }
                const changefreq = textOf($url, 'changefreq').toLowerCase();
                if (changefreq) {
                    if (VALID_CHANGEFREQ.includes(changefreq)) {
                        url.changefreq = changefreq;
                    }
                    else {
                        warnings.push(`Invalid changefreq for ${loc}: ${changefreq}`);
                    }
                }
                const priority = textOf($url, 'priority');
                if (priority) {
                    const p = parseFloat(priority);
                    if (Number.isNaN(p) || p < 0 || p > 1) {
                        warnings.push(`Invalid priority for ${loc}: ${priority} (must be 0.0-1.0)`);
                    }
                    else {
                        url.priority = p;
                    }
                }
                // Image extension entries; both prefixed and unprefixed tag names are accepted.
                const images = [];
                $url.find('image\\:image, image').each((_, imgElem) => {
                    const $img = $(imgElem);
                    const imgLoc = textOf($img, 'image\\:loc, loc');
                    if (imgLoc) {
                        images.push({
                            loc: imgLoc,
                            caption: textOf($img, 'image\\:caption, caption') || undefined,
                            title: textOf($img, 'image\\:title, title') || undefined,
                        });
                    }
                });
                if (images.length > 0) {
                    url.images = images;
                }
                // Alternate-language links; entries lacking hreflang or href are ignored.
                const alternates = [];
                $url.find('xhtml\\:link[rel="alternate"], link[rel="alternate"]').each((_, linkElem) => {
                    const $link = $(linkElem);
                    const hreflang = $link.attr('hreflang');
                    const href = $link.attr('href');
                    if (hreflang && href) {
                        alternates.push({ hreflang, href });
                    }
                });
                if (alternates.length > 0) {
                    url.alternates = alternates;
                }
                urls.push(url);
            });
        }
        else if ($('sitemapindex').length > 0) {
            type = 'sitemapindex';
            $('sitemap').each((_, elem) => {
                const $sitemap = $(elem);
                const loc = textOf($sitemap, 'loc');
                if (!loc) {
                    errors.push('Sitemap entry missing <loc> element');
                    return;
                }
                // Same absolute-URL requirement as for <url><loc> above.
                if (!isAbsoluteUrl(loc)) {
                    errors.push(`Invalid sitemap URL: ${loc}`);
                    return;
                }
                const sitemap = { loc };
                const lastmod = textOf($sitemap, 'lastmod');
                if (lastmod) {
                    if (isValidDate(lastmod)) {
                        sitemap.lastmod = lastmod;
                    }
                    else {
                        warnings.push(`Invalid lastmod date for sitemap ${loc}: ${lastmod}`);
                    }
                }
                sitemaps.push(sitemap);
            });
        }
        else {
            errors.push('Invalid sitemap: must contain <urlset> or <sitemapindex>');
        }
    }
    catch (e) {
        errors.push(`XML parsing error: ${e instanceof Error ? e.message : 'Unknown error'}`);
    }
    return {
        type,
        valid: errors.length === 0,
        errors,
        warnings,
        urls,
        sitemaps,
        urlCount: type === 'urlset' ? urls.length : sitemaps.length,
        // Byte length, not string length: the size limit this is compared against is in bytes.
        size: Buffer.byteLength(content, 'utf8'),
        compressed,
    };
}
129
/**
 * Parse a sitemap and run protocol and quality checks on the result.
 *
 * Issue codes produced: PARSE_ERROR, PARSE_WARNING, TOO_MANY_URLS, FILE_TOO_LARGE,
 * EMPTY_SITEMAP, EMPTY_INDEX, DUPLICATE_URLS, INVALID_BASE_URL, CROSS_DOMAIN_URL,
 * OLD_LASTMOD, NO_LASTMOD, UNIFORM_PRIORITY.
 *
 * @param {string} content - Sitemap XML text.
 * @param {string} [baseUrl] - When given, URLs whose host is neither this URL's host nor a
 *   subdomain of it are reported as CROSS_DOMAIN_URL. An unparseable value produces an
 *   INVALID_BASE_URL warning instead of throwing.
 * @returns {{valid: boolean, issues: object[], parseResult: object}} `valid` is true when
 *   no issue has type 'error'.
 */
export function validateSitemap(content, baseUrl) {
    const parseResult = parseSitemap(content);
    const { urls } = parseResult;
    const issues = [];
    for (const error of parseResult.errors) {
        issues.push({
            type: 'error',
            code: 'PARSE_ERROR',
            message: error,
        });
    }
    for (const warning of parseResult.warnings) {
        issues.push({
            type: 'warning',
            code: 'PARSE_WARNING',
            message: warning,
        });
    }
    if (urls.length > MAX_URLS_PER_SITEMAP) {
        issues.push({
            type: 'error',
            code: 'TOO_MANY_URLS',
            message: `Sitemap has ${urls.length} URLs (max: ${MAX_URLS_PER_SITEMAP})`,
            recommendation: 'Split into multiple sitemaps and use a sitemap index',
        });
    }
    if (parseResult.size > MAX_SITEMAP_SIZE) {
        issues.push({
            type: 'error',
            code: 'FILE_TOO_LARGE',
            message: `Sitemap is ${Math.round(parseResult.size / (1024 * 1024))}MB (max: 50MB)`,
            recommendation: 'Split into multiple sitemaps or compress with gzip',
        });
    }
    if (parseResult.type === 'urlset' && urls.length === 0) {
        issues.push({
            type: 'warning',
            code: 'EMPTY_SITEMAP',
            message: 'Sitemap contains no URLs',
            recommendation: 'Add URLs to your sitemap for search engines to discover',
        });
    }
    if (parseResult.type === 'sitemapindex' && parseResult.sitemaps.length === 0) {
        issues.push({
            type: 'warning',
            code: 'EMPTY_INDEX',
            message: 'Sitemap index contains no sitemaps',
            recommendation: 'Add sitemap references to your sitemap index',
        });
    }
    // Duplicate detection on normalized URLs; every repeat after the first is counted.
    const seenUrls = new Set();
    const duplicates = [];
    for (const url of urls) {
        const normalized = normalizeUrl(url.loc);
        if (seenUrls.has(normalized)) {
            duplicates.push(url.loc);
        }
        else {
            seenUrls.add(normalized);
        }
    }
    if (duplicates.length > 0) {
        issues.push({
            type: 'warning',
            code: 'DUPLICATE_URLS',
            message: `${duplicates.length} duplicate URL(s) found`,
            recommendation: 'Remove duplicate URLs from sitemap',
        });
    }
    if (baseUrl) {
        let baseHost = null;
        try {
            baseHost = new URL(baseUrl).hostname;
        }
        catch {
            // An unusable base URL must not abort the remaining checks.
            issues.push({
                type: 'warning',
                code: 'INVALID_BASE_URL',
                message: `Cannot check URL domains: invalid base URL "${baseUrl}"`,
            });
        }
        if (baseHost !== null) {
            for (const url of urls) {
                let urlHost;
                try {
                    urlHost = new URL(url.loc).hostname;
                }
                catch {
                    // Entries that cannot be parsed have no host to compare; skip them.
                    continue;
                }
                // Same host, or a subdomain of the base host, counts as same-domain.
                if (urlHost !== baseHost && !urlHost.endsWith(`.${baseHost}`)) {
                    issues.push({
                        type: 'warning',
                        code: 'CROSS_DOMAIN_URL',
                        message: `URL belongs to different domain: ${url.loc}`,
                        url: url.loc,
                        recommendation: 'Sitemaps should only contain URLs from the same domain',
                    });
                }
            }
        }
    }
    const oneYearAgo = new Date();
    oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
    let oldDateCount = 0;
    for (const url of urls) {
        if (url.lastmod) {
            const date = new Date(url.lastmod);
            if (!Number.isNaN(date.getTime()) && date < oneYearAgo) {
                oldDateCount++;
            }
        }
    }
    // Reported only when more than half of all URLs carry a stale lastmod.
    if (oldDateCount > urls.length * 0.5) {
        issues.push({
            type: 'info',
            code: 'OLD_LASTMOD',
            message: `${oldDateCount} URLs have lastmod dates over 1 year old`,
            recommendation: 'Consider updating lastmod dates when content changes',
        });
    }
    const missingLastmod = urls.filter(u => !u.lastmod).length;
    if (missingLastmod > 0 && missingLastmod === urls.length) {
        issues.push({
            type: 'info',
            code: 'NO_LASTMOD',
            message: 'No URLs have lastmod dates',
            recommendation: 'Add lastmod dates to help search engines prioritize crawling',
        });
    }
    const priorities = urls.filter(u => u.priority !== undefined).map(u => u.priority);
    if (priorities.length > 0) {
        const allSamePriority = priorities.every(p => p === priorities[0]);
        // Small sitemaps (10 URLs or fewer) are exempt from this hint.
        if (allSamePriority && urls.length > 10) {
            issues.push({
                type: 'info',
                code: 'UNIFORM_PRIORITY',
                message: `All ${priorities.length} URLs have the same priority (${priorities[0]})`,
                recommendation: 'Vary priority values to indicate relative page importance',
            });
        }
    }
    return {
        valid: issues.filter(i => i.type === 'error').length === 0,
        issues,
        parseResult,
    };
}
262
/**
 * Collect candidate sitemap URLs for a site.
 *
 * Sources, in order: `Sitemap:` directives found in the given robots.txt text, then a
 * fixed list of common sitemap paths, each probed and kept when the probe reports
 * HTTP status 200. Results are de-duplicated and returned in discovery order.
 *
 * @param {string} baseUrl - Site URL; relative sitemap references are resolved against it.
 *   Must be a parseable absolute URL (otherwise `new URL` throws).
 * @param {string} [robotsTxtContent] - robots.txt text to scan for `Sitemap:` lines.
 * @param {(url: string) => Promise<{status: number}>} [fetcher] - Custom probe; when
 *   omitted, the global `fetch` is used with a HEAD request.
 * @returns {Promise<string[]>} Absolute sitemap URLs.
 */
export async function discoverSitemaps(baseUrl, robotsTxtContent, fetcher) {
    const discovered = new Set();
    const base = new URL(baseUrl);
    const commonLocations = [
        '/sitemap.xml',
        '/sitemap_index.xml',
        '/sitemap-index.xml',
        '/sitemaps.xml',
        '/sitemap1.xml',
        '/post-sitemap.xml',
        '/page-sitemap.xml',
    ];
    if (robotsTxtContent) {
        for (const rawLine of robotsTxtContent.split(/\r?\n/)) {
            // In robots.txt '#' starts a comment and surrounding whitespace is insignificant,
            // so strip both before looking for the directive.
            const commentStart = rawLine.indexOf('#');
            const line = (commentStart === -1 ? rawLine : rawLine.slice(0, commentStart)).trim();
            const match = line.match(/^sitemap\s*:\s*(.+)$/i);
            if (match) {
                try {
                    discovered.add(new URL(match[1].trim(), baseUrl).href);
                }
                catch {
                    // Best effort: a directive whose value is not a usable URL is ignored.
                }
            }
        }
    }
    // Probed one at a time, in list order.
    for (const path of commonLocations) {
        const url = new URL(path, base).href;
        try {
            let response;
            if (fetcher) {
                response = await fetcher(url);
            }
            else {
                const fetchResponse = await fetch(url, { method: 'HEAD' });
                response = { status: fetchResponse.status, text: '' };
            }
            if (response.status === 200) {
                discovered.add(url);
            }
        }
        catch {
            // Best effort: a failed probe just means this location is not reported.
        }
    }
    return Array.from(discovered);
}
308
/**
 * Build the placeholder parse result used when no sitemap content could be obtained.
 * A fresh object (with fresh arrays) is returned on every call so results never share state.
 */
function createEmptyParseResult() {
    return {
        type: 'unknown',
        valid: false,
        errors: [],
        warnings: [],
        urls: [],
        sitemaps: [],
        urlCount: 0,
        size: 0,
        compressed: false,
    };
}
/**
 * Fetch a sitemap and validate its content.
 *
 * Never rejects: transport failures and HTTP errors are reported through the returned
 * object. HTTP 404 yields a NOT_FOUND warning, any other status >= 400 a FETCH_ERROR
 * error, and a thrown/rejected fetch a FETCH_ERROR error without a `status` field.
 * Otherwise the body is passed to validateSitemap with the URL's origin as base URL.
 *
 * @param {string} url - Absolute sitemap URL.
 * @param {(url: string) => Promise<{status: number, text: string}>} [fetcher] - Custom
 *   fetch implementation; when omitted, the global `fetch` is used.
 * @returns {Promise<{exists: boolean, status?: number, valid: boolean, issues: object[], parseResult: object}>}
 */
export async function fetchAndValidateSitemap(url, fetcher) {
    try {
        let response;
        if (fetcher) {
            response = await fetcher(url);
        }
        else {
            const fetchResponse = await fetch(url);
            response = {
                status: fetchResponse.status,
                text: await fetchResponse.text(),
                headers: Object.fromEntries(fetchResponse.headers.entries()),
            };
        }
        if (response.status === 404) {
            return {
                exists: false,
                status: 404,
                valid: false,
                issues: [{
                        type: 'warning',
                        code: 'NOT_FOUND',
                        message: 'Sitemap not found (404)',
                        recommendation: 'Create a sitemap.xml to help search engines discover your content',
                    }],
                parseResult: createEmptyParseResult(),
            };
        }
        if (response.status >= 400) {
            return {
                exists: false,
                status: response.status,
                valid: false,
                issues: [{
                        type: 'error',
                        code: 'FETCH_ERROR',
                        message: `Failed to fetch sitemap (HTTP ${response.status})`,
                    }],
                parseResult: createEmptyParseResult(),
            };
        }
        const baseUrl = new URL(url).origin;
        const validation = validateSitemap(response.text, baseUrl);
        return {
            ...validation,
            exists: true,
            status: response.status,
        };
    }
    catch (error) {
        // Covers fetch/fetcher failures as well as anything thrown while validating.
        return {
            exists: false,
            valid: false,
            issues: [{
                    type: 'error',
                    code: 'FETCH_ERROR',
                    message: `Failed to fetch sitemap: ${error instanceof Error ? error.message : 'Unknown error'}`,
                }],
            parseResult: createEmptyParseResult(),
        };
    }
}
400
/**
 * Check whether a string is a date in W3C Datetime format.
 *
 * Accepted shapes: `YYYY`, `YYYY-MM`, `YYYY-MM-DD`, and `YYYY-MM-DDThh:mm`,
 * `YYYY-MM-DDThh:mm:ss` or `YYYY-MM-DDThh:mm:ss.s` followed by a mandatory time zone
 * designator (`Z` or `+hh:mm` / `-hh:mm`). Fractional seconds are included because
 * they are what `Date.prototype.toISOString()` emits.
 *
 * @param {string} dateString - Candidate date text.
 * @returns {boolean} True when the shape matches and `Date` can parse the value.
 */
function isValidDate(dateString) {
    const patterns = [
        /^\d{4}$/,
        /^\d{4}-\d{2}$/,
        /^\d{4}-\d{2}-\d{2}$/,
        /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(:\d{2}(\.\d+)?)?([+-]\d{2}:\d{2}|Z)$/,
    ];
    if (!patterns.some(p => p.test(dateString))) {
        return false;
    }
    // The shape check alone would accept values such as month 13; let Date reject those.
    const date = new Date(dateString);
    return !Number.isNaN(date.getTime());
}
412
/**
 * Reduce a URL to a canonical key for duplicate detection.
 *
 * The key is `protocol//host/path?query`: a trailing slash is removed from any path
 * other than `/`, and the fragment is dropped. Scheme and host come out lower-cased
 * from the URL parser; path and query keep their case because they are case-sensitive
 * (`/Page` and `/page` may be different resources).
 *
 * @param {string} url - URL to normalize.
 * @returns {string} Canonical key; for input that cannot be parsed as a URL, the
 *   lower-cased input string.
 */
function normalizeUrl(url) {
    try {
        const parsed = new URL(url);
        let path = parsed.pathname;
        if (path !== '/' && path.endsWith('/')) {
            path = path.slice(0, -1);
        }
        return `${parsed.protocol}//${parsed.host}${path}${parsed.search}`;
    }
    catch {
        // No structure to work with; fall back to a plain case-insensitive key.
        return url.toLowerCase();
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recker",
3
- "version": "1.0.32",
3
+ "version": "1.0.33-next.bbc56eb",
4
4
  "description": "AI & DevX focused HTTP client for Node.js 18+",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",