recker 1.0.30 → 1.0.32-next.02f2bae

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/cli/index.js +2653 -197
  2. package/dist/cli/tui/shell-search.js +10 -8
  3. package/dist/cli/tui/shell.d.ts +29 -0
  4. package/dist/cli/tui/shell.js +1733 -9
  5. package/dist/mcp/search/hybrid-search.js +4 -2
  6. package/dist/seo/analyzer.d.ts +7 -0
  7. package/dist/seo/analyzer.js +200 -4
  8. package/dist/seo/rules/ai-search.d.ts +2 -0
  9. package/dist/seo/rules/ai-search.js +423 -0
  10. package/dist/seo/rules/canonical.d.ts +12 -0
  11. package/dist/seo/rules/canonical.js +249 -0
  12. package/dist/seo/rules/crawl.js +113 -0
  13. package/dist/seo/rules/cwv.js +0 -95
  14. package/dist/seo/rules/i18n.js +27 -0
  15. package/dist/seo/rules/images.js +23 -27
  16. package/dist/seo/rules/index.js +14 -0
  17. package/dist/seo/rules/internal-linking.js +6 -6
  18. package/dist/seo/rules/links.js +321 -0
  19. package/dist/seo/rules/meta.js +24 -0
  20. package/dist/seo/rules/mobile.js +0 -20
  21. package/dist/seo/rules/performance.js +124 -0
  22. package/dist/seo/rules/redirects.d.ts +16 -0
  23. package/dist/seo/rules/redirects.js +193 -0
  24. package/dist/seo/rules/resources.d.ts +2 -0
  25. package/dist/seo/rules/resources.js +373 -0
  26. package/dist/seo/rules/security.js +290 -0
  27. package/dist/seo/rules/technical-advanced.d.ts +10 -0
  28. package/dist/seo/rules/technical-advanced.js +283 -0
  29. package/dist/seo/rules/technical.js +74 -18
  30. package/dist/seo/rules/types.d.ts +103 -3
  31. package/dist/seo/seo-spider.d.ts +2 -0
  32. package/dist/seo/seo-spider.js +47 -2
  33. package/dist/seo/types.d.ts +48 -28
  34. package/dist/seo/utils/index.d.ts +1 -0
  35. package/dist/seo/utils/index.js +1 -0
  36. package/dist/seo/utils/similarity.d.ts +47 -0
  37. package/dist/seo/utils/similarity.js +273 -0
  38. package/dist/seo/validators/index.d.ts +3 -0
  39. package/dist/seo/validators/index.js +3 -0
  40. package/dist/seo/validators/llms-txt.d.ts +57 -0
  41. package/dist/seo/validators/llms-txt.js +317 -0
  42. package/dist/seo/validators/robots.d.ts +54 -0
  43. package/dist/seo/validators/robots.js +382 -0
  44. package/dist/seo/validators/sitemap.d.ts +69 -0
  45. package/dist/seo/validators/sitemap.js +424 -0
  46. package/package.json +1 -1
@@ -1,5 +1,4 @@
1
1
  import { createResult } from './types.js';
2
- import { SEO_THRESHOLDS } from './thresholds.js';
3
2
  export const technicalRules = [
4
3
  {
5
4
  id: 'canonical-exists',
@@ -71,23 +70,6 @@ export const technicalRules = [
71
70
  return createResult({ id: 'favicon-exists', name: 'Favicon', category: 'technical', severity: 'warning' }, 'pass', 'Favicon is defined', { value: ctx.faviconUrl });
72
71
  },
73
72
  },
74
- {
75
- id: 'url-length',
76
- name: 'URL Length',
77
- category: 'technical',
78
- severity: 'info',
79
- description: 'URL should be under 75 characters',
80
- check: (ctx) => {
81
- if (!ctx.url)
82
- return null;
83
- const len = ctx.urlLength ?? ctx.url.length;
84
- const max = SEO_THRESHOLDS.url.maxLength;
85
- if (len > max) {
86
- return createResult({ id: 'url-length', name: 'URL Length', category: 'technical', severity: 'info' }, 'info', `URL is long (${len} chars)`, { value: len, recommendation: `Keep URLs under ${max} characters when possible` });
87
- }
88
- return null;
89
- },
90
- },
91
73
  {
92
74
  id: 'url-lowercase',
93
75
  name: 'URL Lowercase',
@@ -220,4 +202,78 @@ export const technicalRules = [
220
202
  return createResult({ id: 'technical-robots-txt-hint', name: 'Robots.txt Hint', category: 'technical', severity: 'info' }, 'info', 'Robots.txt existence cannot be verified from HTML alone.', { recommendation: 'Ensure a valid `robots.txt` file is present at your domain root (e.g., `https://example.com/robots.txt`) to guide search engine crawlers and define your sitemap location.' });
221
203
  },
222
204
  },
205
+ {
206
+ id: 'url-many-parameters',
207
+ name: 'Too Many URL Parameters',
208
+ category: 'technical',
209
+ severity: 'warning',
210
+ description: 'URLs should not have more than 3 query parameters',
211
+ check: (ctx) => {
212
+ if (!ctx.url)
213
+ return null;
214
+ try {
215
+ const url = new URL(ctx.url);
216
+ const paramCount = Array.from(url.searchParams.keys()).length;
217
+ if (paramCount > 3) {
218
+ return createResult({ id: 'url-many-parameters', name: 'Too Many URL Parameters', category: 'technical', severity: 'warning' }, 'warn', `URL has ${paramCount} query parameters`, {
219
+ value: paramCount,
220
+ recommendation: 'Reduce URL parameters to 3 or fewer for better crawlability',
221
+ evidence: {
222
+ found: url.search,
223
+ expected: '3 or fewer parameters',
224
+ impact: 'Multiple parameters make URLs less enticing and may cause indexing issues'
225
+ }
226
+ });
227
+ }
228
+ }
229
+ catch {
230
+ }
231
+ return null;
232
+ },
233
+ },
234
+ {
235
+ id: 'deprecated-plugins',
236
+ name: 'Deprecated Plugins',
237
+ category: 'technical',
238
+ severity: 'error',
239
+ description: 'Pages should not use Flash, Java Applets, or Silverlight',
240
+ check: (ctx) => {
241
+ if (ctx.hasDeprecatedPlugins === undefined)
242
+ return null;
243
+ if (ctx.hasDeprecatedPlugins) {
244
+ return createResult({ id: 'deprecated-plugins', name: 'Deprecated Plugins', category: 'technical', severity: 'error' }, 'fail', 'Page uses deprecated plugins (Flash, Java, or Silverlight)', {
245
+ recommendation: 'Convert plugin content to HTML5',
246
+ evidence: {
247
+ found: ctx.deprecatedPluginTypes?.join(', ') || 'Flash/Java/Silverlight detected',
248
+ expected: 'No deprecated plugins',
249
+ impact: 'These plugins do not work on mobile devices and cannot be crawled properly',
250
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/mobile/mobile-sites-mobile-first-indexing'
251
+ }
252
+ });
253
+ }
254
+ return null;
255
+ },
256
+ },
257
+ {
258
+ id: 'frame-tags',
259
+ name: 'Frame Tags',
260
+ category: 'technical',
261
+ severity: 'error',
262
+ description: 'Pages should not use <frame> or <frameset> tags',
263
+ check: (ctx) => {
264
+ if (ctx.hasFrameTags === undefined)
265
+ return null;
266
+ if (ctx.hasFrameTags) {
267
+ return createResult({ id: 'frame-tags', name: 'Frame Tags', category: 'technical', severity: 'error' }, 'fail', 'Page uses <frame> or <frameset> tags', {
268
+ recommendation: 'Remove frame tags and restructure using modern HTML',
269
+ evidence: {
270
+ found: '<frame> or <frameset> tags detected',
271
+ expected: 'No frame tags',
272
+ impact: 'Search engines have difficulty indexing content within frames, affecting rankings'
273
+ }
274
+ });
275
+ }
276
+ return null;
277
+ },
278
+ },
223
279
  ];
@@ -1,7 +1,7 @@
1
1
  import type { SeoStatus } from '../types.js';
2
2
  import type { ExtractedLink } from '../../scrape/types.js';
3
3
  export type RuleSeverity = 'error' | 'warning' | 'info';
4
- export type RuleCategory = 'title' | 'meta' | 'og' | 'twitter' | 'headings' | 'images' | 'links' | 'content' | 'technical' | 'security' | 'mobile' | 'structured-data' | 'performance' | 'accessibility';
4
+ export type RuleCategory = 'title' | 'meta' | 'og' | 'twitter' | 'headings' | 'images' | 'links' | 'content' | 'technical' | 'security' | 'mobile' | 'structured-data' | 'performance' | 'accessibility' | 'ai-search' | 'resources' | 'crawlability' | 'canonicalization';
5
5
  export interface RuleContext {
6
6
  title?: string;
7
7
  titleLength?: number;
@@ -39,6 +39,8 @@ export interface RuleContext {
39
39
  altTextLengths?: number[];
40
40
  imageFilenames?: string[];
41
41
  imagesWithAsyncDecoding?: number;
42
+ brokenExternalImages?: number;
43
+ brokenExternalImageUrls?: string[];
42
44
  buttonsWithoutAriaLabel?: number;
43
45
  linksWithoutAriaLabel?: number;
44
46
  inputsWithoutLabel?: number;
@@ -76,6 +78,10 @@ export interface RuleContext {
76
78
  linksWithGenericText?: number;
77
79
  externalLinksWithoutNoopener?: number;
78
80
  externalLinksWithoutNoreferrer?: number;
81
+ linksToResources?: number;
82
+ resourceLinkUrls?: string[];
83
+ forbidden403Links?: number;
84
+ forbidden403LinkUrls?: string[];
79
85
  problematicLinks?: {
80
86
  withoutText?: ExtractedLink[];
81
87
  genericText?: ExtractedLink[];
@@ -126,6 +132,9 @@ export interface RuleContext {
126
132
  hasMixedContent?: boolean;
127
133
  responseHeaders?: Record<string, string | string[]>;
128
134
  textHtmlRatio?: number;
135
+ hasDeprecatedPlugins?: boolean;
136
+ deprecatedPluginTypes?: string[];
137
+ hasFrameTags?: boolean;
129
138
  hasFavicon?: boolean;
130
139
  faviconUrl?: string;
131
140
  hasPreconnect?: boolean;
@@ -156,6 +165,7 @@ export interface RuleContext {
156
165
  }>;
157
166
  ogLocale?: string;
158
167
  alternateLanguages?: string[];
168
+ detectedLanguage?: string;
159
169
  titleMatchesH1?: boolean;
160
170
  urlHasUppercase?: boolean;
161
171
  urlHasSpecialChars?: boolean;
@@ -257,9 +267,16 @@ export interface RuleContext {
257
267
  hasSitemapLink?: boolean;
258
268
  sitemapUrl?: string;
259
269
  robotsHasSitemap?: boolean;
270
+ robotsTxtExists?: boolean;
271
+ robotsTxtHasSitemap?: boolean;
260
272
  isPaginatedPage?: boolean;
261
273
  hasRelPrev?: boolean;
262
274
  hasRelNext?: boolean;
275
+ blockedResources?: number;
276
+ blockedResourceUrls?: string[];
277
+ blockedExternalResources?: number;
278
+ blockedExternalResourceUrls?: string[];
279
+ xRobotsTag?: string;
263
280
  hasDoctype?: boolean;
264
281
  httpStatusCode?: number;
265
282
  uncrawlableLinksCount?: number;
@@ -268,6 +285,10 @@ export interface RuleContext {
268
285
  structuredDataErrors?: number;
269
286
  isIndexable?: boolean;
270
287
  httpRedirectsToHttps?: boolean;
288
+ sniSupported?: boolean;
289
+ sitemapHttpUrls?: number;
290
+ sitemapHttpUrlsList?: string[];
291
+ hasHsts?: boolean;
271
292
  passiveVoicePercentage?: number;
272
293
  transitionWordPercentage?: number;
273
294
  consecutiveSentenceStarts?: number;
@@ -305,9 +326,88 @@ export interface RuleContext {
305
326
  contextualLinkCount?: number;
306
327
  incomingInternalLinks?: number;
307
328
  selfReferencingLinks?: number;
308
- brokenInternalLinks?: number;
309
- redirectChainLinks?: number;
329
+ brokenInternalLinks?: string[];
330
+ brokenExternalLinks?: string[];
331
+ redirectChainLinks?: Array<{
332
+ from: string;
333
+ to: string;
334
+ hops: number;
335
+ }>;
310
336
  pageClickDepth?: number;
337
+ clickDepth?: number;
338
+ isStartPage?: boolean;
339
+ nofollowInternalLinks?: number;
340
+ llmsTxt?: {
341
+ exists: boolean;
342
+ valid: boolean;
343
+ issues?: Array<{
344
+ message: string;
345
+ }>;
346
+ parseResult?: {
347
+ siteName?: string;
348
+ siteDescription?: string;
349
+ sections: Array<{
350
+ title: string;
351
+ }>;
352
+ links: Array<{
353
+ text: string;
354
+ url: string;
355
+ }>;
356
+ };
357
+ };
358
+ robotsTxt?: {
359
+ parseResult?: {
360
+ userAgentBlocks: Array<{
361
+ userAgents: string[];
362
+ rules: Array<{
363
+ type: string;
364
+ path: string;
365
+ }>;
366
+ }>;
367
+ };
368
+ };
369
+ headings?: {
370
+ structure: Array<{
371
+ level: number;
372
+ text: string;
373
+ }>;
374
+ h1Count?: number;
375
+ hasProperHierarchy?: boolean;
376
+ };
377
+ lastModified?: string;
378
+ semanticHtmlRatio?: number;
379
+ jsFilesCount?: number;
380
+ jsTotalSize?: number;
381
+ renderBlockingJs?: number;
382
+ cssFilesCount?: number;
383
+ cssTotalSize?: number;
384
+ hasCriticalCss?: boolean;
385
+ largeImages?: string[];
386
+ imagesTotal?: number;
387
+ modernFormatImages?: number;
388
+ fontFilesCount?: number;
389
+ hasFontDisplaySwap?: boolean;
390
+ totalRequests?: number;
391
+ totalPageSize?: number;
392
+ uncompressedResources?: number;
393
+ resourcesWithoutCaching?: number;
394
+ brokenExternalResources?: number;
395
+ brokenExternalResourceUrls?: string[];
396
+ unminifiedResources?: number;
397
+ unminifiedResourceUrls?: string[];
398
+ sslCertificate?: {
399
+ valid: boolean;
400
+ error?: string;
401
+ expiryDate?: string;
402
+ nameMismatch?: boolean;
403
+ commonName?: string;
404
+ expectedDomain?: string;
405
+ issuer?: string;
406
+ selfSigned?: boolean;
407
+ };
408
+ tlsVersion?: string;
409
+ hasPasswordField?: boolean;
410
+ formsOnHttp?: number;
311
411
  }
312
412
  export interface RuleEvidence {
313
413
  found?: string | number | string[];
@@ -4,6 +4,8 @@ export interface SeoSpiderOptions extends SpiderOptions {
4
4
  seo?: boolean;
5
5
  output?: string;
6
6
  onSeoAnalysis?: (result: SeoPageResult) => void;
7
+ focusCategories?: string[];
8
+ focusMode?: 'all' | 'links' | 'duplicates' | 'security' | 'ai' | 'resources';
7
9
  }
8
10
  export interface SeoPageResult extends SpiderPageResult {
9
11
  seoReport?: SeoReport;
@@ -62,7 +62,13 @@ export class SeoSpider {
62
62
  try {
63
63
  const response = await client.get(page.url);
64
64
  const html = await response.text();
65
- const seoReport = await analyzeSeo(html, { baseUrl: page.url });
65
+ const rulesOptions = this.options.focusCategories?.length
66
+ ? { categories: this.options.focusCategories }
67
+ : undefined;
68
+ const seoReport = await analyzeSeo(html, {
69
+ baseUrl: page.url,
70
+ rules: rulesOptions,
71
+ });
66
72
  const seoPage = {
67
73
  ...page,
68
74
  seoReport,
@@ -145,11 +151,49 @@ export class SeoSpider {
145
151
  return sum;
146
152
  }, 0);
147
153
  const score = checks.length > 0 ? Math.round(scoreSum / checks.length) : 0;
154
+ const passed = checks.filter(c => c.status === 'pass').length;
155
+ const warnings = checks.filter(c => c.status === 'warn').length;
156
+ const errors = checks.filter(c => c.status === 'fail').length;
157
+ const infos = checks.filter(c => c.status === 'info').length;
158
+ const passRate = checks.length > 0 ? Math.round((passed / checks.length) * 100) : 0;
148
159
  return {
149
160
  url: page.url,
150
161
  timestamp: new Date(),
151
162
  grade: this.scoreToGrade(score),
152
163
  score,
164
+ summary: {
165
+ totalChecks: checks.length,
166
+ passed,
167
+ warnings,
168
+ errors,
169
+ infos,
170
+ passRate,
171
+ issuesByCategory: {},
172
+ topIssues: checks
173
+ .filter(c => c.status === 'fail' || c.status === 'warn')
174
+ .slice(0, 5)
175
+ .map(c => ({
176
+ name: c.name,
177
+ message: c.message,
178
+ category: 'general',
179
+ severity: c.status === 'fail' ? 'error' : 'warning',
180
+ })),
181
+ quickWins: [],
182
+ vitals: {
183
+ wordCount: 0,
184
+ readingTime: 0,
185
+ imageCount: 0,
186
+ linkCount: page.links.length,
187
+ },
188
+ completeness: {
189
+ meta: 0,
190
+ social: 0,
191
+ technical: 0,
192
+ content: 0,
193
+ images: 0,
194
+ links: 0,
195
+ },
196
+ },
153
197
  checks,
154
198
  title: page.title ? { text: page.title, length: page.title.length } : undefined,
155
199
  headings: {
@@ -216,9 +260,10 @@ export class SeoSpider {
216
260
  hasCharset: false,
217
261
  hasLang: false,
218
262
  },
219
- jsonLd: {
263
+ structuredData: {
220
264
  count: 0,
221
265
  types: [],
266
+ items: [],
222
267
  },
223
268
  };
224
269
  }
@@ -38,6 +38,8 @@ export interface ContentMetrics {
38
38
  listCount: number;
39
39
  strongTagCount: number;
40
40
  emTagCount: number;
41
+ fleschReadingEase?: number;
42
+ hasQuestionHeadings?: boolean;
41
43
  }
42
44
  export interface LinkAnalysis {
43
45
  total: number;
@@ -46,6 +48,8 @@ export interface LinkAnalysis {
46
48
  nofollow: number;
47
49
  broken: number;
48
50
  withoutText: number;
51
+ sponsoredLinks: number;
52
+ ugcLinks: number;
49
53
  }
50
54
  export interface ImageAnalysis {
51
55
  total: number;
@@ -94,11 +98,50 @@ export interface SeoTiming {
94
98
  tls?: number;
95
99
  download?: number;
96
100
  }
101
+ export interface SeoSummary {
102
+ totalChecks: number;
103
+ passed: number;
104
+ warnings: number;
105
+ errors: number;
106
+ infos: number;
107
+ passRate: number;
108
+ issuesByCategory: Record<string, {
109
+ passed: number;
110
+ warnings: number;
111
+ errors: number;
112
+ }>;
113
+ topIssues: Array<{
114
+ name: string;
115
+ message: string;
116
+ category: string;
117
+ severity: 'error' | 'warning';
118
+ }>;
119
+ quickWins: string[];
120
+ vitals: {
121
+ htmlSize?: number;
122
+ domElements?: number;
123
+ ttfb?: number;
124
+ totalTime?: number;
125
+ wordCount: number;
126
+ readingTime: number;
127
+ imageCount: number;
128
+ linkCount: number;
129
+ };
130
+ completeness: {
131
+ meta: number;
132
+ social: number;
133
+ technical: number;
134
+ content: number;
135
+ images: number;
136
+ links: number;
137
+ };
138
+ }
97
139
  export interface SeoReport {
98
140
  url: string;
99
141
  timestamp: Date;
100
142
  grade: string;
101
143
  score: number;
144
+ summary: SeoSummary;
102
145
  timing?: SeoTiming;
103
146
  checks: SeoCheckResult[];
104
147
  title?: {
@@ -124,16 +167,17 @@ export interface SeoReport {
124
167
  image?: string;
125
168
  site?: string;
126
169
  };
170
+ structuredData: {
171
+ count: number;
172
+ types: string[];
173
+ items: Record<string, unknown>[];
174
+ };
127
175
  headings: HeadingAnalysis;
128
176
  content: ContentMetrics;
129
177
  links: LinkAnalysis;
130
178
  images: ImageAnalysis;
131
179
  social: SocialMetaAnalysis;
132
180
  technical: TechnicalSeo;
133
- jsonLd: {
134
- count: number;
135
- types: string[];
136
- };
137
181
  }
138
182
  export interface SeoAnalyzerOptions {
139
183
  baseUrl?: string;
@@ -158,27 +202,3 @@ export interface ExtractedImage {
158
202
  srcset?: string;
159
203
  loading?: 'lazy' | 'eager';
160
204
  }
161
- export interface LinkAnalysis {
162
- total: number;
163
- internal: number;
164
- external: number;
165
- nofollow: number;
166
- broken: number;
167
- withoutText: number;
168
- sponsoredLinks: number;
169
- ugcLinks: number;
170
- }
171
- export interface ContentMetrics {
172
- wordCount: number;
173
- characterCount: number;
174
- sentenceCount: number;
175
- paragraphCount: number;
176
- readingTimeMinutes: number;
177
- avgWordsPerSentence: number;
178
- avgParagraphLength: number;
179
- listCount: number;
180
- strongTagCount: number;
181
- emTagCount: number;
182
- fleschReadingEase?: number;
183
- hasQuestionHeadings?: boolean;
184
- }
@@ -0,0 +1 @@
1
+ export * from './similarity.js';
@@ -0,0 +1 @@
1
+ export * from './similarity.js';
@@ -0,0 +1,47 @@
1
+ export interface SimilarityResult {
2
+ urlA: string;
3
+ urlB: string;
4
+ similarity: number;
5
+ type: 'exact' | 'near-duplicate' | 'similar' | 'different';
6
+ simhashDistance?: number;
7
+ jaccardIndex?: number;
8
+ }
9
+ export interface DuplicateGroup {
10
+ type: 'title' | 'description' | 'h1' | 'content';
11
+ value: string;
12
+ urls: string[];
13
+ similarity?: number;
14
+ }
15
+ export interface ContentFingerprint {
16
+ url: string;
17
+ simhash: bigint;
18
+ shingles: Set<number>;
19
+ wordCount: number;
20
+ normalizedText: string;
21
+ }
22
+ export declare const DEFAULT_SIMILARITY_THRESHOLD = 85;
23
+ export declare function normalizeText(text: string, removeStopWords?: boolean): string;
24
+ export declare function tokenize(text: string): string[];
25
+ export declare function createShingles(tokens: string[], size?: number): Set<number>;
26
+ export declare function calculateSimHash(text: string): bigint;
27
+ export declare function hammingDistance(hash1: bigint, hash2: bigint): number;
28
+ export declare function simhashSimilarity(hash1: bigint, hash2: bigint): number;
29
+ export declare function jaccardSimilarity(setA: Set<number>, setB: Set<number>): number;
30
+ export declare function createFingerprint(url: string, text: string): ContentFingerprint;
31
+ export declare function compareFingerprints(fpA: ContentFingerprint, fpB: ContentFingerprint): SimilarityResult;
32
+ export declare function findDuplicateContent(pages: Array<{
33
+ url: string;
34
+ content: string;
35
+ }>, threshold?: number): SimilarityResult[];
36
+ export declare function findDuplicateMetadata(pages: Array<{
37
+ url: string;
38
+ title?: string;
39
+ description?: string;
40
+ h1?: string;
41
+ }>, threshold?: number): DuplicateGroup[];
42
+ export declare function calculateStringSimilarity(a: string, b: string): number;
43
+ export declare function calculateTextToHtmlRatio(html: string, text: string): number;
44
+ export declare function isThinContent(wordCount: number, textToHtmlRatio?: number, minWords?: number, minRatio?: number): {
45
+ isThin: boolean;
46
+ reason?: string;
47
+ };