recker 1.0.72 → 1.0.75-next.2e5a94f

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/README.md +5 -18
  2. package/dist/browser/core/client.d.ts +14 -8
  3. package/dist/browser/core/client.js +199 -17
  4. package/dist/browser/core/errors.d.ts +15 -1
  5. package/dist/browser/core/errors.js +140 -9
  6. package/dist/browser/core/request.d.ts +5 -0
  7. package/dist/browser/core/request.js +33 -2
  8. package/dist/browser/core-runtime/plugin-manifest.d.ts +24 -0
  9. package/dist/browser/core-runtime/plugin-manifest.js +159 -0
  10. package/dist/browser/core-runtime/request-context.d.ts +13 -0
  11. package/dist/browser/core-runtime/request-context.js +24 -0
  12. package/dist/browser/core-runtime/typed-events.d.ts +89 -0
  13. package/dist/browser/core-runtime/typed-events.js +34 -0
  14. package/dist/browser/index.iife.min.js +79 -79
  15. package/dist/browser/index.min.js +79 -79
  16. package/dist/browser/index.mini.iife.js +913 -97
  17. package/dist/browser/index.mini.iife.min.js +46 -46
  18. package/dist/browser/index.mini.min.js +46 -46
  19. package/dist/browser/index.mini.umd.js +913 -97
  20. package/dist/browser/index.mini.umd.min.js +46 -46
  21. package/dist/browser/index.umd.min.js +79 -79
  22. package/dist/browser/plugins/auth/aws-sigv4.d.ts +1 -0
  23. package/dist/browser/plugins/auth/aws-sigv4.js +19 -2
  24. package/dist/browser/plugins/retry.js +29 -1
  25. package/dist/browser/presets/aws.d.ts +1 -0
  26. package/dist/browser/presets/aws.js +62 -1
  27. package/dist/browser/runner/request-runner.d.ts +15 -5
  28. package/dist/browser/runner/request-runner.js +164 -30
  29. package/dist/browser/scrape/parser/nodes/html.d.ts +6 -0
  30. package/dist/browser/scrape/parser/nodes/html.js +70 -18
  31. package/dist/browser/scrape/parser/nodes/node.d.ts +1 -0
  32. package/dist/browser/scrape/parser/nodes/node.js +5 -0
  33. package/dist/browser/scrape/spider.d.ts +1 -0
  34. package/dist/browser/scrape/spider.js +39 -26
  35. package/dist/browser/seo/analyzer.d.ts +1 -1
  36. package/dist/browser/seo/analyzer.js +73 -42
  37. package/dist/browser/seo/index.d.ts +1 -1
  38. package/dist/browser/seo/rules/types.d.ts +2 -0
  39. package/dist/browser/seo/seo-spider.d.ts +2 -3
  40. package/dist/browser/seo/seo-spider.js +26 -202
  41. package/dist/browser/seo/types.d.ts +4 -0
  42. package/dist/browser/seo/validators/sitemap.js +9 -2
  43. package/dist/browser/transport/fetch.js +38 -5
  44. package/dist/browser/transport/undici.js +73 -11
  45. package/dist/browser/transport/worker.d.ts +0 -1
  46. package/dist/browser/transport/worker.js +1 -3
  47. package/dist/browser/types/index.d.ts +24 -0
  48. package/dist/cli/commands/mcp.js +5 -3
  49. package/dist/core/client.d.ts +14 -8
  50. package/dist/core/client.js +199 -17
  51. package/dist/core/errors.d.ts +15 -1
  52. package/dist/core/errors.js +140 -9
  53. package/dist/core/request.d.ts +5 -0
  54. package/dist/core/request.js +33 -2
  55. package/dist/core-runtime/plugin-manifest.d.ts +24 -0
  56. package/dist/core-runtime/plugin-manifest.js +159 -0
  57. package/dist/core-runtime/request-context.d.ts +13 -0
  58. package/dist/core-runtime/request-context.js +24 -0
  59. package/dist/core-runtime/typed-events.d.ts +89 -0
  60. package/dist/core-runtime/typed-events.js +34 -0
  61. package/dist/index.d.ts +2 -1
  62. package/dist/index.js +2 -1
  63. package/dist/mcp/cli.js +10 -8
  64. package/dist/mcp/profiles.d.ts +1 -1
  65. package/dist/mcp/profiles.js +31 -6
  66. package/dist/mcp/tools/categories.js +0 -1
  67. package/dist/mcp/tools/seo.js +320 -4
  68. package/dist/plugins/auth/aws-sigv4.d.ts +1 -0
  69. package/dist/plugins/auth/aws-sigv4.js +19 -2
  70. package/dist/plugins/retry.js +29 -1
  71. package/dist/presets/aws.d.ts +1 -0
  72. package/dist/presets/aws.js +62 -1
  73. package/dist/recker.d.ts +3 -0
  74. package/dist/recker.js +5 -0
  75. package/dist/runner/request-runner.d.ts +15 -5
  76. package/dist/runner/request-runner.js +164 -30
  77. package/dist/scrape/parser/nodes/html.d.ts +6 -0
  78. package/dist/scrape/parser/nodes/html.js +70 -18
  79. package/dist/scrape/parser/nodes/node.d.ts +1 -0
  80. package/dist/scrape/parser/nodes/node.js +5 -0
  81. package/dist/scrape/spider.d.ts +1 -0
  82. package/dist/scrape/spider.js +39 -26
  83. package/dist/search/google.d.ts +67 -0
  84. package/dist/search/google.js +480 -0
  85. package/dist/search/index.d.ts +3 -0
  86. package/dist/search/index.js +1 -0
  87. package/dist/seo/analyzer.d.ts +1 -1
  88. package/dist/seo/analyzer.js +73 -42
  89. package/dist/seo/index.d.ts +1 -1
  90. package/dist/seo/rules/types.d.ts +2 -0
  91. package/dist/seo/seo-spider.d.ts +2 -3
  92. package/dist/seo/seo-spider.js +26 -202
  93. package/dist/seo/types.d.ts +4 -0
  94. package/dist/seo/validators/sitemap.js +9 -2
  95. package/dist/transport/fetch.js +38 -5
  96. package/dist/transport/undici.js +73 -11
  97. package/dist/transport/worker.d.ts +0 -1
  98. package/dist/transport/worker.js +1 -3
  99. package/dist/types/index.d.ts +24 -0
  100. package/dist/version.js +1 -1
  101. package/package.json +9 -1
@@ -1,7 +1,7 @@
1
1
  import { parse } from '../scrape/parser/index.js';
2
2
  import { extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractLinks, extractImages, } from '../scrape/extractors.js';
3
3
  import { generateKeywordCloud } from './keywords.js';
4
- import { createRulesEngine, SEO_THRESHOLDS, } from './rules/index.js';
4
+ import { createRulesEngine, SEO_THRESHOLDS, calculateWeightedScore, } from './rules/index.js';
5
5
  export class SeoAnalyzer {
6
6
  root;
7
7
  options;
@@ -47,7 +47,9 @@ export class SeoAnalyzer {
47
47
  const analytics = this.analyzeAnalytics();
48
48
  const feeds = this.analyzeFeeds();
49
49
  const conversion = this.analyzeConversionElements(links, visibleText);
50
+ const pageType = this.detectPageType(jsonLd);
50
51
  const context = this.buildRuleContext({
52
+ pageType,
51
53
  meta,
52
54
  og,
53
55
  twitter,
@@ -68,7 +70,7 @@ export class SeoAnalyzer {
68
70
  });
69
71
  const ruleResults = this.rulesEngine.evaluate(context);
70
72
  const checks = this.convertToCheckResults(ruleResults);
71
- const { score, grade } = this.calculateScore(checks);
73
+ const { score, grade } = this.calculateScore(ruleResults);
72
74
  const summary = this.buildSummary(ruleResults, checks, {
73
75
  content,
74
76
  imageAnalysis,
@@ -77,13 +79,17 @@ export class SeoAnalyzer {
77
79
  og,
78
80
  twitter,
79
81
  technical,
82
+ pageType,
83
+ timings: this.options.timings,
80
84
  });
81
85
  return {
82
86
  url,
83
87
  timestamp: new Date(),
84
88
  grade,
85
89
  score,
90
+ timing: this.options.timings,
86
91
  summary,
92
+ pageType,
87
93
  checks,
88
94
  title: meta.title
89
95
  ? { text: meta.title, length: meta.title.length }
@@ -134,6 +140,47 @@ export class SeoAnalyzer {
134
140
  return bodies[0];
135
141
  return bodies.reduce((prev, curr) => curr.text.length > prev.text.length ? curr : prev);
136
142
  }
143
+ detectPageType(jsonLd) {
144
+ if (!this.options.baseUrl) {
145
+ return 'other';
146
+ }
147
+ try {
148
+ const parsed = new URL(this.options.baseUrl);
149
+ const pathname = parsed.pathname.toLowerCase();
150
+ const hasQueryKeyword = (value) => parsed.searchParams.has(value);
151
+ if (pathname === '/' || pathname === '') {
152
+ return 'homepage';
153
+ }
154
+ if (/(^|\/)(search|busca|s|results|query)\b/.test(pathname) ||
155
+ hasQueryKeyword('q') ||
156
+ hasQueryKeyword('query') ||
157
+ hasQueryKeyword('search')) {
158
+ return 'search';
159
+ }
160
+ const productSignals = ['product', 'produto', 'item', 'sku', 'shop'];
161
+ if (productSignals.some((segment) => pathname.includes(`/${segment}/`))) {
162
+ return 'product';
163
+ }
164
+ const articleSignals = ['article', 'post', 'blog', 'noticia', 'news'];
165
+ if (articleSignals.some((segment) => pathname.includes(`/${segment}/`)) ||
166
+ this.root.querySelectorAll('article').length > 0) {
167
+ return 'article';
168
+ }
169
+ if (/(^|\/)(categoria|category|tag|section|topic)\b/.test(pathname)) {
170
+ return 'category';
171
+ }
172
+ const hasProductJsonLd = jsonLd
173
+ .map((node) => node['@type'])
174
+ .some((type) => typeof type === 'string' && type.toLowerCase() === 'product');
175
+ if (hasProductJsonLd) {
176
+ return 'product';
177
+ }
178
+ return 'other';
179
+ }
180
+ catch {
181
+ return 'other';
182
+ }
183
+ }
137
184
  getVisibleText() {
138
185
  const body = this.getMainBody();
139
186
  if (!body)
@@ -159,7 +206,7 @@ export class SeoAnalyzer {
159
206
  return clone.text.replace(/\s+/g, ' ').trim();
160
207
  }
161
208
  buildRuleContext(data) {
162
- const { meta, og, twitter, jsonLd, headings, content, linkAnalysis, imageAnalysis, links, keywords, resources, emailsFound, socialLinksFound, socialLinkDetails, analytics, feeds, conversion, } = data;
209
+ const { meta, og, twitter, jsonLd, headings, content, pageType, linkAnalysis, imageAnalysis, links, keywords, resources, emailsFound, socialLinksFound, socialLinkDetails, analytics, feeds, conversion, } = data;
163
210
  const html = this.root.querySelector('html');
164
211
  const htmlLang = html ? html.getAttribute('lang') : undefined;
165
212
  const hreflangTags = [];
@@ -198,7 +245,6 @@ export class SeoAnalyzer {
198
245
  const hasMixedContent = this.checkMixedContent();
199
246
  const h1Elements = this.root.querySelectorAll('h1');
200
247
  const h1Text = h1Elements.length > 0 ? h1Elements[0].text.trim() : '';
201
- const iframeCount = this.root.querySelectorAll('iframe').length;
202
248
  const topKeywords = keywords.topKeywords.slice(0, 5).map(k => k.word);
203
249
  const mainKeyword = topKeywords.length > 0 ? topKeywords[0] : undefined;
204
250
  const keywordsInTitle = topKeywords.some(kw => meta.title?.toLowerCase().includes(kw));
@@ -234,7 +280,6 @@ export class SeoAnalyzer {
234
280
  const structuralHtml = this.analyzeStructuralHtml();
235
281
  const breadcrumbs = this.analyzeBreadcrumbs(jsonLd.map((j) => j['@type']).filter(Boolean));
236
282
  const multimedia = this.analyzeMultimedia();
237
- const advancedImages = this.analyzeAdvancedImages();
238
283
  const responsiveImages = this.analyzeResponsiveImages();
239
284
  const inlineImages = this.analyzeInlineImages();
240
285
  const trustSignals = this.analyzeTrustSignals(links);
@@ -245,6 +290,7 @@ export class SeoAnalyzer {
245
290
  : 0;
246
291
  const textHtmlRatio = this.calculateTextHtmlRatio(content.characterCount);
247
292
  return {
293
+ pageType,
248
294
  jsFilesCount: resources.jsFilesCount,
249
295
  cssFilesCount: resources.cssFilesCount,
250
296
  unminifiedResources: resources.unminifiedResources,
@@ -774,7 +820,6 @@ export class SeoAnalyzer {
774
820
  }
775
821
  analyzeAnalytics() {
776
822
  const providers = [];
777
- const html = this.root.innerHTML || '';
778
823
  const scripts = this.root.querySelectorAll('script');
779
824
  const scriptSources = [];
780
825
  const scriptContents = [];
@@ -899,21 +944,6 @@ export class SeoAnalyzer {
899
944
  hasPhoneOnPage,
900
945
  };
901
946
  }
902
- analyzeAdvancedImages() {
903
- let imagesWithSrcset = 0;
904
- let largeBase64ImagesCount = 0;
905
- const imgs = this.root.querySelectorAll('img');
906
- imgs.forEach((img) => {
907
- if (img.getAttribute('srcset') || (img.parentNode && img.parentNode.tagName === 'PICTURE')) {
908
- imagesWithSrcset++;
909
- }
910
- const src = img.getAttribute('src') || '';
911
- if (src.startsWith('data:image') && src.length > 5 * 1024) {
912
- largeBase64ImagesCount++;
913
- }
914
- });
915
- return { imagesWithSrcset, largeBase64ImagesCount };
916
- }
917
947
  calculateTextHtmlRatio(bodyTextLength) {
918
948
  const htmlSize = this.root.innerHTML?.length;
919
949
  if (htmlSize && htmlSize > 0) {
@@ -926,6 +956,7 @@ export class SeoAnalyzer {
926
956
  id: r.id,
927
957
  name: r.name,
928
958
  category: r.category,
959
+ severity: r.severity,
929
960
  status: r.status,
930
961
  message: r.message,
931
962
  value: r.value,
@@ -934,6 +965,8 @@ export class SeoAnalyzer {
934
965
  }));
935
966
  }
936
967
  buildSummary(ruleResults, checks, data) {
968
+ const pageType = data.pageType;
969
+ const timings = data.timings;
937
970
  const passed = checks.filter((c) => c.status === 'pass').length;
938
971
  const warnings = checks.filter((c) => c.status === 'warn').length;
939
972
  const errors = checks.filter((c) => c.status === 'fail').length;
@@ -954,21 +987,25 @@ export class SeoAnalyzer {
954
987
  else if (result.status === 'fail')
955
988
  issuesByCategory[cat].errors++;
956
989
  }
957
- const topIssues = ruleResults
958
- .filter((r) => r.status === 'fail' || r.status === 'warn')
990
+ const topIssues = checks
991
+ .filter((c) => c.status === 'fail' || c.status === 'warn')
959
992
  .sort((a, b) => {
960
- if (a.status === 'fail' && b.status !== 'fail')
961
- return -1;
962
- if (a.status !== 'fail' && b.status === 'fail')
963
- return 1;
964
- return 0;
993
+ const severityOrder = (status) => status === 'fail' ? 2 : 1;
994
+ const statusDiff = severityOrder(b.status) - severityOrder(a.status);
995
+ if (statusDiff !== 0)
996
+ return statusDiff;
997
+ const aSeverity = a.severity || (a.status === 'fail' ? 'error' : 'warning');
998
+ const bSeverity = b.severity || (b.status === 'fail' ? 'error' : 'warning');
999
+ if (aSeverity === bSeverity)
1000
+ return 0;
1001
+ return aSeverity === 'error' ? -1 : 1;
965
1002
  })
966
1003
  .slice(0, 5)
967
1004
  .map((r) => ({
968
1005
  name: r.name,
969
1006
  message: r.message,
970
1007
  category: r.category,
971
- severity: (r.status === 'fail' ? 'error' : 'warning'),
1008
+ severity: (r.severity || (r.status === 'fail' ? 'error' : 'warning')),
972
1009
  }));
973
1010
  const quickWins = [];
974
1011
  if (!data.meta.title)
@@ -993,8 +1030,8 @@ export class SeoAnalyzer {
993
1030
  const vitals = {
994
1031
  htmlSize,
995
1032
  domElements,
996
- ttfb: this.options.responseHeaders ? undefined : undefined,
997
- totalTime: undefined,
1033
+ ttfb: timings?.ttfb,
1034
+ totalTime: timings?.total,
998
1035
  wordCount: data.content.wordCount,
999
1036
  totalWordCount: data.content.totalWordCount,
1000
1037
  readingTime: data.content.readingTimeMinutes,
@@ -1017,6 +1054,7 @@ export class SeoAnalyzer {
1017
1054
  infos,
1018
1055
  passRate,
1019
1056
  issuesByCategory,
1057
+ pageType: pageType,
1020
1058
  topIssues,
1021
1059
  quickWins: limitedQuickWins,
1022
1060
  vitals,
@@ -1387,18 +1425,11 @@ export class SeoAnalyzer {
1387
1425
  unminifiedResourceUrls: unminified
1388
1426
  };
1389
1427
  }
1390
- calculateScore(checks) {
1391
- const weights = {
1392
- pass: 100,
1393
- warn: 50,
1394
- fail: 0,
1395
- info: 100,
1396
- };
1397
- const scoringChecks = checks.filter((c) => c.status !== 'info');
1398
- if (scoringChecks.length === 0)
1428
+ calculateScore(results) {
1429
+ if (results.length === 0)
1399
1430
  return { score: 100, grade: 'A' };
1400
- const totalWeight = scoringChecks.reduce((sum, check) => sum + weights[check.status], 0);
1401
- const score = Math.round(totalWeight / scoringChecks.length);
1431
+ const { score: weightedScore } = calculateWeightedScore(results);
1432
+ const score = weightedScore;
1402
1433
  let grade;
1403
1434
  if (score >= 90)
1404
1435
  grade = 'A';
@@ -6,7 +6,7 @@ export type { SeoSpiderOptions, SeoPageResult, SiteWideIssue, SeoSpiderResult, }
6
6
  export { SeoRulesEngine, createRulesEngine, SEO_THRESHOLDS, ALL_SEO_RULES, } from './rules/index.js';
7
7
  export { generateSeoFilename, resolveOutputPath, writeReport, formatReportForJson, } from './output.js';
8
8
  export type { SeoOutputType, OutputOptions, WriteOptions } from './output.js';
9
- export type { SeoReport, SeoCheckResult, SeoStatus, SeoTiming, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
9
+ export type { SeoReport, SeoCheckResult, SeoStatus, SeoPageType, SeoTiming, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
10
10
  export type { SeoRule, RuleContext, RuleResult, RuleEvidence, RuleCategory, RuleSeverity, RulesEngineOptions, } from './rules/index.js';
11
11
  export type { SeoAnalyzerFullOptions } from './analyzer.js';
12
12
  export { parseRobotsTxt, validateRobotsTxt, isPathAllowed, fetchAndValidateRobotsTxt, } from './validators/robots.js';
@@ -1,8 +1,10 @@
1
1
  import type { SeoStatus } from '../types.js';
2
2
  import type { ExtractedLink } from '../../scrape/types.js';
3
+ import type { SeoPageType } from '../types.js';
3
4
  export type RuleSeverity = 'error' | 'warning' | 'info';
4
5
  export type RuleCategory = 'title' | 'meta' | 'og' | 'twitter' | 'headings' | 'images' | 'links' | 'content' | 'technical' | 'security' | 'mobile' | 'structured-data' | 'performance' | 'accessibility' | 'i18n' | 'ai-search' | 'resources' | 'crawlability' | 'canonicalization';
5
6
  export interface RuleContext {
7
+ pageType?: SeoPageType;
6
8
  keywordsInTitle?: boolean;
7
9
  keywordsInDescription?: boolean;
8
10
  keywordsInH1?: boolean;
@@ -66,19 +66,18 @@ export interface SeoSpiderResult extends Omit<SpiderResult, 'pages'> {
66
66
  export declare class SeoSpider {
67
67
  private spider;
68
68
  private options;
69
- private seoResults;
70
69
  private seoPages;
71
70
  private homeHtml;
71
+ private normalizeUrl;
72
+ private toHeaderRecord;
72
73
  constructor(options?: SeoSpiderOptions);
73
74
  private analyzePageDuringCrawl;
74
75
  crawl(startUrl: string): Promise<SeoSpiderResult>;
75
76
  private checkSiteFiles;
76
77
  private validateManifest;
77
78
  private validateSitemap;
78
- private createReportFromPageData;
79
79
  private detectSiteWideIssues;
80
80
  private calculateSummary;
81
- private scoreToGrade;
82
81
  private saveReport;
83
82
  abort(): void;
84
83
  isRunning(): boolean;
@@ -7,9 +7,29 @@ import * as fs from 'fs/promises';
7
7
  export class SeoSpider {
8
8
  spider;
9
9
  options;
10
- seoResults = new Map();
11
10
  seoPages = [];
12
11
  homeHtml = '';
12
+ normalizeUrl(url) {
13
+ try {
14
+ const parsed = new URL(url);
15
+ parsed.hash = '';
16
+ parsed.searchParams.sort();
17
+ if (parsed.pathname !== '/' && parsed.pathname.endsWith('/')) {
18
+ parsed.pathname = parsed.pathname.slice(0, -1);
19
+ }
20
+ return parsed.toString();
21
+ }
22
+ catch {
23
+ return url;
24
+ }
25
+ }
26
+ toHeaderRecord(headers) {
27
+ const headerRecord = {};
28
+ headers.forEach((value, key) => {
29
+ headerRecord[key] = value;
30
+ });
31
+ return headerRecord;
32
+ }
13
33
  constructor(options = {}) {
14
34
  this.options = options;
15
35
  this.spider = new Spider({
@@ -36,11 +56,12 @@ export class SeoSpider {
36
56
  : undefined;
37
57
  const seoReport = await analyzeSeo(html, {
38
58
  baseUrl: pageResult.url,
59
+ timings: pageResult.timings,
60
+ htmlSize: pageResult.metrics?.htmlSize,
39
61
  rules: rulesOptions,
40
62
  });
41
63
  const seoPage = { ...pageResult, seoReport };
42
64
  this.seoPages.push(seoPage);
43
- this.seoResults.set(pageResult.url, seoReport);
44
65
  this.options.onSeoAnalysis?.(seoPage);
45
66
  }
46
67
  catch {
@@ -50,7 +71,6 @@ export class SeoSpider {
50
71
  }
51
72
  async crawl(startUrl) {
52
73
  this.seoPages = [];
53
- this.seoResults.clear();
54
74
  this.homeHtml = '';
55
75
  const result = await this.spider.crawl(startUrl);
56
76
  if (!this.options.seo) {
@@ -206,7 +226,7 @@ export class SeoSpider {
206
226
  return {
207
227
  status: res.status,
208
228
  text,
209
- headers: Object.fromEntries([...res.headers.entries()]),
229
+ headers: this.toHeaderRecord(res.headers),
210
230
  };
211
231
  };
212
232
  const result = await fetchAndValidateSitemap(sitemapUrl, fetcher);
@@ -216,191 +236,6 @@ export class SeoSpider {
216
236
  return undefined;
217
237
  }
218
238
  }
219
- createReportFromPageData(page) {
220
- const checks = [];
221
- if (page.title) {
222
- const titleLength = page.title.length;
223
- if (titleLength < 30) {
224
- checks.push({
225
- id: 'title-length',
226
- name: 'Title Length',
227
- category: 'title',
228
- status: 'warn',
229
- message: `Title is ${titleLength} characters`,
230
- value: titleLength,
231
- recommendation: 'Title should be 50-60 characters',
232
- });
233
- }
234
- else if (titleLength > 60) {
235
- checks.push({
236
- id: 'title-length',
237
- name: 'Title Length',
238
- category: 'title',
239
- status: 'warn',
240
- message: `Title is too long (${titleLength} chars)`,
241
- value: titleLength,
242
- recommendation: 'Title should be 50-60 characters',
243
- });
244
- }
245
- else {
246
- checks.push({
247
- id: 'title-length',
248
- name: 'Title Length',
249
- category: 'title',
250
- status: 'pass',
251
- message: `Good title length (${titleLength} chars)`,
252
- value: titleLength,
253
- });
254
- }
255
- }
256
- else {
257
- checks.push({
258
- id: 'title-missing',
259
- name: 'Title',
260
- category: 'title',
261
- status: 'fail',
262
- message: 'Page has no title',
263
- recommendation: 'Add a descriptive <title> tag',
264
- });
265
- }
266
- const internalLinks = page.links.filter(l => l.type === 'internal').length;
267
- const externalLinks = page.links.filter(l => l.type === 'external').length;
268
- if (internalLinks === 0) {
269
- checks.push({
270
- id: 'internal-links',
271
- name: 'Internal Links',
272
- category: 'links',
273
- status: 'warn',
274
- message: 'No internal links found',
275
- recommendation: 'Add internal links to improve site structure',
276
- });
277
- }
278
- else {
279
- checks.push({
280
- id: 'internal-links',
281
- name: 'Internal Links',
282
- category: 'links',
283
- status: 'pass',
284
- message: `${internalLinks} internal links found`,
285
- value: internalLinks,
286
- });
287
- }
288
- const scoreSum = checks.reduce((sum, c) => {
289
- if (c.status === 'pass')
290
- return sum + 100;
291
- if (c.status === 'warn')
292
- return sum + 50;
293
- return sum;
294
- }, 0);
295
- const score = checks.length > 0 ? Math.round(scoreSum / checks.length) : 0;
296
- const passed = checks.filter(c => c.status === 'pass').length;
297
- const warnings = checks.filter(c => c.status === 'warn').length;
298
- const errors = checks.filter(c => c.status === 'fail').length;
299
- const infos = checks.filter(c => c.status === 'info').length;
300
- const passRate = checks.length > 0 ? Math.round((passed / checks.length) * 100) : 0;
301
- return {
302
- url: page.url,
303
- timestamp: new Date(),
304
- grade: this.scoreToGrade(score),
305
- score,
306
- summary: {
307
- totalChecks: checks.length,
308
- passed,
309
- warnings,
310
- errors,
311
- infos,
312
- passRate,
313
- issuesByCategory: {},
314
- topIssues: checks
315
- .filter(c => c.status === 'fail' || c.status === 'warn')
316
- .slice(0, 5)
317
- .map(c => ({
318
- name: c.name,
319
- message: c.message,
320
- category: 'general',
321
- severity: c.status === 'fail' ? 'error' : 'warning',
322
- })),
323
- quickWins: [],
324
- vitals: {
325
- wordCount: 0,
326
- readingTime: 0,
327
- imageCount: 0,
328
- linkCount: page.links.length,
329
- },
330
- completeness: {
331
- meta: 0,
332
- social: 0,
333
- technical: 0,
334
- content: 0,
335
- images: 0,
336
- links: 0,
337
- },
338
- },
339
- checks,
340
- title: page.title ? { text: page.title, length: page.title.length } : undefined,
341
- headings: {
342
- structure: [],
343
- h1Count: 0,
344
- hasProperHierarchy: false,
345
- issues: [],
346
- },
347
- content: {
348
- wordCount: 0,
349
- characterCount: 0,
350
- sentenceCount: 0,
351
- paragraphCount: 0,
352
- readingTimeMinutes: 0,
353
- avgWordsPerSentence: 0,
354
- avgParagraphLength: 0,
355
- listCount: 0,
356
- strongTagCount: 0,
357
- emTagCount: 0,
358
- },
359
- links: {
360
- total: page.links.length,
361
- internal: internalLinks,
362
- external: externalLinks,
363
- nofollow: 0,
364
- broken: 0,
365
- withoutText: page.links.filter(l => !l.text?.trim()).length,
366
- sponsoredLinks: 0,
367
- ugcLinks: 0,
368
- },
369
- images: {
370
- total: 0,
371
- withAlt: 0,
372
- withoutAlt: 0,
373
- lazy: 0,
374
- missingDimensions: 0,
375
- modernFormats: 0,
376
- altTextLengths: [],
377
- imageAltTexts: [],
378
- imageFilenames: [],
379
- imagesWithAsyncDecoding: 0,
380
- },
381
- social: {
382
- openGraph: {
383
- present: false, hasTitle: false, hasDescription: false, hasImage: false, hasUrl: false, issues: []
384
- },
385
- twitterCard: {
386
- present: false, hasCard: false, hasTitle: false, hasDescription: false, hasImage: false, issues: []
387
- },
388
- },
389
- keywords: { totalWords: 0, uniqueWords: 0, topKeywords: [] },
390
- technical: {
391
- hasCanonical: false,
392
- hasRobotsMeta: false,
393
- hasViewport: false,
394
- hasCharset: false,
395
- hasLang: false,
396
- },
397
- structuredData: {
398
- count: 0,
399
- types: [],
400
- items: [],
401
- },
402
- };
403
- }
404
239
  detectSiteWideIssues(pages) {
405
240
  const issues = [];
406
241
  const titleGroups = new Map();
@@ -465,12 +300,12 @@ export class SeoSpider {
465
300
  for (const page of pages) {
466
301
  for (const link of page.links) {
467
302
  if (link.type === 'internal' && link.href) {
468
- linkedUrls.add(link.href);
303
+ linkedUrls.add(this.normalizeUrl(link.href));
469
304
  }
470
305
  }
471
306
  }
472
307
  const orphanPages = pages
473
- .filter(p => !linkedUrls.has(p.url) && p.depth > 0)
308
+ .filter(p => p.depth > 0 && !linkedUrls.has(this.normalizeUrl(p.url)))
474
309
  .map(p => p.url);
475
310
  if (orphanPages.length > 0) {
476
311
  issues.push({
@@ -507,17 +342,6 @@ export class SeoSpider {
507
342
  orphanPages,
508
343
  };
509
344
  }
510
- scoreToGrade(score) {
511
- if (score >= 90)
512
- return 'A';
513
- if (score >= 80)
514
- return 'B';
515
- if (score >= 70)
516
- return 'C';
517
- if (score >= 60)
518
- return 'D';
519
- return 'F';
520
- }
521
345
  async saveReport(result) {
522
346
  if (!this.options.output)
523
347
  return;
@@ -1,6 +1,7 @@
1
1
  import { KeywordCloud } from './keywords.js';
2
2
  export type { KeywordCloud, KeywordItem } from './keywords.js';
3
3
  export type SeoStatus = 'pass' | 'warn' | 'fail' | 'info';
4
+ export type SeoPageType = 'homepage' | 'product' | 'article' | 'category' | 'search' | 'other';
4
5
  export interface SeoCheckEvidence {
5
6
  found?: string | number | string[];
6
7
  expected?: string | number | string[];
@@ -15,6 +16,7 @@ export interface SeoCheckResult {
15
16
  name: string;
16
17
  category: string;
17
18
  status: SeoStatus;
19
+ severity?: 'error' | 'warning' | 'info';
18
20
  message: string;
19
21
  value?: string | number;
20
22
  recommendation?: string;
@@ -107,6 +109,7 @@ export interface SeoTiming {
107
109
  download?: number;
108
110
  }
109
111
  export interface SeoSummary {
112
+ pageType?: SeoPageType;
110
113
  totalChecks: number;
111
114
  passed: number;
112
115
  warnings: number;
@@ -150,6 +153,7 @@ export interface SeoReport {
150
153
  timestamp: Date;
151
154
  grade: string;
152
155
  score: number;
156
+ pageType?: SeoPageType;
153
157
  summary: SeoSummary;
154
158
  timing?: SeoTiming;
155
159
  checks: SeoCheckResult[];
@@ -2,6 +2,13 @@ import { parse } from '../../scrape/parser/index.js';
2
2
  const VALID_CHANGEFREQ = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'];
3
3
  const MAX_URLS_PER_SITEMAP = 50000;
4
4
  const MAX_SITEMAP_SIZE = 50 * 1024 * 1024;
5
+ function toHeaderRecord(headers) {
6
+ const headerRecord = {};
7
+ headers.forEach((value, key) => {
8
+ headerRecord[key] = value;
9
+ });
10
+ return headerRecord;
11
+ }
5
12
  export function parseSitemap(content, compressed = false) {
6
13
  const errors = [];
7
14
  const warnings = [];
@@ -141,7 +148,7 @@ export function parseSitemap(content, compressed = false) {
141
148
  warnings,
142
149
  urls,
143
150
  sitemaps,
144
- urlCount: type === 'urlset' ? urls.length : sitemaps.reduce((sum, s) => sum + 1, 0),
151
+ urlCount: type === 'urlset' ? urls.length : sitemaps.length,
145
152
  size: content.length,
146
153
  compressed,
147
154
  };
@@ -339,7 +346,7 @@ export async function fetchAndValidateSitemap(url, fetcher) {
339
346
  response = {
340
347
  status: fetchResponse.status,
341
348
  text: await fetchResponse.text(),
342
- headers: Object.fromEntries(fetchResponse.headers.entries()),
349
+ headers: toHeaderRecord(fetchResponse.headers),
343
350
  };
344
351
  }
345
352
  if (response.status === 404) {