recker 1.0.72 → 1.0.75-next.2e5a94f

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +5 -18
  2. package/dist/browser/core/client.d.ts +14 -8
  3. package/dist/browser/core/client.js +199 -17
  4. package/dist/browser/core/errors.d.ts +15 -1
  5. package/dist/browser/core/errors.js +140 -9
  6. package/dist/browser/core/request.d.ts +5 -0
  7. package/dist/browser/core/request.js +33 -2
  8. package/dist/browser/core-runtime/plugin-manifest.d.ts +24 -0
  9. package/dist/browser/core-runtime/plugin-manifest.js +159 -0
  10. package/dist/browser/core-runtime/request-context.d.ts +13 -0
  11. package/dist/browser/core-runtime/request-context.js +24 -0
  12. package/dist/browser/core-runtime/typed-events.d.ts +89 -0
  13. package/dist/browser/core-runtime/typed-events.js +34 -0
  14. package/dist/browser/index.iife.min.js +79 -79
  15. package/dist/browser/index.min.js +79 -79
  16. package/dist/browser/index.mini.iife.js +913 -97
  17. package/dist/browser/index.mini.iife.min.js +46 -46
  18. package/dist/browser/index.mini.min.js +46 -46
  19. package/dist/browser/index.mini.umd.js +913 -97
  20. package/dist/browser/index.mini.umd.min.js +46 -46
  21. package/dist/browser/index.umd.min.js +79 -79
  22. package/dist/browser/plugins/auth/aws-sigv4.d.ts +1 -0
  23. package/dist/browser/plugins/auth/aws-sigv4.js +19 -2
  24. package/dist/browser/plugins/retry.js +29 -1
  25. package/dist/browser/presets/aws.d.ts +1 -0
  26. package/dist/browser/presets/aws.js +62 -1
  27. package/dist/browser/runner/request-runner.d.ts +15 -5
  28. package/dist/browser/runner/request-runner.js +164 -30
  29. package/dist/browser/scrape/parser/nodes/html.d.ts +6 -0
  30. package/dist/browser/scrape/parser/nodes/html.js +70 -18
  31. package/dist/browser/scrape/parser/nodes/node.d.ts +1 -0
  32. package/dist/browser/scrape/parser/nodes/node.js +5 -0
  33. package/dist/browser/scrape/spider.d.ts +1 -0
  34. package/dist/browser/scrape/spider.js +39 -26
  35. package/dist/browser/seo/analyzer.d.ts +1 -1
  36. package/dist/browser/seo/analyzer.js +73 -42
  37. package/dist/browser/seo/index.d.ts +1 -1
  38. package/dist/browser/seo/rules/types.d.ts +2 -0
  39. package/dist/browser/seo/seo-spider.d.ts +2 -3
  40. package/dist/browser/seo/seo-spider.js +26 -202
  41. package/dist/browser/seo/types.d.ts +4 -0
  42. package/dist/browser/seo/validators/sitemap.js +9 -2
  43. package/dist/browser/transport/fetch.js +38 -5
  44. package/dist/browser/transport/undici.js +73 -11
  45. package/dist/browser/transport/worker.d.ts +0 -1
  46. package/dist/browser/transport/worker.js +1 -3
  47. package/dist/browser/types/index.d.ts +24 -0
  48. package/dist/cli/commands/mcp.js +5 -3
  49. package/dist/core/client.d.ts +14 -8
  50. package/dist/core/client.js +199 -17
  51. package/dist/core/errors.d.ts +15 -1
  52. package/dist/core/errors.js +140 -9
  53. package/dist/core/request.d.ts +5 -0
  54. package/dist/core/request.js +33 -2
  55. package/dist/core-runtime/plugin-manifest.d.ts +24 -0
  56. package/dist/core-runtime/plugin-manifest.js +159 -0
  57. package/dist/core-runtime/request-context.d.ts +13 -0
  58. package/dist/core-runtime/request-context.js +24 -0
  59. package/dist/core-runtime/typed-events.d.ts +89 -0
  60. package/dist/core-runtime/typed-events.js +34 -0
  61. package/dist/index.d.ts +2 -1
  62. package/dist/index.js +2 -1
  63. package/dist/mcp/cli.js +10 -8
  64. package/dist/mcp/profiles.d.ts +1 -1
  65. package/dist/mcp/profiles.js +31 -6
  66. package/dist/mcp/tools/categories.js +0 -1
  67. package/dist/mcp/tools/seo.js +320 -4
  68. package/dist/plugins/auth/aws-sigv4.d.ts +1 -0
  69. package/dist/plugins/auth/aws-sigv4.js +19 -2
  70. package/dist/plugins/retry.js +29 -1
  71. package/dist/presets/aws.d.ts +1 -0
  72. package/dist/presets/aws.js +62 -1
  73. package/dist/recker.d.ts +3 -0
  74. package/dist/recker.js +5 -0
  75. package/dist/runner/request-runner.d.ts +15 -5
  76. package/dist/runner/request-runner.js +164 -30
  77. package/dist/scrape/parser/nodes/html.d.ts +6 -0
  78. package/dist/scrape/parser/nodes/html.js +70 -18
  79. package/dist/scrape/parser/nodes/node.d.ts +1 -0
  80. package/dist/scrape/parser/nodes/node.js +5 -0
  81. package/dist/scrape/spider.d.ts +1 -0
  82. package/dist/scrape/spider.js +39 -26
  83. package/dist/search/google.d.ts +67 -0
  84. package/dist/search/google.js +480 -0
  85. package/dist/search/index.d.ts +3 -0
  86. package/dist/search/index.js +1 -0
  87. package/dist/seo/analyzer.d.ts +1 -1
  88. package/dist/seo/analyzer.js +73 -42
  89. package/dist/seo/index.d.ts +1 -1
  90. package/dist/seo/rules/types.d.ts +2 -0
  91. package/dist/seo/seo-spider.d.ts +2 -3
  92. package/dist/seo/seo-spider.js +26 -202
  93. package/dist/seo/types.d.ts +4 -0
  94. package/dist/seo/validators/sitemap.js +9 -2
  95. package/dist/transport/fetch.js +38 -5
  96. package/dist/transport/undici.js +73 -11
  97. package/dist/transport/worker.d.ts +0 -1
  98. package/dist/transport/worker.js +1 -3
  99. package/dist/types/index.d.ts +24 -0
  100. package/dist/version.js +1 -1
  101. package/package.json +9 -1
@@ -98,11 +98,31 @@ export default class HTMLElement extends Node {
98
98
  voidTag;
99
99
  _attrs;
100
100
  _rawAttrs;
101
+ _queryCache;
101
102
  _parseOptions;
102
103
  rawTagName;
103
104
  id;
104
105
  classList;
105
106
  nodeType = NodeType.ELEMENT_NODE;
107
+ get isSelectorCacheEnabled() {
108
+ return this._parseOptions?.selectorCache !== false;
109
+ }
110
+ getQueryCache() {
111
+ if (!this._queryCache) {
112
+ this._queryCache = new Map();
113
+ }
114
+ return this._queryCache;
115
+ }
116
+ clearQueryCache() {
117
+ this._queryCache = undefined;
118
+ }
119
+ invalidateSelectorCacheRecursively() {
120
+ let current = this;
121
+ while (current) {
122
+ current.clearQueryCache();
123
+ current = current.parentNode;
124
+ }
125
+ }
106
126
  quoteAttribute(attr) {
107
127
  if (attr == null) {
108
128
  return 'null';
@@ -144,6 +164,7 @@ export default class HTMLElement extends Node {
144
164
  this.childNodes = this.childNodes.filter((child) => {
145
165
  return child !== node;
146
166
  });
167
+ this.invalidateSelectorCacheRecursively();
147
168
  return this;
148
169
  }
149
170
  exchangeChild(oldNode, newNode) {
@@ -154,6 +175,7 @@ export default class HTMLElement extends Node {
154
175
  }
155
176
  return child;
156
177
  });
178
+ this.invalidateSelectorCacheRecursively();
157
179
  return this;
158
180
  }
159
181
  get tagName() {
@@ -182,6 +204,7 @@ export default class HTMLElement extends Node {
182
204
  set textContent(val) {
183
205
  const content = [new TextNode(val, this)];
184
206
  this.childNodes = content;
207
+ this.invalidateSelectorCacheRecursively();
185
208
  }
186
209
  get text() {
187
210
  return decode(this.rawText);
@@ -249,6 +272,7 @@ export default class HTMLElement extends Node {
249
272
  resetParent(nodes, this);
250
273
  resetParent(this.childNodes, null);
251
274
  this.childNodes = nodes;
275
+ this.invalidateSelectorCacheRecursively();
252
276
  }
253
277
  set_content(content, options = {}) {
254
278
  if (content instanceof Node) {
@@ -264,6 +288,7 @@ export default class HTMLElement extends Node {
264
288
  resetParent(this.childNodes, null);
265
289
  resetParent(content, this);
266
290
  this.childNodes = content;
291
+ this.invalidateSelectorCacheRecursively();
267
292
  return this;
268
293
  }
269
294
  replaceWith(...nodes) {
@@ -293,6 +318,7 @@ export default class HTMLElement extends Node {
293
318
  ...resetParent(content, parent),
294
319
  ...parent.childNodes.slice(idx + 1),
295
320
  ];
321
+ parent.invalidateSelectorCacheRecursively();
296
322
  return this;
297
323
  }
298
324
  get outerHTML() {
@@ -312,6 +338,7 @@ export default class HTMLElement extends Node {
312
338
  }
313
339
  }
314
340
  }
341
+ this.invalidateSelectorCacheRecursively();
315
342
  return this;
316
343
  }
317
344
  get structure() {
@@ -357,6 +384,7 @@ export default class HTMLElement extends Node {
357
384
  this.childNodes[o++] = node;
358
385
  });
359
386
  this.childNodes.length = o;
387
+ this.invalidateSelectorCacheRecursively();
360
388
  const attrs = Object.keys(this.rawAttributes)
361
389
  .map((key) => {
362
390
  const val = this.rawAttributes[key];
@@ -368,16 +396,49 @@ export default class HTMLElement extends Node {
368
396
  return this;
369
397
  }
370
398
  querySelectorAll(selector) {
371
- return selectAll(selector, this, {
399
+ if (this.isSelectorCacheEnabled) {
400
+ const cached = this.getQueryCache().get(selector);
401
+ if (cached?.all) {
402
+ return cached.all.slice();
403
+ }
404
+ }
405
+ const nodes = selectAll(selector, this, {
372
406
  xmlMode: false,
373
407
  adapter: Matcher,
374
408
  });
409
+ if (this.isSelectorCacheEnabled) {
410
+ const cacheEntry = this.getQueryCache().get(selector) || {};
411
+ cacheEntry.all = nodes;
412
+ if (cacheEntry.first === undefined) {
413
+ cacheEntry.first = nodes[0] || null;
414
+ }
415
+ this.getQueryCache().set(selector, cacheEntry);
416
+ }
417
+ return nodes;
375
418
  }
376
419
  querySelector(selector) {
377
- return selectOne(selector, this, {
420
+ if (this.isSelectorCacheEnabled) {
421
+ const cached = this.getQueryCache().get(selector);
422
+ if (cached?.first !== undefined) {
423
+ return cached.first || null;
424
+ }
425
+ if (cached?.all) {
426
+ const first = cached.all[0] || null;
427
+ cached.first = first;
428
+ this.getQueryCache().set(selector, cached);
429
+ return first;
430
+ }
431
+ }
432
+ const result = selectOne(selector, this, {
378
433
  xmlMode: false,
379
434
  adapter: Matcher,
380
435
  });
436
+ if (this.isSelectorCacheEnabled) {
437
+ const cacheEntry = this.getQueryCache().get(selector) || {};
438
+ cacheEntry.first = result;
439
+ this.getQueryCache().set(selector, cacheEntry);
440
+ }
441
+ return result;
381
442
  }
382
443
  getElementsByTagName(tagName) {
383
444
  const upperCasedTagName = tagName.toUpperCase();
@@ -440,22 +501,6 @@ export default class HTMLElement extends Node {
440
501
  const mapChild = new Map();
441
502
  let el = this;
442
503
  let old = null;
443
- function findOne(test, elems) {
444
- let elem = null;
445
- for (let i = 0, l = elems.length; i < l && !elem; i++) {
446
- const el = elems[i];
447
- if (test(el)) {
448
- elem = el;
449
- }
450
- else {
451
- const child = mapChild.get(el);
452
- if (child) {
453
- elem = findOne(test, [child]);
454
- }
455
- }
456
- }
457
- return elem;
458
- }
459
504
  while (el) {
460
505
  if (old)
461
506
  mapChild.set(el, old);
@@ -545,6 +590,7 @@ export default class HTMLElement extends Node {
545
590
  if (key === 'id') {
546
591
  this.id = '';
547
592
  }
593
+ this.invalidateSelectorCacheRecursively();
548
594
  return this;
549
595
  }
550
596
  hasAttribute(key) {
@@ -580,6 +626,7 @@ export default class HTMLElement extends Node {
580
626
  if (key === 'id') {
581
627
  this.id = value;
582
628
  }
629
+ this.invalidateSelectorCacheRecursively();
583
630
  return this;
584
631
  }
585
632
  setAttributes(attributes) {
@@ -597,6 +644,7 @@ export default class HTMLElement extends Node {
597
644
  return `${name}=${this.quoteAttribute(String(val))}`;
598
645
  })
599
646
  .join(' ');
647
+ this.invalidateSelectorCacheRecursively();
600
648
  return this;
601
649
  }
602
650
  insertAdjacentHTML(where, html) {
@@ -625,11 +673,13 @@ export default class HTMLElement extends Node {
625
673
  const nodes = resolveInsertable(insertable, this._parseOptions);
626
674
  resetParent(nodes, this);
627
675
  this.childNodes.unshift(...nodes);
676
+ this.invalidateSelectorCacheRecursively();
628
677
  }
629
678
  append(...insertable) {
630
679
  const nodes = resolveInsertable(insertable, this._parseOptions);
631
680
  resetParent(nodes, this);
632
681
  this.childNodes.push(...nodes);
682
+ this.invalidateSelectorCacheRecursively();
633
683
  }
634
684
  before(...insertable) {
635
685
  if (!this.parentNode)
@@ -638,6 +688,7 @@ export default class HTMLElement extends Node {
638
688
  const siblings = this.parentNode.childNodes;
639
689
  resetParent(nodes, this.parentNode);
640
690
  siblings.splice(siblings.indexOf(this), 0, ...nodes);
691
+ this.parentNode.invalidateSelectorCacheRecursively();
641
692
  }
642
693
  after(...insertable) {
643
694
  if (!this.parentNode)
@@ -646,6 +697,7 @@ export default class HTMLElement extends Node {
646
697
  const siblings = this.parentNode.childNodes;
647
698
  resetParent(nodes, this.parentNode);
648
699
  siblings.splice(siblings.indexOf(this) + 1, 0, ...nodes);
700
+ this.parentNode.invalidateSelectorCacheRecursively();
649
701
  }
650
702
  get nextSibling() {
651
703
  if (this.parentNode) {
@@ -12,6 +12,7 @@ export default abstract class Node {
12
12
  abstract clone(): Node;
13
13
  constructor(parentNode?: HTMLElement | null, range?: [number, number]);
14
14
  remove(): this;
15
+ invalidateSelectorCacheRecursively(): void;
15
16
  get innerText(): string;
16
17
  get textContent(): string;
17
18
  set textContent(val: string);
@@ -11,6 +11,9 @@ export default class Node {
11
11
  }
12
12
  remove() {
13
13
  if (this.parentNode) {
14
+ if (typeof this.parentNode.invalidateSelectorCacheRecursively === 'function') {
15
+ this.parentNode.invalidateSelectorCacheRecursively();
16
+ }
14
17
  const children = this.parentNode.childNodes;
15
18
  this.parentNode.childNodes = children.filter((child) => {
16
19
  return this !== child;
@@ -19,6 +22,8 @@ export default class Node {
19
22
  }
20
23
  return this;
21
24
  }
25
+ invalidateSelectorCacheRecursively() {
26
+ }
22
27
  get innerText() {
23
28
  return this.rawText;
24
29
  }
@@ -123,6 +123,7 @@ export declare class Spider {
123
123
  private robotsData;
124
124
  private sitemapValidation;
125
125
  private robotsValidation;
126
+ private toHeaderRecord;
126
127
  constructor(options?: SpiderOptions);
127
128
  crawl(startUrl: string): Promise<SpiderResult>;
128
129
  private fetchRobotsTxt;
@@ -76,9 +76,6 @@ function shouldCrawl(url, baseHost, options) {
76
76
  return false;
77
77
  }
78
78
  }
79
- function sleep(ms) {
80
- return new Promise(resolve => setTimeout(resolve, ms));
81
- }
82
79
  function parseExtractSelectors(selectors) {
83
80
  const schema = {};
84
81
  for (const sel of selectors) {
@@ -115,6 +112,13 @@ export class Spider {
115
112
  robotsData = null;
116
113
  sitemapValidation = null;
117
114
  robotsValidation = null;
115
+ toHeaderRecord(headers) {
116
+ const headerRecord = {};
117
+ headers.forEach((value, key) => {
118
+ headerRecord[key] = value;
119
+ });
120
+ return headerRecord;
121
+ }
118
122
  constructor(options = {}) {
119
123
  let extractSchema;
120
124
  if (options.extract) {
@@ -194,7 +198,7 @@ export class Spider {
194
198
  await this.fetchSitemaps(baseUrl);
195
199
  }
196
200
  const pending = new Map();
197
- const scheduleUrl = (item, fromSitemap = false) => {
201
+ const scheduleUrl = (item) => {
198
202
  const normalized = normalizeUrl(item.url);
199
203
  if (this.visited.has(normalized))
200
204
  return;
@@ -230,7 +234,7 @@ export class Spider {
230
234
  try {
231
235
  const urlHost = new URL(sitemapUrl.loc).hostname;
232
236
  if (urlHost === this.baseHost) {
233
- scheduleUrl({ url: sitemapUrl.loc, depth: 1 }, true);
237
+ scheduleUrl({ url: sitemapUrl.loc, depth: 1 });
234
238
  }
235
239
  }
236
240
  catch {
@@ -303,7 +307,7 @@ export class Spider {
303
307
  return {
304
308
  status: response.status,
305
309
  text: await response.text(),
306
- headers: Object.fromEntries([...response.headers.entries()]),
310
+ headers: this.toHeaderRecord(response.headers),
307
311
  };
308
312
  };
309
313
  try {
@@ -351,40 +355,49 @@ export class Spider {
351
355
  }
352
356
  buildSitemapAnalysis() {
353
357
  const crawledUrls = new Set(this.results.map(r => normalizeUrl(r.url)));
354
- const crawledFromSitemap = this.sitemapUrls.filter(u => crawledUrls.has(normalizeUrl(u.loc))).length;
358
+ const sitemapUrlSet = this.sitemapUrlSet.size > 0
359
+ ? this.sitemapUrlSet
360
+ : new Set(this.sitemapUrls.map((u) => normalizeUrl(u.loc)));
361
+ const crawledFromSitemap = Array.from(sitemapUrlSet)
362
+ .filter(url => crawledUrls.has(url))
363
+ .length;
355
364
  const linkedUrls = new Set();
356
- for (const page of this.results) {
357
- for (const link of page.links) {
358
- if (link.href) {
359
- linkedUrls.add(normalizeUrl(link.href));
360
- }
361
- }
362
- }
363
- const orphanUrls = this.sitemapUrls
364
- .filter(u => {
365
- const normalized = normalizeUrl(u.loc);
366
- return !linkedUrls.has(normalized) && crawledUrls.has(normalized);
367
- })
368
- .map(u => u.loc);
369
- const missingFromSitemap = Array.from(crawledUrls)
370
- .filter(url => !this.sitemapUrlSet.has(url));
371
- const blockedBySitemapRobots = [];
365
+ const blockedBySitemapRobotsSet = new Set();
372
366
  if (this.robotsData) {
373
367
  for (const sitemapUrl of this.sitemapUrls) {
374
368
  try {
369
+ const normalized = normalizeUrl(sitemapUrl.loc);
375
370
  const urlPath = new URL(sitemapUrl.loc).pathname;
376
371
  if (!isPathAllowed(this.robotsData, urlPath, this.options.userAgent)) {
377
- blockedBySitemapRobots.push(sitemapUrl.loc);
372
+ blockedBySitemapRobotsSet.add(normalized);
378
373
  }
379
374
  }
380
375
  catch {
381
376
  }
382
377
  }
383
378
  }
379
+ for (const page of this.results) {
380
+ for (const link of page.links) {
381
+ if (link.href) {
382
+ linkedUrls.add(normalizeUrl(link.href));
383
+ }
384
+ }
385
+ }
386
+ const orphanUrlSet = new Set();
387
+ for (const u of this.sitemapUrls) {
388
+ const normalized = normalizeUrl(u.loc);
389
+ if (!linkedUrls.has(normalized) && !blockedBySitemapRobotsSet.has(normalized)) {
390
+ orphanUrlSet.add(normalized);
391
+ }
392
+ }
393
+ const orphanUrls = Array.from(orphanUrlSet);
394
+ const missingFromSitemap = Array.from(crawledUrls)
395
+ .filter(url => !sitemapUrlSet.has(url));
396
+ const blockedBySitemapRobots = Array.from(blockedBySitemapRobotsSet);
384
397
  return {
385
398
  found: this.sitemapUrls.length > 0,
386
- url: this.sitemapValidation?.parseResult ? undefined : undefined,
387
- totalUrls: this.sitemapUrls.length,
399
+ url: this.sitemapUrls[0]?.loc,
400
+ totalUrls: sitemapUrlSet.size,
388
401
  crawledFromSitemap,
389
402
  orphanUrls,
390
403
  missingFromSitemap,
@@ -12,6 +12,7 @@ export declare class SeoAnalyzer {
12
12
  static fromHtml(html: string, options?: SeoAnalyzerFullOptions): Promise<SeoAnalyzer>;
13
13
  analyze(): SeoReport;
14
14
  private getMainBody;
15
+ private detectPageType;
15
16
  private getVisibleText;
16
17
  private buildRuleContext;
17
18
  private analyzeUrlQuality;
@@ -32,7 +33,6 @@ export declare class SeoAnalyzer {
32
33
  private analyzeAnalytics;
33
34
  private analyzeFeeds;
34
35
  private analyzeConversionElements;
35
- private analyzeAdvancedImages;
36
36
  private calculateTextHtmlRatio;
37
37
  private convertToCheckResults;
38
38
  private buildSummary;
@@ -1,7 +1,7 @@
1
1
  import { parse } from '../scrape/parser/index.js';
2
2
  import { extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractLinks, extractImages, } from '../scrape/extractors.js';
3
3
  import { generateKeywordCloud } from './keywords.js';
4
- import { createRulesEngine, SEO_THRESHOLDS, } from './rules/index.js';
4
+ import { createRulesEngine, SEO_THRESHOLDS, calculateWeightedScore, } from './rules/index.js';
5
5
  export class SeoAnalyzer {
6
6
  root;
7
7
  options;
@@ -47,7 +47,9 @@ export class SeoAnalyzer {
47
47
  const analytics = this.analyzeAnalytics();
48
48
  const feeds = this.analyzeFeeds();
49
49
  const conversion = this.analyzeConversionElements(links, visibleText);
50
+ const pageType = this.detectPageType(jsonLd);
50
51
  const context = this.buildRuleContext({
52
+ pageType,
51
53
  meta,
52
54
  og,
53
55
  twitter,
@@ -68,7 +70,7 @@ export class SeoAnalyzer {
68
70
  });
69
71
  const ruleResults = this.rulesEngine.evaluate(context);
70
72
  const checks = this.convertToCheckResults(ruleResults);
71
- const { score, grade } = this.calculateScore(checks);
73
+ const { score, grade } = this.calculateScore(ruleResults);
72
74
  const summary = this.buildSummary(ruleResults, checks, {
73
75
  content,
74
76
  imageAnalysis,
@@ -77,13 +79,17 @@ export class SeoAnalyzer {
77
79
  og,
78
80
  twitter,
79
81
  technical,
82
+ pageType,
83
+ timings: this.options.timings,
80
84
  });
81
85
  return {
82
86
  url,
83
87
  timestamp: new Date(),
84
88
  grade,
85
89
  score,
90
+ timing: this.options.timings,
86
91
  summary,
92
+ pageType,
87
93
  checks,
88
94
  title: meta.title
89
95
  ? { text: meta.title, length: meta.title.length }
@@ -134,6 +140,47 @@ export class SeoAnalyzer {
134
140
  return bodies[0];
135
141
  return bodies.reduce((prev, curr) => curr.text.length > prev.text.length ? curr : prev);
136
142
  }
143
+ detectPageType(jsonLd) {
144
+ if (!this.options.baseUrl) {
145
+ return 'other';
146
+ }
147
+ try {
148
+ const parsed = new URL(this.options.baseUrl);
149
+ const pathname = parsed.pathname.toLowerCase();
150
+ const hasQueryKeyword = (value) => parsed.searchParams.has(value);
151
+ if (pathname === '/' || pathname === '') {
152
+ return 'homepage';
153
+ }
154
+ if (/(^|\/)(search|busca|s|results|query)\b/.test(pathname) ||
155
+ hasQueryKeyword('q') ||
156
+ hasQueryKeyword('query') ||
157
+ hasQueryKeyword('search')) {
158
+ return 'search';
159
+ }
160
+ const productSignals = ['product', 'produto', 'item', 'sku', 'shop'];
161
+ if (productSignals.some((segment) => pathname.includes(`/${segment}/`))) {
162
+ return 'product';
163
+ }
164
+ const articleSignals = ['article', 'post', 'blog', 'noticia', 'news'];
165
+ if (articleSignals.some((segment) => pathname.includes(`/${segment}/`)) ||
166
+ this.root.querySelectorAll('article').length > 0) {
167
+ return 'article';
168
+ }
169
+ if (/(^|\/)(categoria|category|tag|section|topic)\b/.test(pathname)) {
170
+ return 'category';
171
+ }
172
+ const hasProductJsonLd = jsonLd
173
+ .map((node) => node['@type'])
174
+ .some((type) => typeof type === 'string' && type.toLowerCase() === 'product');
175
+ if (hasProductJsonLd) {
176
+ return 'product';
177
+ }
178
+ return 'other';
179
+ }
180
+ catch {
181
+ return 'other';
182
+ }
183
+ }
137
184
  getVisibleText() {
138
185
  const body = this.getMainBody();
139
186
  if (!body)
@@ -159,7 +206,7 @@ export class SeoAnalyzer {
159
206
  return clone.text.replace(/\s+/g, ' ').trim();
160
207
  }
161
208
  buildRuleContext(data) {
162
- const { meta, og, twitter, jsonLd, headings, content, linkAnalysis, imageAnalysis, links, keywords, resources, emailsFound, socialLinksFound, socialLinkDetails, analytics, feeds, conversion, } = data;
209
+ const { meta, og, twitter, jsonLd, headings, content, pageType, linkAnalysis, imageAnalysis, links, keywords, resources, emailsFound, socialLinksFound, socialLinkDetails, analytics, feeds, conversion, } = data;
163
210
  const html = this.root.querySelector('html');
164
211
  const htmlLang = html ? html.getAttribute('lang') : undefined;
165
212
  const hreflangTags = [];
@@ -198,7 +245,6 @@ export class SeoAnalyzer {
198
245
  const hasMixedContent = this.checkMixedContent();
199
246
  const h1Elements = this.root.querySelectorAll('h1');
200
247
  const h1Text = h1Elements.length > 0 ? h1Elements[0].text.trim() : '';
201
- const iframeCount = this.root.querySelectorAll('iframe').length;
202
248
  const topKeywords = keywords.topKeywords.slice(0, 5).map(k => k.word);
203
249
  const mainKeyword = topKeywords.length > 0 ? topKeywords[0] : undefined;
204
250
  const keywordsInTitle = topKeywords.some(kw => meta.title?.toLowerCase().includes(kw));
@@ -234,7 +280,6 @@ export class SeoAnalyzer {
234
280
  const structuralHtml = this.analyzeStructuralHtml();
235
281
  const breadcrumbs = this.analyzeBreadcrumbs(jsonLd.map((j) => j['@type']).filter(Boolean));
236
282
  const multimedia = this.analyzeMultimedia();
237
- const advancedImages = this.analyzeAdvancedImages();
238
283
  const responsiveImages = this.analyzeResponsiveImages();
239
284
  const inlineImages = this.analyzeInlineImages();
240
285
  const trustSignals = this.analyzeTrustSignals(links);
@@ -245,6 +290,7 @@ export class SeoAnalyzer {
245
290
  : 0;
246
291
  const textHtmlRatio = this.calculateTextHtmlRatio(content.characterCount);
247
292
  return {
293
+ pageType,
248
294
  jsFilesCount: resources.jsFilesCount,
249
295
  cssFilesCount: resources.cssFilesCount,
250
296
  unminifiedResources: resources.unminifiedResources,
@@ -774,7 +820,6 @@ export class SeoAnalyzer {
774
820
  }
775
821
  analyzeAnalytics() {
776
822
  const providers = [];
777
- const html = this.root.innerHTML || '';
778
823
  const scripts = this.root.querySelectorAll('script');
779
824
  const scriptSources = [];
780
825
  const scriptContents = [];
@@ -899,21 +944,6 @@ export class SeoAnalyzer {
899
944
  hasPhoneOnPage,
900
945
  };
901
946
  }
902
- analyzeAdvancedImages() {
903
- let imagesWithSrcset = 0;
904
- let largeBase64ImagesCount = 0;
905
- const imgs = this.root.querySelectorAll('img');
906
- imgs.forEach((img) => {
907
- if (img.getAttribute('srcset') || (img.parentNode && img.parentNode.tagName === 'PICTURE')) {
908
- imagesWithSrcset++;
909
- }
910
- const src = img.getAttribute('src') || '';
911
- if (src.startsWith('data:image') && src.length > 5 * 1024) {
912
- largeBase64ImagesCount++;
913
- }
914
- });
915
- return { imagesWithSrcset, largeBase64ImagesCount };
916
- }
917
947
  calculateTextHtmlRatio(bodyTextLength) {
918
948
  const htmlSize = this.root.innerHTML?.length;
919
949
  if (htmlSize && htmlSize > 0) {
@@ -926,6 +956,7 @@ export class SeoAnalyzer {
926
956
  id: r.id,
927
957
  name: r.name,
928
958
  category: r.category,
959
+ severity: r.severity,
929
960
  status: r.status,
930
961
  message: r.message,
931
962
  value: r.value,
@@ -934,6 +965,8 @@ export class SeoAnalyzer {
934
965
  }));
935
966
  }
936
967
  buildSummary(ruleResults, checks, data) {
968
+ const pageType = data.pageType;
969
+ const timings = data.timings;
937
970
  const passed = checks.filter((c) => c.status === 'pass').length;
938
971
  const warnings = checks.filter((c) => c.status === 'warn').length;
939
972
  const errors = checks.filter((c) => c.status === 'fail').length;
@@ -954,21 +987,25 @@ export class SeoAnalyzer {
954
987
  else if (result.status === 'fail')
955
988
  issuesByCategory[cat].errors++;
956
989
  }
957
- const topIssues = ruleResults
958
- .filter((r) => r.status === 'fail' || r.status === 'warn')
990
+ const topIssues = checks
991
+ .filter((c) => c.status === 'fail' || c.status === 'warn')
959
992
  .sort((a, b) => {
960
- if (a.status === 'fail' && b.status !== 'fail')
961
- return -1;
962
- if (a.status !== 'fail' && b.status === 'fail')
963
- return 1;
964
- return 0;
993
+ const severityOrder = (status) => status === 'fail' ? 2 : 1;
994
+ const statusDiff = severityOrder(b.status) - severityOrder(a.status);
995
+ if (statusDiff !== 0)
996
+ return statusDiff;
997
+ const aSeverity = a.severity || (a.status === 'fail' ? 'error' : 'warning');
998
+ const bSeverity = b.severity || (b.status === 'fail' ? 'error' : 'warning');
999
+ if (aSeverity === bSeverity)
1000
+ return 0;
1001
+ return aSeverity === 'error' ? -1 : 1;
965
1002
  })
966
1003
  .slice(0, 5)
967
1004
  .map((r) => ({
968
1005
  name: r.name,
969
1006
  message: r.message,
970
1007
  category: r.category,
971
- severity: (r.status === 'fail' ? 'error' : 'warning'),
1008
+ severity: (r.severity || (r.status === 'fail' ? 'error' : 'warning')),
972
1009
  }));
973
1010
  const quickWins = [];
974
1011
  if (!data.meta.title)
@@ -993,8 +1030,8 @@ export class SeoAnalyzer {
993
1030
  const vitals = {
994
1031
  htmlSize,
995
1032
  domElements,
996
- ttfb: this.options.responseHeaders ? undefined : undefined,
997
- totalTime: undefined,
1033
+ ttfb: timings?.ttfb,
1034
+ totalTime: timings?.total,
998
1035
  wordCount: data.content.wordCount,
999
1036
  totalWordCount: data.content.totalWordCount,
1000
1037
  readingTime: data.content.readingTimeMinutes,
@@ -1017,6 +1054,7 @@ export class SeoAnalyzer {
1017
1054
  infos,
1018
1055
  passRate,
1019
1056
  issuesByCategory,
1057
+ pageType: pageType,
1020
1058
  topIssues,
1021
1059
  quickWins: limitedQuickWins,
1022
1060
  vitals,
@@ -1387,18 +1425,11 @@ export class SeoAnalyzer {
1387
1425
  unminifiedResourceUrls: unminified
1388
1426
  };
1389
1427
  }
1390
- calculateScore(checks) {
1391
- const weights = {
1392
- pass: 100,
1393
- warn: 50,
1394
- fail: 0,
1395
- info: 100,
1396
- };
1397
- const scoringChecks = checks.filter((c) => c.status !== 'info');
1398
- if (scoringChecks.length === 0)
1428
+ calculateScore(results) {
1429
+ if (results.length === 0)
1399
1430
  return { score: 100, grade: 'A' };
1400
- const totalWeight = scoringChecks.reduce((sum, check) => sum + weights[check.status], 0);
1401
- const score = Math.round(totalWeight / scoringChecks.length);
1431
+ const { score: weightedScore } = calculateWeightedScore(results);
1432
+ const score = weightedScore;
1402
1433
  let grade;
1403
1434
  if (score >= 90)
1404
1435
  grade = 'A';
@@ -6,7 +6,7 @@ export type { SeoSpiderOptions, SeoPageResult, SiteWideIssue, SeoSpiderResult, }
6
6
  export { SeoRulesEngine, createRulesEngine, SEO_THRESHOLDS, ALL_SEO_RULES, } from './rules/index.js';
7
7
  export { generateSeoFilename, resolveOutputPath, writeReport, formatReportForJson, } from './output.js';
8
8
  export type { SeoOutputType, OutputOptions, WriteOptions } from './output.js';
9
- export type { SeoReport, SeoCheckResult, SeoStatus, SeoTiming, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
9
+ export type { SeoReport, SeoCheckResult, SeoStatus, SeoPageType, SeoTiming, HeadingAnalysis, HeadingInfo, ContentMetrics, LinkAnalysis, ImageAnalysis, SocialMetaAnalysis, TechnicalSeo, SeoAnalyzerOptions, } from './types.js';
10
10
  export type { SeoRule, RuleContext, RuleResult, RuleEvidence, RuleCategory, RuleSeverity, RulesEngineOptions, } from './rules/index.js';
11
11
  export type { SeoAnalyzerFullOptions } from './analyzer.js';
12
12
  export { parseRobotsTxt, validateRobotsTxt, isPathAllowed, fetchAndValidateRobotsTxt, } from './validators/robots.js';
@@ -1,8 +1,10 @@
1
1
  import type { SeoStatus } from '../types.js';
2
2
  import type { ExtractedLink } from '../../scrape/types.js';
3
+ import type { SeoPageType } from '../types.js';
3
4
  export type RuleSeverity = 'error' | 'warning' | 'info';
4
5
  export type RuleCategory = 'title' | 'meta' | 'og' | 'twitter' | 'headings' | 'images' | 'links' | 'content' | 'technical' | 'security' | 'mobile' | 'structured-data' | 'performance' | 'accessibility' | 'i18n' | 'ai-search' | 'resources' | 'crawlability' | 'canonicalization';
5
6
  export interface RuleContext {
7
+ pageType?: SeoPageType;
6
8
  keywordsInTitle?: boolean;
7
9
  keywordsInDescription?: boolean;
8
10
  keywordsInH1?: boolean;
@@ -66,19 +66,18 @@ export interface SeoSpiderResult extends Omit<SpiderResult, 'pages'> {
66
66
  export declare class SeoSpider {
67
67
  private spider;
68
68
  private options;
69
- private seoResults;
70
69
  private seoPages;
71
70
  private homeHtml;
71
+ private normalizeUrl;
72
+ private toHeaderRecord;
72
73
  constructor(options?: SeoSpiderOptions);
73
74
  private analyzePageDuringCrawl;
74
75
  crawl(startUrl: string): Promise<SeoSpiderResult>;
75
76
  private checkSiteFiles;
76
77
  private validateManifest;
77
78
  private validateSitemap;
78
- private createReportFromPageData;
79
79
  private detectSiteWideIssues;
80
80
  private calculateSummary;
81
- private scoreToGrade;
82
81
  private saveReport;
83
82
  abort(): void;
84
83
  isRunning(): boolean;