@pseolint/core 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +10 -9
  2. package/dist/ai/prompt.d.ts +1 -1
  3. package/dist/ai/prompt.d.ts.map +1 -1
  4. package/dist/ai/prompt.js +13 -1
  5. package/dist/ai/prompt.js.map +1 -1
  6. package/dist/auditor.d.ts.map +1 -1
  7. package/dist/auditor.js +122 -46
  8. package/dist/auditor.js.map +1 -1
  9. package/dist/formatters/console.d.ts +9 -0
  10. package/dist/formatters/console.d.ts.map +1 -1
  11. package/dist/formatters/console.js +53 -0
  12. package/dist/formatters/console.js.map +1 -1
  13. package/dist/formatters/html.d.ts.map +1 -1
  14. package/dist/formatters/html.js +363 -135
  15. package/dist/formatters/html.js.map +1 -1
  16. package/dist/index.d.ts +10 -0
  17. package/dist/index.d.ts.map +1 -1
  18. package/dist/index.js +9 -0
  19. package/dist/index.js.map +1 -1
  20. package/dist/rule-references.d.ts.map +1 -1
  21. package/dist/rule-references.js +8 -0
  22. package/dist/rule-references.js.map +1 -1
  23. package/dist/rules/aeo/answer-first.d.ts +18 -0
  24. package/dist/rules/aeo/answer-first.d.ts.map +1 -0
  25. package/dist/rules/aeo/answer-first.js +191 -0
  26. package/dist/rules/aeo/answer-first.js.map +1 -0
  27. package/dist/rules/aeo/citable-facts.d.ts +9 -0
  28. package/dist/rules/aeo/citable-facts.d.ts.map +1 -0
  29. package/dist/rules/aeo/citable-facts.js +90 -0
  30. package/dist/rules/aeo/citable-facts.js.map +1 -0
  31. package/dist/rules/aeo/content-modularity.d.ts +11 -0
  32. package/dist/rules/aeo/content-modularity.d.ts.map +1 -0
  33. package/dist/rules/aeo/content-modularity.js +107 -0
  34. package/dist/rules/aeo/content-modularity.js.map +1 -0
  35. package/dist/rules/aeo/crawler-access.d.ts +25 -0
  36. package/dist/rules/aeo/crawler-access.d.ts.map +1 -0
  37. package/dist/rules/aeo/crawler-access.js +116 -0
  38. package/dist/rules/aeo/crawler-access.js.map +1 -0
  39. package/dist/rules/aeo/faq-coverage.d.ts +9 -0
  40. package/dist/rules/aeo/faq-coverage.d.ts.map +1 -0
  41. package/dist/rules/aeo/faq-coverage.js +71 -0
  42. package/dist/rules/aeo/faq-coverage.js.map +1 -0
  43. package/dist/rules/aeo/freshness-signals.d.ts +9 -0
  44. package/dist/rules/aeo/freshness-signals.d.ts.map +1 -0
  45. package/dist/rules/aeo/freshness-signals.js +109 -0
  46. package/dist/rules/aeo/freshness-signals.js.map +1 -0
  47. package/dist/rules/aeo/llms-txt.d.ts +24 -0
  48. package/dist/rules/aeo/llms-txt.d.ts.map +1 -0
  49. package/dist/rules/aeo/llms-txt.js +93 -0
  50. package/dist/rules/aeo/llms-txt.js.map +1 -0
  51. package/dist/rules/aeo/non-replicable-value.d.ts +9 -0
  52. package/dist/rules/aeo/non-replicable-value.d.ts.map +1 -0
  53. package/dist/rules/aeo/non-replicable-value.js +95 -0
  54. package/dist/rules/aeo/non-replicable-value.js.map +1 -0
  55. package/dist/rules/scope.d.ts +12 -0
  56. package/dist/rules/scope.d.ts.map +1 -0
  57. package/dist/rules/scope.js +66 -0
  58. package/dist/rules/scope.js.map +1 -0
  59. package/dist/types.d.ts +17 -0
  60. package/dist/types.d.ts.map +1 -1
  61. package/package.json +2 -2
package/README.md CHANGED
@@ -24,15 +24,16 @@ console.log(`Findings: ${summary.findings.length}`);
24
24
 
25
25
  ## What It Checks
26
26
 
27
- 37+ rules across 7 categories:
27
+ 42 rules across 8 categories. Seven categories feed the composite score; `data/*` is a separate data-binding family.
28
28
 
29
- - **Spam / SpamBrain risk** — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern
30
- - **Content** — unique value, heading / meta uniqueness, author attribution, E-E-A-T signals
31
- - **Internal linking** — orphan pages, dead ends, cluster connectivity, hub pages, link depth, unreachable-from-root
32
- - **Technical SEO** — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, Open Graph, hreflang
33
- - **Structured data** — JSON-LD validity, required fields, cross-page schema consistency
34
- - **Cannibalization** — title overlap, keyword collision, URL pattern conflicts
35
- - **Data binding** — verify rendered pages expose values from a source dataset (catches missing or identical-across-pages bindings)
29
+ - **Spam / SpamBrain risk** (8) — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern
30
+ - **Technical SEO** (8) — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, Open Graph, hreflang
31
+ - **AEO / AI Overview citability** (8, v0.3.0) — `llms.txt` presence, AI-crawler access in robots.txt, freshness signals, FAQ coverage, answer-first opener, citable-fact density, non-replicable value, content modularity
32
+ - **Content** (5) — unique value, heading / meta uniqueness, author attribution, E-E-A-T signals
33
+ - **Internal linking** (5) — orphan pages, dead ends, cluster connectivity, hub pages, link depth
34
+ - **Structured data** (3) — JSON-LD validity, required fields, cross-page schema consistency
35
+ - **Cannibalization** (3) — title overlap, keyword collision, URL pattern conflicts
36
+ - **Data binding** (2) — verify rendered pages expose values from a source dataset (missing or identical-across-pages bindings)
36
37
 
37
38
  ## API
38
39
 
@@ -118,4 +119,4 @@ All AI providers and `playwright-core` are optional peers — you only install t
118
119
 
119
120
  ## License
120
121
 
121
- MIT
122
+ MIT
@@ -1,5 +1,5 @@
1
1
  import type { RuleResult } from "../types.js";
2
- export declare const PROMPT_VERSION = "1.0.0";
2
+ export declare const PROMPT_VERSION = "1.1.0";
3
3
  export declare const MAX_FINDINGS_IN_PROMPT = 200;
4
4
  export interface PromptRequest {
5
5
  system: string;
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;AAe1C,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,GAAG,MAAM,CAMrD;AAmBD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,aAAa,CA8B3F"}
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;AAqB1C,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,GAAG,MAAM,CAMrD;AAqBD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,aAAa,CAqC3F"}
package/dist/ai/prompt.js CHANGED
@@ -1,9 +1,15 @@
1
1
  import { createHash } from "node:crypto";
2
- export const PROMPT_VERSION = "1.0.0";
2
+ export const PROMPT_VERSION = "1.1.0";
3
3
  export const MAX_FINDINGS_IN_PROMPT = 200;
4
4
  const SEVERITY_ORDER = { info: 0, warning: 1, error: 2, critical: 3 };
5
5
  const SYSTEM_PROMPT = `You are an SEO audit triage assistant. Given a list of pSEO linter findings, identify 1-5 underlying ROOT CAUSES driving the findings. Group findings by shared underlying problem, not by rule ID. Rank causes by likely SEO impact (highest first).
6
6
 
7
+ Findings fall into two distinct threat families — treat them as separate root causes, not one combined cause:
8
+ - SpamBrain penalty risk: spam/*, cannibal/*, content/*, data/*, tech/*, schema/*, links/* — these make Google penalize or demote the site.
9
+ - AI Overview invisibility: aeo/* — these make pages uncitable in AI answer engines (ChatGPT, Perplexity, Gemini, AI Overviews). Sites not cited lose ~68% of traffic vs ~12% for cited sites.
10
+
11
+ When both families are present, produce at least one root cause from each. Label AEO root causes clearly (e.g. "AI Overviews: ...") so the user can tell them apart from penalty risks.
12
+
7
13
  Rules:
8
14
  - Emit rootCauses FIRST, then narrative — do not reverse this order.
9
15
  - Keep each rootCause label <= 80 chars and phrase it as a problem statement.
@@ -31,11 +37,17 @@ export function buildPromptRequest(findings, pageCount) {
31
37
  pageUrl: f.pageUrl,
32
38
  group: f.group,
33
39
  }));
40
+ const countByCategory = {};
41
+ for (const f of findings) {
42
+ const cat = f.ruleId.split("/")[0];
43
+ countByCategory[cat] = (countByCategory[cat] ?? 0) + 1;
44
+ }
34
45
  const payload = {
35
46
  totalFindings: total,
36
47
  pageCount,
37
48
  truncated,
38
49
  findings: projected,
50
+ findingCountByCategory: countByCategory,
39
51
  };
40
52
  if (truncated) {
41
53
  const counts = {};
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG;;;;;;;;;sDASgC,CAAC;AAOvD,MAAM,UAAU,eAAe,CAAC,CAAa;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC;SAC3C,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACf,OAAO,GAAG,CAAC,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;AAC/B,CAAC;AAmBD,MAAM,UAAU,kBAAkB,CAAC,QAAsB,EAAE,SAAiB;IAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC9B,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACrG,MAAM,SAAS,GAAG,KAAK,GAAG,sBAAsB,CAAC;IACjD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC;QACvF,EAAE,EAAE,eAAe,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE,CAAC,CAAC,KAAK;KACf,CAAC,CAAC,CAAC;IAEJ,MAAM,OAAO,GAAkB;QAC7B,aAAa,EAAE,KAAK;QACpB,SAAS;QACT,SAAS;QACT,QAAQ,EAAE,SAAS;KACpB,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACzE,OAAO,CAAC,kBAAkB,GAAG,MAAM,CAAC;IACtC,CAAC;IAED,OAAO;QACL,MAAM,EAAE,aAAa;QACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;sDAegC,CAAC;AAOvD,MAAM,UAAU,eAAe,CAAC,CAAa;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC;SAC3C,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACf,OAAO,GAAG,CAAC,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;AAC/B,CAAC;AAqBD,MAAM,UAAU,kBAAkB,CAAC,QAAsB,EAAE,SAAiB;IAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC9B,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACrG,MAAM,SAAS,GAAG,KAAK,GAAG,sBAAsB,CAAC;IACjD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC;QACvF,EAAE,EAAE,eAAe,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE,CAAC,CAAC,KAAK;KACf,CAAC,CAAC,CAAC;IAEJ,MAAM,eAAe,GAA2B,EAAE,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,OAAO,GAAkB;QAC7B,aAAa,EAAE,KAAK;QACpB,SAAS;QACT,SAAS;QACT,QAAQ,EAAE,SAAS;QACnB,sBAAsB,EAAE,eAAe;KACxC,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACzE,OAAO,CAAC,kBAAkB,GAAG,MAAM,CAAC;IACtC,CAAC;IAED,OAAO;QACL,MAAM,EAAE,aAAa;QACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoDA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAwG,MAAM,YAAY,CAAC;AA8wBnK,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAod/F"}
1
+ {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AA6DA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAwG,MAAM,YAAY,CAAC;AAu0BnK,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAgf/F"}
package/dist/auditor.js CHANGED
@@ -27,6 +27,14 @@ import { ogCompletenessRule } from "./rules/tech/og-completeness.js";
27
27
  import { robotsNoindexConflictRule } from "./rules/tech/robots-noindex-conflict.js";
28
28
  import { sitemapCompletenessRule } from "./rules/tech/sitemap-completeness.js";
29
29
  import { robotsComplianceRule, parseDisallowPatterns, isBlockedByPattern, parseCrawlDelaySeconds } from "./rules/tech/robots-sitemap-presence.js";
30
+ import { llmsTxtRule } from "./rules/aeo/llms-txt.js";
31
+ import { crawlerAccessRule } from "./rules/aeo/crawler-access.js";
32
+ import { freshnessSignalsRule } from "./rules/aeo/freshness-signals.js";
33
+ import { faqCoverageRule } from "./rules/aeo/faq-coverage.js";
34
+ import { answerFirstRule } from "./rules/aeo/answer-first.js";
35
+ import { citableFactsRule } from "./rules/aeo/citable-facts.js";
36
+ import { nonReplicableValueRule } from "./rules/aeo/non-replicable-value.js";
37
+ import { contentModularityRule } from "./rules/aeo/content-modularity.js";
30
38
  import { redirectChainRule } from "./rules/tech/redirect-chain.js";
31
39
  import { soft404Rule } from "./rules/tech/soft-404.js";
32
40
  import { jsonLdValidRule } from "./rules/schema/json-ld-valid.js";
@@ -38,6 +46,7 @@ import { urlPatternRule } from "./rules/cannibal/url-pattern.js";
38
46
  import { templateCoverageRule } from "./rules/spam/template-coverage.js";
39
47
  import { dataBindingRule, dataIdenticalRule } from "./rules/data/data-binding.js";
40
48
  import { classifyPages, isRuleEnabled } from "./page-classifier.js";
49
+ import { isRuleAllowedInDiff } from "./rules/scope.js";
41
50
  import { RULE_REFERENCES } from "./rule-references.js";
42
51
  import { enrichFindings } from "./enrich-findings.js";
43
52
  import { triageFindings } from "./ai/triage.js";
@@ -61,13 +70,21 @@ const DEFAULTS = {
61
70
  hubPagesMaxSiblings: 50,
62
71
  titleOverlapThreshold: 0.8,
63
72
  keywordCollisionMinShared: 6,
64
- templateCoverageMinPages: 5
73
+ templateCoverageMinPages: 5,
74
+ answerFirstMaxWords: 100,
75
+ citableFactsMin: 3,
76
+ citableFactsTarget: 8,
77
+ freshnessMaxStaleDays: 180,
78
+ modularityMaxParagraphWords: 200,
79
+ modularityMinSelfContainedRatio: 0.7,
80
+ faqMinQuestionHeadings: 2
65
81
  };
66
82
  const CATEGORY_WEIGHTS = {
67
- spam: 0.4,
68
- content: 0.25,
69
- links: 0.15,
70
- tech: 0.1,
83
+ spam: 0.35,
84
+ content: 0.2,
85
+ aeo: 0.15,
86
+ links: 0.12,
87
+ tech: 0.08,
71
88
  schema: 0.05,
72
89
  cannibal: 0.05,
73
90
  /** Dedup / crawl hygiene; does not affect composite score. */
@@ -93,8 +110,9 @@ function resolveGroupRules(baseRules, overrides) {
93
110
  }
94
111
  return result;
95
112
  }
96
- function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides) {
113
+ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
97
114
  const findings = [];
115
+ const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
98
116
  const tag = (results) => results.map((r) => {
99
117
  const override = overrides?.[r.ruleId];
100
118
  return {
@@ -106,106 +124,137 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
106
124
  });
107
125
  // Spam rules — always compute cross-page data, only push findings if enabled
108
126
  const nearDuplicate = nearDuplicateRule(pages, resolvedRules.nearDuplicateThreshold);
109
- if (isEnabled("spam/near-duplicate")) {
127
+ if (isEnabled("spam/near-duplicate") && modeOk("spam/near-duplicate")) {
110
128
  findings.push(...tag(nearDuplicate.findings));
111
129
  }
112
130
  const entitySwap = entitySwapRule(pages, entityPatterns, resolvedRules.entitySwapThreshold);
113
- if (isEnabled("spam/entity-swap")) {
131
+ if (isEnabled("spam/entity-swap") && modeOk("spam/entity-swap")) {
114
132
  findings.push(...tag(entitySwap.findings));
115
133
  }
116
134
  const thinContent = thinContentRule(pages, resolvedRules.thinContentMinWords);
117
- if (isEnabled("spam/thin-content")) {
135
+ if (isEnabled("spam/thin-content") && modeOk("spam/thin-content")) {
118
136
  findings.push(...tag(thinContent.findings));
119
137
  }
120
- if (isEnabled("spam/doorway-pattern")) {
138
+ if (isEnabled("spam/doorway-pattern") && modeOk("spam/doorway-pattern")) {
121
139
  findings.push(...tag(doorwayPatternRule(nearDuplicate.pairs, entitySwap.pairs, thinContent.thinContentUrls, pages)));
122
140
  }
123
- if (isEnabled("spam/publication-velocity")) {
141
+ if (isEnabled("spam/publication-velocity") && modeOk("spam/publication-velocity")) {
124
142
  findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay)));
125
143
  }
126
- if (isEnabled("spam/boilerplate-ratio")) {
144
+ if (isEnabled("spam/boilerplate-ratio") && modeOk("spam/boilerplate-ratio")) {
127
145
  findings.push(...tag(boilerplateRatioRule(pages, resolvedRules.boilerplateMaxRatio)));
128
146
  }
129
- if (isEnabled("spam/template-diversity")) {
147
+ if (isEnabled("spam/template-diversity") && modeOk("spam/template-diversity")) {
130
148
  findings.push(...tag(templateDiversityRule(pages, resolvedRules.templateDiversityMinUniqueRatio)));
131
149
  }
132
- if (isEnabled("spam/template-coverage")) {
150
+ if (isEnabled("spam/template-coverage") && modeOk("spam/template-coverage")) {
133
151
  findings.push(...tag(templateCoverageRule(pages, entityPatterns, resolvedRules.templateCoverageMinPages)));
134
152
  }
135
153
  // Content rules
136
- if (isEnabled("content/unique-value")) {
154
+ if (isEnabled("content/unique-value") && modeOk("content/unique-value")) {
137
155
  findings.push(...tag(uniqueValueRule(pages, resolvedRules.uniqueValueMinWords)));
138
156
  }
139
- if (isEnabled("content/heading-uniqueness")) {
157
+ if (isEnabled("content/heading-uniqueness") && modeOk("content/heading-uniqueness")) {
140
158
  findings.push(...tag(headingUniquenessRule(pages, entityPatterns)));
141
159
  }
142
- if (isEnabled("content/meta-uniqueness")) {
160
+ if (isEnabled("content/meta-uniqueness") && modeOk("content/meta-uniqueness")) {
143
161
  findings.push(...tag(metaUniquenessRule(pages, entityPatterns, resolvedRules.metaUniquenessMinJaccard)));
144
162
  }
145
- if (isEnabled("content/missing-author")) {
163
+ if (isEnabled("content/missing-author") && modeOk("content/missing-author")) {
146
164
  findings.push(...tag(missingAuthorRule(pages)));
147
165
  }
148
- if (isEnabled("content/eeat-signals")) {
166
+ if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
149
167
  findings.push(...tag(eeatSignalsRule(pages)));
150
168
  }
151
169
  // Link rules — use the global link graph
152
- if (isEnabled("links/orphan-pages")) {
170
+ if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
153
171
  findings.push(...tag(orphanPagesRule(pages, inbound, rootUrl)));
154
172
  }
155
- if (isEnabled("links/dead-ends")) {
173
+ if (isEnabled("links/dead-ends") && modeOk("links/dead-ends")) {
156
174
  findings.push(...tag(deadEndsRule(pages, knownUrls, rootUrl)));
157
175
  }
158
- if (isEnabled("links/link-depth")) {
176
+ if (isEnabled("links/link-depth") && modeOk("links/link-depth")) {
159
177
  if (rootUrl) {
160
178
  findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound)));
161
179
  }
162
180
  }
163
- if (isEnabled("links/cluster-connectivity")) {
181
+ if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
164
182
  findings.push(...tag(clusterConnectivityRule(pages, knownUrls)));
165
183
  }
166
- if (isEnabled("links/hub-pages")) {
184
+ if (isEnabled("links/hub-pages") && modeOk("links/hub-pages")) {
167
185
  findings.push(...tag(hubPagesRule(pages, knownUrls, resolvedRules.hubPagesMinSiblings, resolvedRules.hubPagesMaxSiblings)));
168
186
  }
169
187
  // Tech rules
170
- if (isEnabled("tech/canonical-consistency")) {
188
+ if (isEnabled("tech/canonical-consistency") && modeOk("tech/canonical-consistency")) {
171
189
  findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
172
190
  }
173
- if (isEnabled("tech/canonical-noindex-conflict")) {
191
+ if (isEnabled("tech/canonical-noindex-conflict") && modeOk("tech/canonical-noindex-conflict")) {
174
192
  findings.push(...tag(canonicalNoindexConflictRule(pages, normalizeUrlOptions)));
175
193
  }
176
- if (isEnabled("tech/robots-noindex-conflict")) {
194
+ if (isEnabled("tech/robots-noindex-conflict") && modeOk("tech/robots-noindex-conflict")) {
177
195
  findings.push(...tag(robotsNoindexConflictRule(pages, inbound)));
178
196
  }
179
- if (isEnabled("tech/redirect-chain")) {
197
+ if (isEnabled("tech/redirect-chain") && modeOk("tech/redirect-chain")) {
180
198
  findings.push(...tag(redirectChainRule(pages)));
181
199
  }
182
- if (isEnabled("tech/soft-404")) {
200
+ if (isEnabled("tech/soft-404") && modeOk("tech/soft-404")) {
183
201
  findings.push(...tag(soft404Rule(pages)));
184
202
  }
185
- if (isEnabled("tech/og-completeness")) {
203
+ if (isEnabled("tech/og-completeness") && modeOk("tech/og-completeness")) {
186
204
  findings.push(...tag(ogCompletenessRule(pages)));
187
205
  }
188
- if (isEnabled("tech/hreflang-consistency")) {
206
+ if (isEnabled("tech/hreflang-consistency") && modeOk("tech/hreflang-consistency")) {
189
207
  findings.push(...tag(hreflangConsistencyRule(pages, normalizeUrlOptions)));
190
208
  }
191
209
  // Schema rules
192
- if (isEnabled("schema/json-ld-valid")) {
210
+ if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
193
211
  findings.push(...tag(jsonLdValidRule(pages)));
194
212
  }
195
- if (isEnabled("schema/required-fields")) {
213
+ if (isEnabled("schema/required-fields") && modeOk("schema/required-fields")) {
196
214
  findings.push(...tag(requiredFieldsRule(pages)));
197
215
  }
198
- if (isEnabled("schema/consistency")) {
216
+ if (isEnabled("schema/consistency") && modeOk("schema/consistency")) {
199
217
  findings.push(...tag(schemaConsistencyRule(pages)));
200
218
  }
219
+ // AEO rules
220
+ if (isEnabled("aeo/freshness-signals")) {
221
+ findings.push(...tag(freshnessSignalsRule(pages, {
222
+ maxStaleDays: resolvedRules.freshnessMaxStaleDays,
223
+ })));
224
+ }
225
+ if (isEnabled("aeo/faq-coverage")) {
226
+ findings.push(...tag(faqCoverageRule(pages, {
227
+ minQuestionHeadings: resolvedRules.faqMinQuestionHeadings,
228
+ })));
229
+ }
230
+ if (isEnabled("aeo/answer-first")) {
231
+ findings.push(...tag(answerFirstRule(pages, entityPatterns, {
232
+ maxFirstParagraphWords: resolvedRules.answerFirstMaxWords,
233
+ })));
234
+ }
235
+ if (isEnabled("aeo/citable-facts")) {
236
+ findings.push(...tag(citableFactsRule(pages, entityPatterns, {
237
+ minFactsPerPage: resolvedRules.citableFactsMin,
238
+ targetFactsPerPage: resolvedRules.citableFactsTarget,
239
+ })));
240
+ }
241
+ if (isEnabled("aeo/non-replicable-value")) {
242
+ findings.push(...tag(nonReplicableValueRule(pages)));
243
+ }
244
+ if (isEnabled("aeo/content-modularity")) {
245
+ findings.push(...tag(contentModularityRule(pages, {
246
+ maxParagraphWords: resolvedRules.modularityMaxParagraphWords,
247
+ minSelfContainedRatio: resolvedRules.modularityMinSelfContainedRatio,
248
+ })));
249
+ }
201
250
  // Cannibal rules
202
- if (isEnabled("cannibal/title-overlap")) {
251
+ if (isEnabled("cannibal/title-overlap") && modeOk("cannibal/title-overlap")) {
203
252
  findings.push(...tag(titleOverlapRule(pages, entityPatterns, resolvedRules.titleOverlapThreshold)));
204
253
  }
205
- if (isEnabled("cannibal/keyword-collision")) {
254
+ if (isEnabled("cannibal/keyword-collision") && modeOk("cannibal/keyword-collision")) {
206
255
  findings.push(...tag(keywordCollisionRule(pages, resolvedRules.keywordCollisionMinShared)));
207
256
  }
208
- if (isEnabled("cannibal/url-pattern")) {
257
+ if (isEnabled("cannibal/url-pattern") && modeOk("cannibal/url-pattern")) {
209
258
  findings.push(...tag(urlPatternRule(pages)));
210
259
  }
211
260
  return findings;
@@ -223,6 +272,7 @@ function scoreFromFindings(findings) {
223
272
  const raw = {
224
273
  spam: 0,
225
274
  content: 0,
275
+ aeo: 0,
226
276
  links: 0,
227
277
  tech: 0,
228
278
  schema: 0,
@@ -238,6 +288,7 @@ function scoreFromFindings(findings) {
238
288
  }
239
289
  const weighted = raw.spam * CATEGORY_WEIGHTS.spam +
240
290
  raw.content * CATEGORY_WEIGHTS.content +
291
+ raw.aeo * CATEGORY_WEIGHTS.aeo +
241
292
  raw.links * CATEGORY_WEIGHTS.links +
242
293
  raw.tech * CATEGORY_WEIGHTS.tech +
243
294
  raw.schema * CATEGORY_WEIGHTS.schema +
@@ -248,6 +299,7 @@ function scoreFromFindings(findings) {
248
299
  categoryScores: {
249
300
  spam: raw.spam,
250
301
  content: raw.content,
302
+ aeo: raw.aeo,
251
303
  links: raw.links,
252
304
  tech: raw.tech,
253
305
  schema: raw.schema,
@@ -702,7 +754,14 @@ export async function auditSource(source, options) {
702
754
  hubPagesMaxSiblings: options?.rules?.hubPagesMaxSiblings ?? DEFAULTS.hubPagesMaxSiblings,
703
755
  titleOverlapThreshold: options?.rules?.titleOverlapThreshold ?? DEFAULTS.titleOverlapThreshold,
704
756
  keywordCollisionMinShared: options?.rules?.keywordCollisionMinShared ?? DEFAULTS.keywordCollisionMinShared,
705
- templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages
757
+ templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
758
+ answerFirstMaxWords: options?.rules?.answerFirstMaxWords ?? DEFAULTS.answerFirstMaxWords,
759
+ citableFactsMin: options?.rules?.citableFactsMin ?? DEFAULTS.citableFactsMin,
760
+ citableFactsTarget: options?.rules?.citableFactsTarget ?? DEFAULTS.citableFactsTarget,
761
+ freshnessMaxStaleDays: options?.rules?.freshnessMaxStaleDays ?? DEFAULTS.freshnessMaxStaleDays,
762
+ modularityMaxParagraphWords: options?.rules?.modularityMaxParagraphWords ?? DEFAULTS.modularityMaxParagraphWords,
763
+ modularityMinSelfContainedRatio: options?.rules?.modularityMinSelfContainedRatio ?? DEFAULTS.modularityMinSelfContainedRatio,
764
+ faqMinQuestionHeadings: options?.rules?.faqMinQuestionHeadings ?? DEFAULTS.faqMinQuestionHeadings
706
765
  };
707
766
  const normalizeUrlOptions = mergeNormalizeUrlOptions({
708
767
  stripQuery: options?.rules?.stripUrlQuery ?? true,
@@ -836,9 +895,14 @@ export async function auditSource(source, options) {
836
895
  throw new Error(`Invalid regex flags "${rawFlags}" in entityPatterns for placeholder "${p.placeholder}". ` +
837
896
  `Only the flags g, i, m, s, u, y are permitted.`);
838
897
  }
898
+ // Entity patterns are used with String.replace to mask every occurrence, which
899
+ // requires the `g` flag. Add it if the user forgot — a silently broken "only first
900
+ // match masked" regex would make template-detection rules (answer-first,
901
+ // citable-facts) miss shared openers.
902
+ const normalizedFlags = rawFlags.includes("g") ? rawFlags : `${rawFlags}g`;
839
903
  try {
840
904
  // Flags validated against SAFE_FLAGS_RE above; pattern is from trusted local config, not HTTP input.
841
- return { placeholder: p.placeholder, pattern: new RegExp(p.pattern, rawFlags) }; // nosemgrep
905
+ return { placeholder: p.placeholder, pattern: new RegExp(p.pattern, normalizedFlags) }; // nosemgrep
842
906
  }
843
907
  catch (err) {
844
908
  throw new Error(`Invalid regex pattern for placeholder "${p.placeholder}": ${err.message}`);
@@ -851,8 +915,9 @@ export async function auditSource(source, options) {
851
915
  const allFindings = [...duplicateUrlFindings];
852
916
  const groupScores = {};
853
917
  const groupPageCounts = {};
918
+ const auditMode = options?.mode ?? "full";
854
919
  // Site-wide rules (run once, outside group loop)
855
- if (sitemapUrlSet && sitemapUrlSet.size > 0) {
920
+ if (sitemapUrlSet && sitemapUrlSet.size > 0 && auditMode !== "diff") {
856
921
  const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet);
857
922
  allFindings.push(...sitemapFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
858
923
  if (robotsTxtContent) {
@@ -860,13 +925,24 @@ export async function auditSource(source, options) {
860
925
  allFindings.push(...robotsFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
861
926
  }
862
927
  }
928
+ // AEO site-wide rules. These run unconditionally (consistent with sitemap-completeness
929
+ // and robots-compliance); page-group rule lists govern per-page AEO rules only.
930
+ const llmsFindings = await llmsTxtRule(source, { timeoutMs });
931
+ allFindings.push(...llmsFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
932
+ if (robotsTxtContent) {
933
+ const crawlerFindings = crawlerAccessRule(robotsTxtContent);
934
+ allFindings.push(...crawlerFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
935
+ }
863
936
  // Data source comparison rules
864
937
  if (options?.dataSource?.records && options.dataSource.records.length > 0) {
865
- const dataFindings = [
866
- ...dataBindingRule(parsedPages, options.dataSource.records),
867
- ...dataIdenticalRule(parsedPages, options.dataSource.records),
868
- ];
869
- allFindings.push(...dataFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
938
+ if (auditMode !== "diff" || isRuleAllowedInDiff("data/missing-binding")) {
939
+ const dataBindingFindings = dataBindingRule(parsedPages, options.dataSource.records);
940
+ allFindings.push(...dataBindingFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
941
+ }
942
+ if (auditMode !== "diff" || isRuleAllowedInDiff("data/identical-across-pages")) {
943
+ const dataIdenticalFindings = dataIdenticalRule(parsedPages, options.dataSource.records);
944
+ allFindings.push(...dataIdenticalFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
945
+ }
870
946
  }
871
947
  for (const [groupName, groupPages] of classified) {
872
948
  if (groupPages.length === 0)
@@ -876,7 +952,7 @@ export async function auditSource(source, options) {
876
952
  continue;
877
953
  const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
878
954
  const enabledCheck = (ruleId) => isRuleEnabled(ruleId, groupConfig?.rules);
879
- const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides);
955
+ const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
880
956
  allFindings.push(...findings);
881
957
  groupPageCounts[groupName] = groupPages.length;
882
958
  const { score } = scoreFromFindings(findings);