@pseolint/core 0.2.2 → 0.3.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/dist/ai/prompt.d.ts +1 -1
- package/dist/ai/prompt.d.ts.map +1 -1
- package/dist/ai/prompt.js +13 -1
- package/dist/ai/prompt.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +122 -46
- package/dist/auditor.js.map +1 -1
- package/dist/formatters/console.d.ts +9 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +53 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +363 -135
- package/dist/formatters/html.js.map +1 -1
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +8 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/answer-first.d.ts +18 -0
- package/dist/rules/aeo/answer-first.d.ts.map +1 -0
- package/dist/rules/aeo/answer-first.js +191 -0
- package/dist/rules/aeo/answer-first.js.map +1 -0
- package/dist/rules/aeo/citable-facts.d.ts +9 -0
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -0
- package/dist/rules/aeo/citable-facts.js +90 -0
- package/dist/rules/aeo/citable-facts.js.map +1 -0
- package/dist/rules/aeo/content-modularity.d.ts +11 -0
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -0
- package/dist/rules/aeo/content-modularity.js +107 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -0
- package/dist/rules/aeo/crawler-access.d.ts +25 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -0
- package/dist/rules/aeo/crawler-access.js +116 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -0
- package/dist/rules/aeo/faq-coverage.d.ts +9 -0
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -0
- package/dist/rules/aeo/faq-coverage.js +71 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -0
- package/dist/rules/aeo/freshness-signals.d.ts +9 -0
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -0
- package/dist/rules/aeo/freshness-signals.js +109 -0
- package/dist/rules/aeo/freshness-signals.js.map +1 -0
- package/dist/rules/aeo/llms-txt.d.ts +24 -0
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -0
- package/dist/rules/aeo/llms-txt.js +93 -0
- package/dist/rules/aeo/llms-txt.js.map +1 -0
- package/dist/rules/aeo/non-replicable-value.d.ts +9 -0
- package/dist/rules/aeo/non-replicable-value.d.ts.map +1 -0
- package/dist/rules/aeo/non-replicable-value.js +95 -0
- package/dist/rules/aeo/non-replicable-value.js.map +1 -0
- package/dist/rules/scope.d.ts +12 -0
- package/dist/rules/scope.d.ts.map +1 -0
- package/dist/rules/scope.js +66 -0
- package/dist/rules/scope.js.map +1 -0
- package/dist/types.d.ts +17 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -24,15 +24,16 @@ console.log(`Findings: ${summary.findings.length}`);
|
|
|
24
24
|
|
|
25
25
|
## What It Checks
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
42 rules across 8 categories. Seven categories feed the composite score; `data/*` is a separate data-binding family.
|
|
28
28
|
|
|
29
|
-
- **Spam / SpamBrain risk** — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern
|
|
30
|
-
- **
|
|
31
|
-
- **
|
|
32
|
-
- **
|
|
33
|
-
- **
|
|
34
|
-
- **
|
|
35
|
-
- **
|
|
29
|
+
- **Spam / SpamBrain risk** (8) — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern
|
|
30
|
+
- **Technical SEO** (8) — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, Open Graph, hreflang
|
|
31
|
+
- **AEO / AI Overview citability** (8, v0.3.0) — `llms.txt` presence, AI-crawler access in robots.txt, freshness signals, FAQ coverage, answer-first opener, citable-fact density, non-replicable value, content modularity
|
|
32
|
+
- **Content** (5) — unique value, heading uniqueness, meta uniqueness, author attribution, E-E-A-T signals
|
|
33
|
+
- **Internal linking** (5) — orphan pages, dead ends, cluster connectivity, hub pages, link depth
|
|
34
|
+
- **Structured data** (3) — JSON-LD validity, required fields, cross-page schema consistency
|
|
35
|
+
- **Cannibalization** (3) — title overlap, keyword collision, URL pattern conflicts
|
|
36
|
+
- **Data binding** (2) — verify rendered pages expose values from a source dataset (missing or identical-across-pages bindings)
|
|
36
37
|
|
|
37
38
|
## API
|
|
38
39
|
|
|
@@ -118,4 +119,4 @@ All AI providers and `playwright-core` are optional peers — you only install t
|
|
|
118
119
|
|
|
119
120
|
## License
|
|
120
121
|
|
|
121
|
-
MIT
|
|
122
|
+
MIT
|
package/dist/ai/prompt.d.ts
CHANGED
package/dist/ai/prompt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;AAqB1C,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,GAAG,MAAM,CAMrD;AAqBD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,aAAa,CAqC3F"}
|
package/dist/ai/prompt.js
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
|
-
export const PROMPT_VERSION = "1.
|
|
2
|
+
export const PROMPT_VERSION = "1.1.0";
|
|
3
3
|
export const MAX_FINDINGS_IN_PROMPT = 200;
|
|
4
4
|
const SEVERITY_ORDER = { info: 0, warning: 1, error: 2, critical: 3 };
|
|
5
5
|
const SYSTEM_PROMPT = `You are an SEO audit triage assistant. Given a list of pSEO linter findings, identify 1-5 underlying ROOT CAUSES driving the findings. Group findings by shared underlying problem, not by rule ID. Rank causes by likely SEO impact (highest first).
|
|
6
6
|
|
|
7
|
+
Findings fall into two distinct threat families — treat them as separate root causes, not one combined cause:
|
|
8
|
+
- SpamBrain penalty risk: spam/*, cannibal/*, content/*, data/*, tech/*, schema/*, links/* — these make Google penalize or demote the site.
|
|
9
|
+
- AI Overview invisibility: aeo/* — these make pages uncitable in AI answer engines (ChatGPT, Perplexity, Gemini, AI Overviews). Sites not cited lose ~68% of traffic vs ~12% for cited sites.
|
|
10
|
+
|
|
11
|
+
When both families are present, produce at least one root cause from each. Label AEO root causes clearly (e.g. "AI Overviews: ...") so the user can tell them apart from penalty risks.
|
|
12
|
+
|
|
7
13
|
Rules:
|
|
8
14
|
- Emit rootCauses FIRST, then narrative — do not reverse this order.
|
|
9
15
|
- Keep each rootCause label <= 80 chars and phrase it as a problem statement.
|
|
@@ -31,11 +37,17 @@ export function buildPromptRequest(findings, pageCount) {
|
|
|
31
37
|
pageUrl: f.pageUrl,
|
|
32
38
|
group: f.group,
|
|
33
39
|
}));
|
|
40
|
+
const countByCategory = {};
|
|
41
|
+
for (const f of findings) {
|
|
42
|
+
const cat = f.ruleId.split("/")[0];
|
|
43
|
+
countByCategory[cat] = (countByCategory[cat] ?? 0) + 1;
|
|
44
|
+
}
|
|
34
45
|
const payload = {
|
|
35
46
|
totalFindings: total,
|
|
36
47
|
pageCount,
|
|
37
48
|
truncated,
|
|
38
49
|
findings: projected,
|
|
50
|
+
findingCountByCategory: countByCategory,
|
|
39
51
|
};
|
|
40
52
|
if (truncated) {
|
|
41
53
|
const counts = {};
|
package/dist/ai/prompt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG
|
|
1
|
+
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;sDAegC,CAAC;AAOvD,MAAM,UAAU,eAAe,CAAC,CAAa;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC;SAC3C,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACf,OAAO,GAAG,CAAC,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;AAC/B,CAAC;AAqBD,MAAM,UAAU,kBAAkB,CAAC,QAAsB,EAAE,SAAiB;IAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC9B,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACrG,MAAM,SAAS,GAAG,KAAK,GAAG,sBAAsB,CAAC;IACjD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC;QACvF,EAAE,EAAE,eAAe,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE,CAAC,CAAC,KAAK;KACf,CAAC,CAAC,CAAC;IAEJ,MAAM,eAAe,GAA2B,EAAE,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,OAAO,GAAkB;QAC7B,aAAa,EAAE,KAAK;QACpB,SAAS;QACT,SAAS;QACT,QAAQ,EAAE,SAAS;QACnB,sBAAsB,EAAE,eAAe;KACxC,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACzE,OAAO,CAAC,kBAAkB,GAAG,MAAM,CAAC;IACtC,CAAC;IAED,OAAO;QACL,MAAM,EAAE,aAAa;QACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC;AACJ,CAAC"}
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AA6DA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAwG,MAAM,YAAY,CAAC;AAu0BnK,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAgf/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -27,6 +27,14 @@ import { ogCompletenessRule } from "./rules/tech/og-completeness.js";
|
|
|
27
27
|
import { robotsNoindexConflictRule } from "./rules/tech/robots-noindex-conflict.js";
|
|
28
28
|
import { sitemapCompletenessRule } from "./rules/tech/sitemap-completeness.js";
|
|
29
29
|
import { robotsComplianceRule, parseDisallowPatterns, isBlockedByPattern, parseCrawlDelaySeconds } from "./rules/tech/robots-sitemap-presence.js";
|
|
30
|
+
import { llmsTxtRule } from "./rules/aeo/llms-txt.js";
|
|
31
|
+
import { crawlerAccessRule } from "./rules/aeo/crawler-access.js";
|
|
32
|
+
import { freshnessSignalsRule } from "./rules/aeo/freshness-signals.js";
|
|
33
|
+
import { faqCoverageRule } from "./rules/aeo/faq-coverage.js";
|
|
34
|
+
import { answerFirstRule } from "./rules/aeo/answer-first.js";
|
|
35
|
+
import { citableFactsRule } from "./rules/aeo/citable-facts.js";
|
|
36
|
+
import { nonReplicableValueRule } from "./rules/aeo/non-replicable-value.js";
|
|
37
|
+
import { contentModularityRule } from "./rules/aeo/content-modularity.js";
|
|
30
38
|
import { redirectChainRule } from "./rules/tech/redirect-chain.js";
|
|
31
39
|
import { soft404Rule } from "./rules/tech/soft-404.js";
|
|
32
40
|
import { jsonLdValidRule } from "./rules/schema/json-ld-valid.js";
|
|
@@ -38,6 +46,7 @@ import { urlPatternRule } from "./rules/cannibal/url-pattern.js";
|
|
|
38
46
|
import { templateCoverageRule } from "./rules/spam/template-coverage.js";
|
|
39
47
|
import { dataBindingRule, dataIdenticalRule } from "./rules/data/data-binding.js";
|
|
40
48
|
import { classifyPages, isRuleEnabled } from "./page-classifier.js";
|
|
49
|
+
import { isRuleAllowedInDiff } from "./rules/scope.js";
|
|
41
50
|
import { RULE_REFERENCES } from "./rule-references.js";
|
|
42
51
|
import { enrichFindings } from "./enrich-findings.js";
|
|
43
52
|
import { triageFindings } from "./ai/triage.js";
|
|
@@ -61,13 +70,21 @@ const DEFAULTS = {
|
|
|
61
70
|
hubPagesMaxSiblings: 50,
|
|
62
71
|
titleOverlapThreshold: 0.8,
|
|
63
72
|
keywordCollisionMinShared: 6,
|
|
64
|
-
templateCoverageMinPages: 5
|
|
73
|
+
templateCoverageMinPages: 5,
|
|
74
|
+
answerFirstMaxWords: 100,
|
|
75
|
+
citableFactsMin: 3,
|
|
76
|
+
citableFactsTarget: 8,
|
|
77
|
+
freshnessMaxStaleDays: 180,
|
|
78
|
+
modularityMaxParagraphWords: 200,
|
|
79
|
+
modularityMinSelfContainedRatio: 0.7,
|
|
80
|
+
faqMinQuestionHeadings: 2
|
|
65
81
|
};
|
|
66
82
|
const CATEGORY_WEIGHTS = {
|
|
67
|
-
spam: 0.
|
|
68
|
-
content: 0.
|
|
69
|
-
|
|
70
|
-
|
|
83
|
+
spam: 0.35,
|
|
84
|
+
content: 0.2,
|
|
85
|
+
aeo: 0.15,
|
|
86
|
+
links: 0.12,
|
|
87
|
+
tech: 0.08,
|
|
71
88
|
schema: 0.05,
|
|
72
89
|
cannibal: 0.05,
|
|
73
90
|
/** Dedup / crawl hygiene; does not affect composite score. */
|
|
@@ -93,8 +110,9 @@ function resolveGroupRules(baseRules, overrides) {
|
|
|
93
110
|
}
|
|
94
111
|
return result;
|
|
95
112
|
}
|
|
96
|
-
function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides) {
|
|
113
|
+
function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
|
|
97
114
|
const findings = [];
|
|
115
|
+
const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
|
|
98
116
|
const tag = (results) => results.map((r) => {
|
|
99
117
|
const override = overrides?.[r.ruleId];
|
|
100
118
|
return {
|
|
@@ -106,106 +124,137 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
|
|
|
106
124
|
});
|
|
107
125
|
// Spam rules — always compute cross-page data, only push findings if enabled
|
|
108
126
|
const nearDuplicate = nearDuplicateRule(pages, resolvedRules.nearDuplicateThreshold);
|
|
109
|
-
if (isEnabled("spam/near-duplicate")) {
|
|
127
|
+
if (isEnabled("spam/near-duplicate") && modeOk("spam/near-duplicate")) {
|
|
110
128
|
findings.push(...tag(nearDuplicate.findings));
|
|
111
129
|
}
|
|
112
130
|
const entitySwap = entitySwapRule(pages, entityPatterns, resolvedRules.entitySwapThreshold);
|
|
113
|
-
if (isEnabled("spam/entity-swap")) {
|
|
131
|
+
if (isEnabled("spam/entity-swap") && modeOk("spam/entity-swap")) {
|
|
114
132
|
findings.push(...tag(entitySwap.findings));
|
|
115
133
|
}
|
|
116
134
|
const thinContent = thinContentRule(pages, resolvedRules.thinContentMinWords);
|
|
117
|
-
if (isEnabled("spam/thin-content")) {
|
|
135
|
+
if (isEnabled("spam/thin-content") && modeOk("spam/thin-content")) {
|
|
118
136
|
findings.push(...tag(thinContent.findings));
|
|
119
137
|
}
|
|
120
|
-
if (isEnabled("spam/doorway-pattern")) {
|
|
138
|
+
if (isEnabled("spam/doorway-pattern") && modeOk("spam/doorway-pattern")) {
|
|
121
139
|
findings.push(...tag(doorwayPatternRule(nearDuplicate.pairs, entitySwap.pairs, thinContent.thinContentUrls, pages)));
|
|
122
140
|
}
|
|
123
|
-
if (isEnabled("spam/publication-velocity")) {
|
|
141
|
+
if (isEnabled("spam/publication-velocity") && modeOk("spam/publication-velocity")) {
|
|
124
142
|
findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay)));
|
|
125
143
|
}
|
|
126
|
-
if (isEnabled("spam/boilerplate-ratio")) {
|
|
144
|
+
if (isEnabled("spam/boilerplate-ratio") && modeOk("spam/boilerplate-ratio")) {
|
|
127
145
|
findings.push(...tag(boilerplateRatioRule(pages, resolvedRules.boilerplateMaxRatio)));
|
|
128
146
|
}
|
|
129
|
-
if (isEnabled("spam/template-diversity")) {
|
|
147
|
+
if (isEnabled("spam/template-diversity") && modeOk("spam/template-diversity")) {
|
|
130
148
|
findings.push(...tag(templateDiversityRule(pages, resolvedRules.templateDiversityMinUniqueRatio)));
|
|
131
149
|
}
|
|
132
|
-
if (isEnabled("spam/template-coverage")) {
|
|
150
|
+
if (isEnabled("spam/template-coverage") && modeOk("spam/template-coverage")) {
|
|
133
151
|
findings.push(...tag(templateCoverageRule(pages, entityPatterns, resolvedRules.templateCoverageMinPages)));
|
|
134
152
|
}
|
|
135
153
|
// Content rules
|
|
136
|
-
if (isEnabled("content/unique-value")) {
|
|
154
|
+
if (isEnabled("content/unique-value") && modeOk("content/unique-value")) {
|
|
137
155
|
findings.push(...tag(uniqueValueRule(pages, resolvedRules.uniqueValueMinWords)));
|
|
138
156
|
}
|
|
139
|
-
if (isEnabled("content/heading-uniqueness")) {
|
|
157
|
+
if (isEnabled("content/heading-uniqueness") && modeOk("content/heading-uniqueness")) {
|
|
140
158
|
findings.push(...tag(headingUniquenessRule(pages, entityPatterns)));
|
|
141
159
|
}
|
|
142
|
-
if (isEnabled("content/meta-uniqueness")) {
|
|
160
|
+
if (isEnabled("content/meta-uniqueness") && modeOk("content/meta-uniqueness")) {
|
|
143
161
|
findings.push(...tag(metaUniquenessRule(pages, entityPatterns, resolvedRules.metaUniquenessMinJaccard)));
|
|
144
162
|
}
|
|
145
|
-
if (isEnabled("content/missing-author")) {
|
|
163
|
+
if (isEnabled("content/missing-author") && modeOk("content/missing-author")) {
|
|
146
164
|
findings.push(...tag(missingAuthorRule(pages)));
|
|
147
165
|
}
|
|
148
|
-
if (isEnabled("content/eeat-signals")) {
|
|
166
|
+
if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
|
|
149
167
|
findings.push(...tag(eeatSignalsRule(pages)));
|
|
150
168
|
}
|
|
151
169
|
// Link rules — use the global link graph
|
|
152
|
-
if (isEnabled("links/orphan-pages")) {
|
|
170
|
+
if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
|
|
153
171
|
findings.push(...tag(orphanPagesRule(pages, inbound, rootUrl)));
|
|
154
172
|
}
|
|
155
|
-
if (isEnabled("links/dead-ends")) {
|
|
173
|
+
if (isEnabled("links/dead-ends") && modeOk("links/dead-ends")) {
|
|
156
174
|
findings.push(...tag(deadEndsRule(pages, knownUrls, rootUrl)));
|
|
157
175
|
}
|
|
158
|
-
if (isEnabled("links/link-depth")) {
|
|
176
|
+
if (isEnabled("links/link-depth") && modeOk("links/link-depth")) {
|
|
159
177
|
if (rootUrl) {
|
|
160
178
|
findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound)));
|
|
161
179
|
}
|
|
162
180
|
}
|
|
163
|
-
if (isEnabled("links/cluster-connectivity")) {
|
|
181
|
+
if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
|
|
164
182
|
findings.push(...tag(clusterConnectivityRule(pages, knownUrls)));
|
|
165
183
|
}
|
|
166
|
-
if (isEnabled("links/hub-pages")) {
|
|
184
|
+
if (isEnabled("links/hub-pages") && modeOk("links/hub-pages")) {
|
|
167
185
|
findings.push(...tag(hubPagesRule(pages, knownUrls, resolvedRules.hubPagesMinSiblings, resolvedRules.hubPagesMaxSiblings)));
|
|
168
186
|
}
|
|
169
187
|
// Tech rules
|
|
170
|
-
if (isEnabled("tech/canonical-consistency")) {
|
|
188
|
+
if (isEnabled("tech/canonical-consistency") && modeOk("tech/canonical-consistency")) {
|
|
171
189
|
findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
|
|
172
190
|
}
|
|
173
|
-
if (isEnabled("tech/canonical-noindex-conflict")) {
|
|
191
|
+
if (isEnabled("tech/canonical-noindex-conflict") && modeOk("tech/canonical-noindex-conflict")) {
|
|
174
192
|
findings.push(...tag(canonicalNoindexConflictRule(pages, normalizeUrlOptions)));
|
|
175
193
|
}
|
|
176
|
-
if (isEnabled("tech/robots-noindex-conflict")) {
|
|
194
|
+
if (isEnabled("tech/robots-noindex-conflict") && modeOk("tech/robots-noindex-conflict")) {
|
|
177
195
|
findings.push(...tag(robotsNoindexConflictRule(pages, inbound)));
|
|
178
196
|
}
|
|
179
|
-
if (isEnabled("tech/redirect-chain")) {
|
|
197
|
+
if (isEnabled("tech/redirect-chain") && modeOk("tech/redirect-chain")) {
|
|
180
198
|
findings.push(...tag(redirectChainRule(pages)));
|
|
181
199
|
}
|
|
182
|
-
if (isEnabled("tech/soft-404")) {
|
|
200
|
+
if (isEnabled("tech/soft-404") && modeOk("tech/soft-404")) {
|
|
183
201
|
findings.push(...tag(soft404Rule(pages)));
|
|
184
202
|
}
|
|
185
|
-
if (isEnabled("tech/og-completeness")) {
|
|
203
|
+
if (isEnabled("tech/og-completeness") && modeOk("tech/og-completeness")) {
|
|
186
204
|
findings.push(...tag(ogCompletenessRule(pages)));
|
|
187
205
|
}
|
|
188
|
-
if (isEnabled("tech/hreflang-consistency")) {
|
|
206
|
+
if (isEnabled("tech/hreflang-consistency") && modeOk("tech/hreflang-consistency")) {
|
|
189
207
|
findings.push(...tag(hreflangConsistencyRule(pages, normalizeUrlOptions)));
|
|
190
208
|
}
|
|
191
209
|
// Schema rules
|
|
192
|
-
if (isEnabled("schema/json-ld-valid")) {
|
|
210
|
+
if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
|
|
193
211
|
findings.push(...tag(jsonLdValidRule(pages)));
|
|
194
212
|
}
|
|
195
|
-
if (isEnabled("schema/required-fields")) {
|
|
213
|
+
if (isEnabled("schema/required-fields") && modeOk("schema/required-fields")) {
|
|
196
214
|
findings.push(...tag(requiredFieldsRule(pages)));
|
|
197
215
|
}
|
|
198
|
-
if (isEnabled("schema/consistency")) {
|
|
216
|
+
if (isEnabled("schema/consistency") && modeOk("schema/consistency")) {
|
|
199
217
|
findings.push(...tag(schemaConsistencyRule(pages)));
|
|
200
218
|
}
|
|
219
|
+
// AEO rules
|
|
220
|
+
if (isEnabled("aeo/freshness-signals")) {
|
|
221
|
+
findings.push(...tag(freshnessSignalsRule(pages, {
|
|
222
|
+
maxStaleDays: resolvedRules.freshnessMaxStaleDays,
|
|
223
|
+
})));
|
|
224
|
+
}
|
|
225
|
+
if (isEnabled("aeo/faq-coverage")) {
|
|
226
|
+
findings.push(...tag(faqCoverageRule(pages, {
|
|
227
|
+
minQuestionHeadings: resolvedRules.faqMinQuestionHeadings,
|
|
228
|
+
})));
|
|
229
|
+
}
|
|
230
|
+
if (isEnabled("aeo/answer-first")) {
|
|
231
|
+
findings.push(...tag(answerFirstRule(pages, entityPatterns, {
|
|
232
|
+
maxFirstParagraphWords: resolvedRules.answerFirstMaxWords,
|
|
233
|
+
})));
|
|
234
|
+
}
|
|
235
|
+
if (isEnabled("aeo/citable-facts")) {
|
|
236
|
+
findings.push(...tag(citableFactsRule(pages, entityPatterns, {
|
|
237
|
+
minFactsPerPage: resolvedRules.citableFactsMin,
|
|
238
|
+
targetFactsPerPage: resolvedRules.citableFactsTarget,
|
|
239
|
+
})));
|
|
240
|
+
}
|
|
241
|
+
if (isEnabled("aeo/non-replicable-value")) {
|
|
242
|
+
findings.push(...tag(nonReplicableValueRule(pages)));
|
|
243
|
+
}
|
|
244
|
+
if (isEnabled("aeo/content-modularity")) {
|
|
245
|
+
findings.push(...tag(contentModularityRule(pages, {
|
|
246
|
+
maxParagraphWords: resolvedRules.modularityMaxParagraphWords,
|
|
247
|
+
minSelfContainedRatio: resolvedRules.modularityMinSelfContainedRatio,
|
|
248
|
+
})));
|
|
249
|
+
}
|
|
201
250
|
// Cannibal rules
|
|
202
|
-
if (isEnabled("cannibal/title-overlap")) {
|
|
251
|
+
if (isEnabled("cannibal/title-overlap") && modeOk("cannibal/title-overlap")) {
|
|
203
252
|
findings.push(...tag(titleOverlapRule(pages, entityPatterns, resolvedRules.titleOverlapThreshold)));
|
|
204
253
|
}
|
|
205
|
-
if (isEnabled("cannibal/keyword-collision")) {
|
|
254
|
+
if (isEnabled("cannibal/keyword-collision") && modeOk("cannibal/keyword-collision")) {
|
|
206
255
|
findings.push(...tag(keywordCollisionRule(pages, resolvedRules.keywordCollisionMinShared)));
|
|
207
256
|
}
|
|
208
|
-
if (isEnabled("cannibal/url-pattern")) {
|
|
257
|
+
if (isEnabled("cannibal/url-pattern") && modeOk("cannibal/url-pattern")) {
|
|
209
258
|
findings.push(...tag(urlPatternRule(pages)));
|
|
210
259
|
}
|
|
211
260
|
return findings;
|
|
@@ -223,6 +272,7 @@ function scoreFromFindings(findings) {
|
|
|
223
272
|
const raw = {
|
|
224
273
|
spam: 0,
|
|
225
274
|
content: 0,
|
|
275
|
+
aeo: 0,
|
|
226
276
|
links: 0,
|
|
227
277
|
tech: 0,
|
|
228
278
|
schema: 0,
|
|
@@ -238,6 +288,7 @@ function scoreFromFindings(findings) {
|
|
|
238
288
|
}
|
|
239
289
|
const weighted = raw.spam * CATEGORY_WEIGHTS.spam +
|
|
240
290
|
raw.content * CATEGORY_WEIGHTS.content +
|
|
291
|
+
raw.aeo * CATEGORY_WEIGHTS.aeo +
|
|
241
292
|
raw.links * CATEGORY_WEIGHTS.links +
|
|
242
293
|
raw.tech * CATEGORY_WEIGHTS.tech +
|
|
243
294
|
raw.schema * CATEGORY_WEIGHTS.schema +
|
|
@@ -248,6 +299,7 @@ function scoreFromFindings(findings) {
|
|
|
248
299
|
categoryScores: {
|
|
249
300
|
spam: raw.spam,
|
|
250
301
|
content: raw.content,
|
|
302
|
+
aeo: raw.aeo,
|
|
251
303
|
links: raw.links,
|
|
252
304
|
tech: raw.tech,
|
|
253
305
|
schema: raw.schema,
|
|
@@ -702,7 +754,14 @@ export async function auditSource(source, options) {
|
|
|
702
754
|
hubPagesMaxSiblings: options?.rules?.hubPagesMaxSiblings ?? DEFAULTS.hubPagesMaxSiblings,
|
|
703
755
|
titleOverlapThreshold: options?.rules?.titleOverlapThreshold ?? DEFAULTS.titleOverlapThreshold,
|
|
704
756
|
keywordCollisionMinShared: options?.rules?.keywordCollisionMinShared ?? DEFAULTS.keywordCollisionMinShared,
|
|
705
|
-
templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages
|
|
757
|
+
templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
|
|
758
|
+
answerFirstMaxWords: options?.rules?.answerFirstMaxWords ?? DEFAULTS.answerFirstMaxWords,
|
|
759
|
+
citableFactsMin: options?.rules?.citableFactsMin ?? DEFAULTS.citableFactsMin,
|
|
760
|
+
citableFactsTarget: options?.rules?.citableFactsTarget ?? DEFAULTS.citableFactsTarget,
|
|
761
|
+
freshnessMaxStaleDays: options?.rules?.freshnessMaxStaleDays ?? DEFAULTS.freshnessMaxStaleDays,
|
|
762
|
+
modularityMaxParagraphWords: options?.rules?.modularityMaxParagraphWords ?? DEFAULTS.modularityMaxParagraphWords,
|
|
763
|
+
modularityMinSelfContainedRatio: options?.rules?.modularityMinSelfContainedRatio ?? DEFAULTS.modularityMinSelfContainedRatio,
|
|
764
|
+
faqMinQuestionHeadings: options?.rules?.faqMinQuestionHeadings ?? DEFAULTS.faqMinQuestionHeadings
|
|
706
765
|
};
|
|
707
766
|
const normalizeUrlOptions = mergeNormalizeUrlOptions({
|
|
708
767
|
stripQuery: options?.rules?.stripUrlQuery ?? true,
|
|
@@ -836,9 +895,14 @@ export async function auditSource(source, options) {
|
|
|
836
895
|
throw new Error(`Invalid regex flags "${rawFlags}" in entityPatterns for placeholder "${p.placeholder}". ` +
|
|
837
896
|
`Only the flags g, i, m, s, u, y are permitted.`);
|
|
838
897
|
}
|
|
898
|
+
// Entity patterns are used with String.replace to mask every occurrence, which
|
|
899
|
+
// requires the `g` flag. Add it if the user forgot — a silently broken "only first
|
|
900
|
+
// match masked" regex would make template-detection rules (answer-first,
|
|
901
|
+
// citable-facts) miss shared openers.
|
|
902
|
+
const normalizedFlags = rawFlags.includes("g") ? rawFlags : `${rawFlags}g`;
|
|
839
903
|
try {
|
|
840
904
|
// Flags validated against SAFE_FLAGS_RE above; pattern is from trusted local config, not HTTP input.
|
|
841
|
-
return { placeholder: p.placeholder, pattern: new RegExp(p.pattern,
|
|
905
|
+
return { placeholder: p.placeholder, pattern: new RegExp(p.pattern, normalizedFlags) }; // nosemgrep
|
|
842
906
|
}
|
|
843
907
|
catch (err) {
|
|
844
908
|
throw new Error(`Invalid regex pattern for placeholder "${p.placeholder}": ${err.message}`);
|
|
@@ -851,8 +915,9 @@ export async function auditSource(source, options) {
|
|
|
851
915
|
const allFindings = [...duplicateUrlFindings];
|
|
852
916
|
const groupScores = {};
|
|
853
917
|
const groupPageCounts = {};
|
|
918
|
+
const auditMode = options?.mode ?? "full";
|
|
854
919
|
// Site-wide rules (run once, outside group loop)
|
|
855
|
-
if (sitemapUrlSet && sitemapUrlSet.size > 0) {
|
|
920
|
+
if (sitemapUrlSet && sitemapUrlSet.size > 0 && auditMode !== "diff") {
|
|
856
921
|
const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet);
|
|
857
922
|
allFindings.push(...sitemapFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
858
923
|
if (robotsTxtContent) {
|
|
@@ -860,13 +925,24 @@ export async function auditSource(source, options) {
|
|
|
860
925
|
allFindings.push(...robotsFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
861
926
|
}
|
|
862
927
|
}
|
|
928
|
+
// AEO site-wide rules. These run unconditionally (consistent with sitemap-completeness
|
|
929
|
+
// and robots-compliance); page-group rule lists govern per-page AEO rules only.
|
|
930
|
+
const llmsFindings = await llmsTxtRule(source, { timeoutMs });
|
|
931
|
+
allFindings.push(...llmsFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
932
|
+
if (robotsTxtContent) {
|
|
933
|
+
const crawlerFindings = crawlerAccessRule(robotsTxtContent);
|
|
934
|
+
allFindings.push(...crawlerFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
935
|
+
}
|
|
863
936
|
// Data source comparison rules
|
|
864
937
|
if (options?.dataSource?.records && options.dataSource.records.length > 0) {
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
...
|
|
868
|
-
|
|
869
|
-
|
|
938
|
+
if (auditMode !== "diff" || isRuleAllowedInDiff("data/missing-binding")) {
|
|
939
|
+
const dataBindingFindings = dataBindingRule(parsedPages, options.dataSource.records);
|
|
940
|
+
allFindings.push(...dataBindingFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
941
|
+
}
|
|
942
|
+
if (auditMode !== "diff" || isRuleAllowedInDiff("data/identical-across-pages")) {
|
|
943
|
+
const dataIdenticalFindings = dataIdenticalRule(parsedPages, options.dataSource.records);
|
|
944
|
+
allFindings.push(...dataIdenticalFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
945
|
+
}
|
|
870
946
|
}
|
|
871
947
|
for (const [groupName, groupPages] of classified) {
|
|
872
948
|
if (groupPages.length === 0)
|
|
@@ -876,7 +952,7 @@ export async function auditSource(source, options) {
|
|
|
876
952
|
continue;
|
|
877
953
|
const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
|
|
878
954
|
const enabledCheck = (ruleId) => isRuleEnabled(ruleId, groupConfig?.rules);
|
|
879
|
-
const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides);
|
|
955
|
+
const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
|
|
880
956
|
allFindings.push(...findings);
|
|
881
957
|
groupPageCounts[groupName] = groupPages.length;
|
|
882
958
|
const { score } = scoreFromFindings(findings);
|