@pseolint/core 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/dist/ai/prompt.d.ts +1 -1
- package/dist/ai/prompt.d.ts.map +1 -1
- package/dist/ai/prompt.js +13 -1
- package/dist/ai/prompt.js.map +1 -1
- package/dist/ai/triage.d.ts +15 -1
- package/dist/ai/triage.d.ts.map +1 -1
- package/dist/ai/triage.js +30 -0
- package/dist/ai/triage.js.map +1 -1
- package/dist/analytics-blocklist.d.ts +28 -0
- package/dist/analytics-blocklist.d.ts.map +1 -0
- package/dist/analytics-blocklist.js +129 -0
- package/dist/analytics-blocklist.js.map +1 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +130 -46
- package/dist/auditor.js.map +1 -1
- package/dist/formatters/console.d.ts +9 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +53 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +557 -144
- package/dist/formatters/html.js.map +1 -1
- package/dist/index.d.ts +14 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -1
- package/dist/index.js.map +1 -1
- package/dist/renderer.d.ts +14 -0
- package/dist/renderer.d.ts.map +1 -1
- package/dist/renderer.js +130 -4
- package/dist/renderer.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +9 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/answer-first.d.ts +18 -0
- package/dist/rules/aeo/answer-first.d.ts.map +1 -0
- package/dist/rules/aeo/answer-first.js +191 -0
- package/dist/rules/aeo/answer-first.js.map +1 -0
- package/dist/rules/aeo/citable-facts.d.ts +9 -0
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -0
- package/dist/rules/aeo/citable-facts.js +90 -0
- package/dist/rules/aeo/citable-facts.js.map +1 -0
- package/dist/rules/aeo/content-modularity.d.ts +11 -0
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -0
- package/dist/rules/aeo/content-modularity.js +107 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -0
- package/dist/rules/aeo/crawler-access.d.ts +25 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -0
- package/dist/rules/aeo/crawler-access.js +116 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -0
- package/dist/rules/aeo/faq-coverage.d.ts +9 -0
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -0
- package/dist/rules/aeo/faq-coverage.js +71 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -0
- package/dist/rules/aeo/freshness-signals.d.ts +9 -0
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -0
- package/dist/rules/aeo/freshness-signals.js +109 -0
- package/dist/rules/aeo/freshness-signals.js.map +1 -0
- package/dist/rules/aeo/llms-txt.d.ts +24 -0
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -0
- package/dist/rules/aeo/llms-txt.js +93 -0
- package/dist/rules/aeo/llms-txt.js.map +1 -0
- package/dist/rules/aeo/non-replicable-value.d.ts +9 -0
- package/dist/rules/aeo/non-replicable-value.d.ts.map +1 -0
- package/dist/rules/aeo/non-replicable-value.js +95 -0
- package/dist/rules/aeo/non-replicable-value.js.map +1 -0
- package/dist/rules/aeo/summary-bait.d.ts +20 -0
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -0
- package/dist/rules/aeo/summary-bait.js +147 -0
- package/dist/rules/aeo/summary-bait.js.map +1 -0
- package/dist/rules/scope.d.ts +12 -0
- package/dist/rules/scope.d.ts.map +1 -0
- package/dist/rules/scope.js +67 -0
- package/dist/rules/scope.js.map +1 -0
- package/dist/types.d.ts +30 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -3
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
 * Patterns for "citable facts": concrete values a reader or answer engine could quote.
 * Every regex carries the `g` flag so `String.prototype.match` returns all occurrences.
 */
const FACT_PATTERNS = [
    // Dollar amounts. Digit groups may be comma-separated, but the match must end on a
    // digit (or a two-digit cents suffix). The previous `[\d,]+` form swallowed trailing
    // punctuation ("$70, plus tax" -> "$70,") and even matched a bare "$,", so the same
    // price was recorded as different facts depending on the sentence around it.
    { name: "dollar", regex: /\$\d+(?:,\d+)*(?:\.\d{2})?/g },
    // Integer or decimal percentages, with optional whitespace before the sign.
    { name: "percent", regex: /\b\d+(\.\d+)?\s*%/g },
    {
        // Durations such as "5 days" or "3-5 business days".
        name: "timeframe",
        regex: /\b\d+(?:-\d+)?\s*(business\s+days?|days?|weeks?|months?|years?|hours?|minutes?)\b/gi,
    },
    {
        // Month-name dates, optionally followed by a four-digit year ("April 15, 2025").
        name: "date",
        regex: /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(?:,\s*\d{4})?\b/gi,
    },
    // ISO-style dates (YYYY-MM-DD).
    { name: "isoDate", regex: /\b\d{4}-\d{2}-\d{2}\b/g },
    // Named forms such as "Form W-2" or "Form 1040".
    { name: "form", regex: /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/g },
];
/**
 * Collects every distinct fact string found in `text`.
 *
 * @param text Text to scan.
 * @returns De-duplicated matches, trimmed and lower-cased, in first-seen order
 *          (patterns are applied in `FACT_PATTERNS` order).
 */
function extractRawFacts(text) {
    const out = new Set();
    for (const { regex } of FACT_PATTERNS) {
        // `match` with a global regex yields null when nothing matches.
        for (const m of text.match(regex) ?? []) {
            out.add(m.trim().toLowerCase());
        }
    }
    return Array.from(out);
}
|
|
26
|
+
/**
 * Runs each entity pattern over the text in order, substituting its placeholder.
 *
 * @param text Text to mask.
 * @param patterns Entries exposing `pattern` and `placeholder`, applied sequentially
 *                 so later patterns see the output of earlier ones.
 * @returns The masked text.
 */
function applyEntityMask(text, patterns) {
    return [...patterns].reduce(
        (masked, entry) => masked.replace(entry.pattern, entry.placeholder),
        text,
    );
}
|
|
32
|
+
/**
 * Reports pages that carry too few entity-specific citable facts.
 *
 * Facts are extracted from each page's entity-masked `contentText`. A fact that shows
 * up on enough pages is treated as template boilerplate and excluded from the count:
 *   - 0 or 1 pages: nothing can be classified as template;
 *   - 2 to 5 pages: a fact is template only when it appears on every page;
 *   - more than 5 pages: a fact is template when it appears on at least half of them.
 *
 * @param pages Pages to audit (`url` and `contentText` are read).
 * @param entityPatterns Masking patterns applied before fact extraction.
 * @param options Optional `minFactsPerPage` (default 3; below it the finding is an
 *                error) and `targetFactsPerPage` (default 8; at or above it the page passes).
 * @returns One finding per page that falls short of the target.
 */
export function citableFactsRule(pages, entityPatterns, options) {
    const minFacts = options?.minFactsPerPage ?? 3;
    const targetFacts = options?.targetFactsPerPage ?? 8;
    const pageCount = pages.length;
    let templateThreshold;
    if (pageCount <= 1) {
        templateThreshold = Infinity;
    }
    else if (pageCount <= 5) {
        templateThreshold = pageCount;
    }
    else {
        templateThreshold = Math.ceil(pageCount * 0.5);
    }
    // First pass: extract facts per page and count on how many pages each one occurs.
    const perPageFacts = new Map();
    const factFrequency = new Map();
    for (const page of pages) {
        const rawFacts = extractRawFacts(applyEntityMask(page.contentText, entityPatterns));
        perPageFacts.set(page.url, rawFacts);
        rawFacts.forEach((fact) => {
            factFrequency.set(fact, (factFrequency.get(fact) ?? 0) + 1);
        });
    }
    const templateFacts = new Set();
    factFrequency.forEach((count, fact) => {
        if (count >= templateThreshold) {
            templateFacts.add(fact);
        }
    });
    // Second pass: grade each page on what remains after dropping template facts.
    const findings = [];
    for (const page of pages) {
        const facts = perPageFacts.get(page.url) ?? [];
        const unique = facts.filter((fact) => !templateFacts.has(fact));
        if (unique.length >= targetFacts) {
            continue;
        }
        const templateDrag = facts.length - unique.length;
        let templateNote = "";
        if (templateDrag > 0) {
            templateNote = ` (${templateDrag} additional fact${templateDrag === 1 ? "" : "s"} appear on most pages and don't count as entity-specific)`;
        }
        findings.push({
            ruleId: "aeo/citable-facts",
            severity: unique.length < minFacts ? "error" : "warning",
            message: `${page.url} has ${unique.length} unique citable fact${unique.length === 1 ? "" : "s"}${templateNote}. ` +
                `AI Overviews cite specific numbers and named references.`,
            pageUrl: page.url,
            fix: `Replace vague language ("varies", "several weeks", "affordable", "many options") with ` +
                `specific values a reader or AI engine would cite: exact prices, percentages, dates, ` +
                `timeframes, version numbers, named products, standards, or regulations that apply to this page. ` +
                `For pSEO templates, bind these values from your data source so each page gets ` +
                `entity-specific numbers instead of repeating the same template wording. ` +
                `Target: ${targetFacts}+ unique facts per page.`,
        });
    }
    return findings;
}
|
|
90
|
+
//# sourceMappingURL=citable-facts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citable-facts.js","sourceRoot":"","sources":["../../../src/rules/aeo/citable-facts.ts"],"names":[],"mappings":"AASA,MAAM,aAAa,GAA2C;IAC5D,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAAmB,EACnB,cAAmC,EACnC,OAA6B;IAE7B,MAAM,QAAQ,GAAG,OAAO,EAAE,eAAe,IAAI,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,OAAO,EAAE,kBAAkB,IAAI,CAAC,CAAC;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,sFAAsF;IACtF,sFAAsF;IACtF,EAAE;IACF,yBAAyB;IACzB,sFAAsF;IACtF,0EAA0E;IAC1E,uEAAuE;IACvE,EAAE;IACF,kFAAkF;IAClF,kFAAkF;IAClF,MAAM,iBAAiB,GACrB,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAC9B,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;IAEhC,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEjD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;QACzC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAA
C,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,IAAI,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QACjD,IAAI,KAAK,IAAI,iBAAiB;YAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1D,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW;YAAE,SAAS;QAE3C,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QAChE,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAClD,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC;YACnC,CAAC,CAAC,KAAK,YAAY,mBAAmB,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,2DAA2D;YAC9H,CAAC,CAAC,EAAE,CAAC;QAEP,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,QAAQ,MAAM,CAAC,MAAM,uBAAuB,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,YAAY,IAAI;gBACxG,0DAA0D;YAC5D,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,wFAAwF;gBACxF,sFAAsF;gBACtF,kGAAkG;gBAClG,gFAAgF;gBAChF,0EAA0E;gBAC1E,WAAW,WAAW,0BAA0B;SACnD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/** Tuning knobs accepted by `contentModularityRule`; every field is optional. */
export interface ContentModularityOptions {
    /** Word-count ceiling for a single paragraph; anything longer is flagged. Default: 200. */
    maxParagraphWords?: number;
    /** Share of sections that must be self-contained for a page to pass. Default: 0.7. */
    minSelfContainedRatio?: number;
    /** Additional cross-reference regexes, checked alongside the built-in defaults. */
    crossRefPatterns?: Array<RegExp>;
}
|
|
10
|
+
/**
 * Audits pages for sections that cannot be extracted on their own.
 *
 * @param pages Parsed pages to inspect.
 * @param options Optional thresholds and extra cross-reference patterns.
 * @returns Findings for the pages that fail the check.
 */
export declare function contentModularityRule(
    pages: ParsedPage[],
    options?: ContentModularityOptions,
): RuleResult[];
|
|
11
|
+
//# sourceMappingURL=content-modularity.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-modularity.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/content-modularity.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,wBAAwB;IACvC,wEAAwE;IACxE,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,8EAA8E;IAC9E,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,4DAA4D;IAC5D,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7B;AA+FD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,OAAO,CAAC,EAAE,wBAAwB,GACjC,UAAU,EAAE,CAyCd"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
/**
 * Built-in phrases that point the reader at another part of the page.
 * Each entry pairs a case-insensitive regex with the label quoted in findings.
 */
const DEFAULT_CROSS_REF_RULES = [
    {
        re: /\b(as\s+)?(mentioned|noted|discussed|shown)\s+(above|below|earlier|previously|later)\b/i,
        label: "'as mentioned above'",
    },
    {
        re: /\bin\s+the\s+(previous|next|following|preceding)\s+section\b/i,
        label: "'in the previous/next section'",
    },
    {
        re: /\bsee\s+(above|below|earlier|the\s+section)\b/i,
        label: "'see above/below'",
    },
    {
        re: /\bas\s+(noted|stated)\s+above\b/i,
        label: "'as noted above'",
    },
];
|
|
8
|
+
// Deliberately narrow: only headings that carry no topic at all are matched.
// Common, legitimate headings ("FAQ", "Summary", "Conclusion", "Getting Started")
// were dropped from this list after feedback.
const VAGUE_HEADING_PATTERNS = [
    // The whole heading (ignoring surrounding whitespace and case) must be one of these.
    /^\s*(more\s+information|more\s+info|details|additional\s+details|other|notes?)\s*$/i,
];
|
|
14
|
+
/**
 * Splits a page's HTML into heading-delimited sections.
 *
 * The content scope is the first of `<article>`, `<main>`, `<body>` that exists.
 * Within it, `h2`/`h3` elements open a new section and `p`/`li` text is appended to
 * the section currently open; text found before any heading goes into a section with
 * an empty heading. Elements with no text are ignored.
 *
 * @param html Raw page HTML.
 * @returns Sections in document order, each as `{ heading, text, paragraphs }`.
 */
function splitIntoSections(html) {
    const $ = load(html);
    let scope = $("article");
    if (scope.length === 0) {
        scope = $("main");
    }
    if (scope.length === 0) {
        scope = $("body");
    }
    // Page chrome nested in the content scope (related-post asides, breadcrumb navs,
    // footer CTAs) would inflate paragraph counts and trip cross-reference detection,
    // so anything sitting inside these containers is skipped.
    const excludeSel = "nav, aside, footer, header, [role=navigation], .breadcrumbs, .breadcrumb";
    const sections = [];
    let current = null;
    scope.find("h2, h3, p, li").each((_, el) => {
        if ($(el).closest(excludeSel).length > 0) {
            return;
        }
        const tag = el.tagName?.toLowerCase?.() ?? "";
        const text = $(el).text().trim();
        if (!text) {
            return;
        }
        const isHeading = tag === "h2" || tag === "h3";
        // A heading always opens a section; body text opens one only if none exists yet.
        if (isHeading || !current) {
            current = { heading: isHeading ? text : "", text: "", paragraphs: [] };
            sections.push(current);
        }
        if (isHeading) {
            return;
        }
        current.paragraphs.push(text);
        current.text = current.text ? `${current.text} ${text}` : text;
    });
    return sections;
}
|
|
45
|
+
/**
 * Counts whitespace-separated words.
 *
 * @param text Text to measure.
 * @returns Number of words; 0 for empty or whitespace-only input.
 */
function wordCount(text) {
    const trimmed = text.trim();
    return trimmed === "" ? 0 : trimmed.split(/\s+/).length;
}
|
|
51
|
+
/**
 * Checks one section for traits that stop it from standing on its own.
 *
 * @param section Section as `{ heading, text, paragraphs }`.
 * @param crossRefRules Array of `{ re, label }` cross-reference rules; only the first
 *                      rule that matches is reported.
 * @param maxParagraphWords Paragraphs with more words than this are counted as too long.
 * @returns `{ heading, reasons }` when at least one problem is found (an empty heading
 *          is reported as "(pre-heading)"), otherwise `null`.
 */
function analyzeSection(section, crossRefRules, maxParagraphWords) {
    const reasons = [];
    // Caller-supplied patterns may carry the `g` or `y` flag. `RegExp#test` on such a
    // regex resumes from `lastIndex`, so reusing it across sections made every other
    // match silently fail. `String#search` always starts at index 0 and leaves
    // `lastIndex` untouched.
    const crossRef = crossRefRules.find(({ re }) => section.text.search(re) !== -1);
    if (crossRef) {
        reasons.push(`cross-references another section (${crossRef.label})`);
    }
    if (section.heading && VAGUE_HEADING_PATTERNS.some((re) => re.test(section.heading))) {
        reasons.push(`heading "${section.heading}" is too vague`);
    }
    const longParagraphs = section.paragraphs.filter((p) => wordCount(p) > maxParagraphWords).length;
    if (longParagraphs > 0) {
        reasons.push(`${longParagraphs} paragraph${longParagraphs === 1 ? "" : "s"} exceed ${maxParagraphWords} words`);
    }
    if (reasons.length === 0) {
        return null;
    }
    return { heading: section.heading || "(pre-heading)", reasons };
}
|
|
70
|
+
/**
 * Warns about pages where too few sections can be extracted independently.
 *
 * Pages with fewer than two non-empty sections are skipped. A section counts against
 * the page when `analyzeSection` reports any problem; the page is flagged when the
 * share of clean sections drops below `minSelfContainedRatio`.
 *
 * @param pages Pages to audit (`html` and `url` are read).
 * @param options Optional `maxParagraphWords` (default 200), `minSelfContainedRatio`
 *                (default 0.7) and `crossRefPatterns` (extra regexes, labelled
 *                "custom pattern").
 * @returns One warning per failing page, quoting up to three example sections.
 */
export function contentModularityRule(pages, options) {
    const maxParagraphWords = options?.maxParagraphWords ?? 200;
    const minRatio = options?.minSelfContainedRatio ?? 0.7;
    const crossRefRules = [
        ...DEFAULT_CROSS_REF_RULES,
        ...(options?.crossRefPatterns ?? []).map((re) => ({ re, label: "custom pattern" })),
    ];
    const findings = [];
    for (const page of pages) {
        const sections = splitIntoSections(page.html).filter((s) => s.text.length > 0);
        if (sections.length < 2) {
            continue;
        }
        const issues = sections
            .map((section) => analyzeSection(section, crossRefRules, maxParagraphWords))
            .filter(Boolean);
        const selfContainedRatio = 1 - issues.length / sections.length;
        if (selfContainedRatio >= minRatio) {
            continue;
        }
        // Quote at most three offending sections to keep the message readable.
        const exampleParts = [];
        for (const issue of issues.slice(0, 3)) {
            exampleParts.push(`"${issue.heading}": ${issue.reasons.join(", ")}`);
        }
        const examples = exampleParts.join(" | ");
        findings.push({
            ruleId: "aeo/content-modularity",
            severity: "warning",
            message: `${page.url} has ${issues.length}/${sections.length} sections that are not independently extractable. ` +
                `Examples — ${examples}.`,
            pageUrl: page.url,
            fix: `Each section should answer one question completely without referencing other parts of the page. ` +
                `(1) Replace "see above" / "as mentioned" with the actual information the reader needs. ` +
                `(2) Rename vague H2s ("More Info", "Details") to specific topics (e.g. "California LLC Annual Report Requirements"). ` +
                `(3) Break paragraphs longer than ${maxParagraphWords} words into 2–3 focused paragraphs with their own sub-headings.`,
        });
    }
    return findings;
}
|
|
107
|
+
//# sourceMappingURL=content-modularity.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-modularity.js","sourceRoot":"","sources":["../../../src/rules/aeo/content-modularity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAiB/B,MAAM,uBAAuB,GAAmB;IAC9C,EAAE,EAAE,EAAE,yFAAyF,EAAE,KAAK,EAAE,sBAAsB,EAAE;IAChI,EAAE,EAAE,EAAE,+DAA+D,EAAE,KAAK,EAAE,gCAAgC,EAAE;IAChH,EAAE,EAAE,EAAE,gDAAgD,EAAE,KAAK,EAAE,mBAAmB,EAAE;IACpF,EAAE,EAAE,EAAE,kCAAkC,EAAE,KAAK,EAAE,kBAAkB,EAAE;CACtE,CAAC;AAEF,+EAA+E;AAC/E,mFAAmF;AACnF,kDAAkD;AAClD,MAAM,sBAAsB,GAAa;IACvC,qFAAqF;CACtF,CAAC;AAQF,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,OAAO,GAAmB,IAAI,CAAC;IAEnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAEpG,qFAAqF;IACrF,+EAA+E;IAC/E,6CAA6C;IAC7C,MAAM,UAAU,GAAG,0EAA0E,CAAC;IAC9F,MAAM,UAAU,GAAG,CAAC,EAAW,EAAW,EAAE,CAAC,CAAC,CAAC,EAAW,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAE3F,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACzC,IAAI,UAAU,CAAC,EAAE,CAAC;YAAE,OAAO;QAC3B,MAAM,GAAG,GAAI,EAA2B,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,IAAI,EAAE,CAAC;QACxE,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACjC,OAAO,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YACtD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,OAAO;QACT,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YACpD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QACD,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,OAAO,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,O
AAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IACvB,OAAO,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAOD,SAAS,cAAc,CACrB,OAAgB,EAChB,aAA6B,EAC7B,iBAAyB;IAEzB,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;QAC1C,IAAI,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,qCAAqC,KAAK,GAAG,CAAC,CAAC;YAC5D,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,IAAI,sBAAsB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,YAAY,OAAO,CAAC,OAAO,gBAAgB,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,cAAc,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,CAAC,MAAM,CAAC;IACjG,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,IAAI,CAAC,GAAG,cAAc,aAAa,cAAc,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,WAAW,iBAAiB,QAAQ,CAAC,CAAC;IAClH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,eAAe,EAAE,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,OAAkC;IAElC,MAAM,iBAAiB,GAAG,OAAO,EAAE,iBAAiB,IAAI,GAAG,CAAC;IAC5D,MAAM,QAAQ,GAAG,OAAO,EAAE,qBAAqB,IAAI,GAAG,CAAC;IACvD,MAAM,aAAa,GAAG,CAAC,OAAO,EAAE,gBAAgB,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC;IACvG,MAAM,aAAa,GAAG,CAAC,GAAG,uBAAuB,EAAE,GAAG,aAAa,CAAC,CAAC;IACrE,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/E,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAElC,MAAM,MAAM,GAAmB,EAAE,CAAC;QAClC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,cAAc,CAAC,OAAO,EAAE,aAAa,EAAE,iBAAiB,CAAC,CAAC;YACxE,IAAI,KAAK;gBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;QAED,MAAM,kBAAkB,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC/D,IAAI,kBAAkB,I
AAI,QAAQ;YAAE,SAAS;QAE7C,MAAM,QAAQ,GAAG,MAAM;aACpB,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;aACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,OAAO,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;aACrD,IAAI,CAAC,KAAK,CAAC,CAAC;QAEf,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ,EAAE,SAAS;YACnB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,QAAQ,MAAM,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,oDAAoD;gBACvG,cAAc,QAAQ,GAAG;YAC3B,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,kGAAkG;gBAClG,yFAAyF;gBACzF,uHAAuH;gBACvH,oCAAoC,iBAAiB,iEAAiE;SACzH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { RuleResult } from "../../types.js";
|
|
2
|
+
/**
 * AI crawler user-agent names checked when no explicit list is configured.
 * Ordered by prevalence.
 */
export declare const DEFAULT_AI_CRAWLERS: readonly [
    "GPTBot",
    "ChatGPT-User",
    "ClaudeBot",
    "PerplexityBot",
    "Bytespider",
    "Google-Extended",
    "CCBot",
    "Applebot-Extended"
];
|
|
6
|
+
/**
 * Parses robots.txt into a map from user-agent to its Disallow patterns.
 *
 * Keys are lower-cased so lookups can be case-insensitive. A `Disallow:` line with
 * no value contributes no pattern, in line with the robots convention that an empty
 * Disallow places no restriction.
 *
 * @param robotsTxt Raw robots.txt content.
 * @returns Map of lower-cased user-agent name to Disallow patterns.
 */
export declare function parseRobotsByUserAgent(
    robotsTxt: string,
): Map<string, string[]>;
|
|
13
|
+
/**
 * Tells whether a Disallow list blocks the whole site.
 *
 * @param patterns Disallow patterns for one user-agent, or `undefined` when the agent
 *                 has no group.
 * @returns `true` when the list contains a root block.
 */
export declare function isFullyDisallowed(
    patterns: string[] | undefined,
): boolean;
|
|
15
|
+
/** Options accepted by `crawlerAccessRule`. */
export interface CrawlerAccessOptions {
    /** AI crawler user-agent names to check. */
    crawlers?: ReadonlyArray<string>;
}
|
|
19
|
+
/**
 * Reports AI crawlers that robots.txt shuts out of the whole site.
 *
 * Each blocked crawler produces a warning; when every configured crawler is blocked
 * the result is a single error instead. A wildcard group (`User-agent: *` with
 * `Disallow: /`) counts as blocking a named crawler unless that crawler has its own,
 * more permissive group.
 *
 * @param robotsTxtContent Raw robots.txt content.
 * @param options Optional list of crawlers to check.
 * @returns Findings for the blocked crawlers.
 */
export declare function crawlerAccessRule(
    robotsTxtContent: string,
    options?: CrawlerAccessOptions,
): RuleResult[];
|
|
25
|
+
//# sourceMappingURL=crawler-access.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler-access.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD;;GAEG;AACH,eAAO,MAAM,mBAAmB,kIAStB,CAAC;AAEX;;;;;GAKG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAsC/E;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,GAAG,OAAO,CAGzE;AAED,MAAM,WAAW,oBAAoB;IACnC,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CAC9B;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAC/B,gBAAgB,EAAE,MAAM,EACxB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,UAAU,EAAE,CAkDd"}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
 * AI crawler user-agent names checked when the caller supplies no list.
 * Ordered by prevalence.
 */
export const DEFAULT_AI_CRAWLERS = [
    "GPTBot", "ChatGPT-User",
    "ClaudeBot", "PerplexityBot",
    "Bytespider", "Google-Extended",
    "CCBot", "Applebot-Extended",
];
|
|
14
|
+
/**
 * Parse robots.txt into a map of user-agent -> list of Disallow patterns.
 *
 * - User-agent keys are lower-cased for case-insensitive lookup.
 * - Consecutive `User-agent:` lines share the rule block that follows them.
 * - `#` starts a comment that runs to the end of the line. Both full-line and
 *   trailing comments are removed before a line is interpreted, so
 *   `Disallow: / # block everything` yields the pattern `/`.
 * - A blank `Disallow:` value adds no pattern (an empty Disallow means no restriction).
 *
 * @param robotsTxt Raw robots.txt content.
 * @returns Map from lower-cased user-agent name to its Disallow patterns; an agent that
 *          is named but has no non-empty Disallow maps to an empty array.
 */
export function parseRobotsByUserAgent(robotsTxt) {
    const result = new Map();
    let currentAgents = [];
    // True once the current group has received a rule line; the next User-agent
    // line then starts a fresh group instead of stacking onto this one.
    let expectingRules = false;
    for (const raw of robotsTxt.split(/\r?\n/)) {
        // Cut the line at the first "#" so trailing comments never leak into values.
        const hashAt = raw.indexOf("#");
        const line = (hashAt === -1 ? raw : raw.slice(0, hashAt)).trim();
        if (!line) {
            continue;
        }
        if (/^user-agent\s*:/i.test(line)) {
            const ua = line.replace(/^user-agent\s*:\s*/i, "").trim().toLowerCase();
            if (!expectingRules) {
                // Stacking consecutive User-agent lines — they all share the next rule block.
                currentAgents.push(ua);
            }
            else {
                currentAgents = [ua];
                expectingRules = false;
            }
            if (!result.has(ua)) {
                result.set(ua, []);
            }
            continue;
        }
        if (/^(allow|disallow|crawl-delay|sitemap)\s*:/i.test(line)) {
            expectingRules = true;
        }
        if (/^disallow\s*:/i.test(line)) {
            const value = line.replace(/^disallow\s*:\s*/i, "").trim();
            if (!value) {
                continue;
            }
            for (const agent of currentAgents) {
                const bucket = result.get(agent);
                if (bucket) {
                    bucket.push(value);
                }
            }
        }
    }
    return result;
}
|
|
59
|
+
/**
 * Tells whether a Disallow list shuts the agent out of the whole site.
 *
 * @param patterns Disallow patterns for one user-agent; may be undefined.
 * @returns `true` when any pattern is exactly `/` or `/*`; `false` otherwise,
 *          including when `patterns` is missing.
 */
export function isFullyDisallowed(patterns) {
    const rootBlocks = ["/", "/*"];
    return patterns ? patterns.some((pattern) => rootBlocks.includes(pattern)) : false;
}
|
|
65
|
+
/**
 * Reports AI crawlers that robots.txt blocks from the entire site.
 *
 * A crawler with its own group is judged on that group alone; a crawler without one
 * inherits the `*` group. If every configured crawler is blocked a single error is
 * returned; otherwise one warning per blocked crawler.
 *
 * @param robotsTxtContent Raw robots.txt content; falsy input yields no findings.
 * @param options Optional `crawlers` list; defaults to `DEFAULT_AI_CRAWLERS`.
 * @returns Findings with rule id "aeo/crawler-access".
 */
export function crawlerAccessRule(robotsTxtContent, options) {
    if (!robotsTxtContent) {
        return [];
    }
    const crawlers = options?.crawlers ?? DEFAULT_AI_CRAWLERS;
    const byAgent = parseRobotsByUserAgent(robotsTxtContent);
    const wildcardBlocked = isFullyDisallowed(byAgent.get("*"));
    const blocked = crawlers.filter((crawler) => {
        const ownBlock = byAgent.get(crawler.toLowerCase());
        // No explicit group for this agent — it falls back to the wildcard group.
        return ownBlock === undefined ? wildcardBlocked : isFullyDisallowed(ownBlock);
    });
    if (blocked.length === 0) {
        return [];
    }
    if (blocked.length === crawlers.length) {
        return [
            {
                ruleId: "aeo/crawler-access",
                severity: "error",
                message: `robots.txt blocks all ${crawlers.length} configured AI crawlers: ${blocked.join(", ")}.`,
                fix: `Blocking every AI crawler makes your pages invisible to answer engines. ` +
                    `Sites uncited in AI Overviews lose ~68% of traffic vs ~12% for cited sites. ` +
                    `Remove the Disallow rules for these crawlers unless you have a specific legal or competitive reason to block them.`,
            },
        ];
    }
    return blocked.map((crawler) => ({
        ruleId: "aeo/crawler-access",
        severity: "warning",
        message: `robots.txt blocks ${crawler}.`,
        fix: `Remove the "Disallow: /" directive for User-agent: ${crawler} in your robots.txt. ` +
            `Blocking ${crawler} removes your pages from its answer engine's citation pool. ` +
            `If selective blocking is intentional (e.g. admin routes only), narrow the Disallow pattern instead of blocking the whole site.`,
    }));
}
|
|
116
|
+
//# sourceMappingURL=crawler-access.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler-access.js","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,cAAc;IACd,WAAW;IACX,eAAe;IACf,YAAY;IACZ,iBAAiB;IACjB,OAAO;IACP,mBAAmB;CACX,CAAC;AAEX;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,8EAA8E;gBAC9E,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,iBAAiB,CAAC,QAA8B;IAC9D,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAOD;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAC/B,gBAAwB,EACxB,OAA8B;IAE9B,IAAI,CAAC,gBAAgB;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,mBAAmB,CAAC;IAC1D,MAAM,OAAO,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,iBAAiB,CAAC,OAAO
,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAE5D,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,0EAA0E;YAC1E,IAAI,eAAe;gBAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC3C,SAAS;QACX,CAAC;QACD,IAAI,iBAAiB,CAAC,QAAQ,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,CAAC;IAEtD,IAAI,UAAU,EAAE,CAAC;QACf,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,yBAAyB,QAAQ,CAAC,MAAM,4BAA4B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YAClG,GAAG,EACD,0EAA0E;gBAC1E,8EAA8E;gBAC9E,oHAAoH;SACvH,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;QAC9B,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,qBAAqB,OAAO,GAAG;YACxC,GAAG,EACD,sDAAsD,OAAO,uBAAuB;gBACpF,YAAY,OAAO,8DAA8D;gBACjF,gIAAgI;SACnI,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/** Optional tuning knobs accepted by `faqCoverageRule`. */
export interface FaqCoverageOptions {
|
|
3
|
+
/** URL path fragments that signal question intent. The compiled rule matches them as plain substrings of the URL path; no glob expansion is performed. */
|
|
4
|
+
questionPatterns?: string[];
|
|
5
|
+
/** Minimum number of question-style headings that makes a page eligible even when its URL matches no pattern (default: 2). */
|
|
6
|
+
minQuestionHeadings?: number;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Audits pages for FAQ-style content that lacks FAQ-like structured data.
 *
 * Per the compiled implementation shipped with this declaration: a page is considered when
 * its URL matches a question pattern or it has at least `minQuestionHeadings` question-style
 * H2 headings, and it yields one "info" result (rule id "aeo/faq-coverage") unless its JSON-LD
 * declares an FAQPage, HowTo or QAPage type.
 *
 * @param pages Parsed pages to inspect.
 * @param options Optional URL patterns and heading threshold.
 * @returns One result per flagged page; empty when nothing is flagged.
 */
export declare function faqCoverageRule(pages: ParsedPage[], options?: FaqCoverageOptions): RuleResult[];
|
|
9
|
+
//# sourceMappingURL=faq-coverage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"faq-coverage.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/faq-coverage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,kBAAkB;IACjC,gEAAgE;IAChE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,mFAAmF;IACnF,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AA6CD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,CA8Bd"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
const DEFAULT_URL_PATTERNS = ["/how-to-", "/what-is-", "/guide-", "-faq", "/faq", "/questions"];
|
|
2
|
+
/** Case-insensitive match for a heading that opens with an interrogative or auxiliary word. */
const QUESTION_STARTERS = /^\s*(how|what|why|when|where|who|can|does|do|is|are|should|which|will|could|would|may)\b/i;
/**
 * Decide whether a heading reads like a question.
 *
 * A heading qualifies when, after trimming, it either ends with "?" or begins
 * with one of the QUESTION_STARTERS words. Blank headings never qualify.
 *
 * @param heading Raw heading text.
 * @returns true when the heading looks like a question.
 */
function isQuestionHeading(heading) {
    const text = heading.trim();
    if (text.length === 0) {
        return false;
    }
    if (text.endsWith("?")) {
        return true;
    }
    return QUESTION_STARTERS.test(text);
}
|
|
9
|
+
function hasFaqLikeSchema(entries) {
|
|
10
|
+
const stack = [...entries];
|
|
11
|
+
while (stack.length > 0) {
|
|
12
|
+
const node = stack.pop();
|
|
13
|
+
if (node === null || typeof node !== "object")
|
|
14
|
+
continue;
|
|
15
|
+
const obj = node;
|
|
16
|
+
const type = obj["@type"];
|
|
17
|
+
if (type === "FAQPage" || type === "HowTo" || type === "QAPage")
|
|
18
|
+
return true;
|
|
19
|
+
if (Array.isArray(type) && type.some((t) => t === "FAQPage" || t === "HowTo" || t === "QAPage")) {
|
|
20
|
+
return true;
|
|
21
|
+
}
|
|
22
|
+
for (const value of Object.values(obj)) {
|
|
23
|
+
if (Array.isArray(value)) {
|
|
24
|
+
for (const item of value)
|
|
25
|
+
stack.push(item);
|
|
26
|
+
}
|
|
27
|
+
else if (value !== null && typeof value === "object") {
|
|
28
|
+
stack.push(value);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
/**
 * Check whether a URL contains any of the given question-intent fragments.
 *
 * Matching is a case-insensitive substring test. For a parseable absolute URL
 * only the path is searched (host and query string are ignored); when the URL
 * cannot be parsed (for example a relative path) the whole string is searched.
 *
 * @param url Page URL, absolute or not.
 * @param patterns Fragments to look for. Empty and non-string entries are ignored.
 * @returns true when at least one fragment occurs in the searched text.
 */
function urlLooksLikeFaq(url, patterns) {
    // The haystack is lower-cased, so the needles must be too, otherwise a
    // caller-supplied "/FAQ" could never match. Empty fragments are dropped
    // because every string includes "" and would flag every page.
    const needles = patterns
        .filter((p) => typeof p === "string" && p.length > 0)
        .map((p) => p.toLowerCase());
    let haystack;
    try {
        haystack = new URL(url).pathname.toLowerCase();
    }
    catch {
        // Not an absolute URL: fall back to searching the raw string.
        haystack = url.toLowerCase();
    }
    return needles.some((needle) => haystack.includes(needle));
}
|
|
44
|
+
/**
 * Flag pages that look like FAQ content but carry no FAQ-like JSON-LD.
 *
 * A page is a candidate when its URL matches a question pattern or it has at
 * least `minQuestionHeadings` (default 2) question-style H2 headings.
 * Candidates whose JSON-LD already declares a FAQ-like type are skipped; the
 * rest each produce one "info" finding.
 *
 * @param pages Parsed pages to inspect.
 * @param options Optional `questionPatterns` and `minQuestionHeadings` overrides.
 * @returns The findings, in page order.
 */
export function faqCoverageRule(pages, options) {
    const urlPatterns = options?.questionPatterns ?? DEFAULT_URL_PATTERNS;
    const headingThreshold = options?.minQuestionHeadings ?? 2;
    const results = [];
    for (const page of pages) {
        const questions = page.headings.h2.filter(isQuestionHeading);
        const faqUrl = urlLooksLikeFaq(page.url, urlPatterns);
        // Candidate = FAQ-looking URL, or enough question headings.
        const isCandidate = faqUrl || !(questions.length < headingThreshold);
        if (!isCandidate) {
            continue;
        }
        if (hasFaqLikeSchema(page.jsonLd)) {
            continue;
        }
        let detail;
        if (questions.length > 0) {
            const samples = questions
                .slice(0, 3)
                .map((q) => `"${q.trim()}"`)
                .join(", ");
            const plural = questions.length === 1 ? "" : "s";
            const examples = samples ? ` (e.g. ${samples})` : "";
            detail = `${questions.length} question-style heading${plural}${examples}`;
        }
        else {
            // Only reachable through the URL signal.
            detail = `URL path matches an FAQ pattern`;
        }
        const finding = {
            ruleId: "aeo/faq-coverage",
            severity: "info",
            message: `${page.url} contains FAQ-style content (${detail}) but no FAQPage/HowTo JSON-LD.`,
            pageUrl: page.url,
            fix: `Add FAQPage JSON-LD that mirrors the existing Q&A content. For pSEO templates, generate the ` +
                `schema programmatically from the same data source that renders the headings — don't ship identical ` +
                `questions with only the entity name swapped.`,
        };
        results.push(finding);
    }
    return results;
}
|
|
71
|
+
//# sourceMappingURL=faq-coverage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"faq-coverage.js","sourceRoot":"","sources":["../../../src/rules/aeo/faq-coverage.ts"],"names":[],"mappings":"AASA,MAAM,oBAAoB,GAAG,CAAC,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;AAEhG,MAAM,iBAAiB,GACrB,2FAA2F,CAAC;AAE9F,SAAS,iBAAiB,CAAC,OAAe;IACxC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAkB;IAC1C,MAAM,KAAK,GAAc,CAAC,GAAG,OAAO,CAAC,CAAC;IACtC,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC;QACzB,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,SAAS;QACxD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QAC7E,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,OAAO,IAAI,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;YAChG,OAAO,IAAI,CAAC;QACd,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YACvC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,MAAM,IAAI,IAAI,KAAK;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,CAAC;iBAAM,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACvD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,eAAe,CAAC,GAAW,EAAE,QAAkB;IACtD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAChC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,OAA4B;IAE5B,MAAM,QAAQ,GAAG,OAAO,EAAE,gBAAgB,IAAI,oBAAoB,CAAC;IACnE,MAAM,YAAY,GAAG,OAAO,EAAE,mBAAmB,IAAI,CAAC,CAAC;IACvD,MAAM,QAAQ,GAAiB,E
AAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,gBAAgB,GAAG,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;QACpE,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAE1D,IAAI,CAAC,aAAa,IAAI,gBAAgB,CAAC,MAAM,GAAG,YAAY;YAAE,SAAS;QACvE,IAAI,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,SAAS;QAE5C,MAAM,UAAU,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvF,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,GAAG,gBAAgB,CAAC,MAAM,0BAA0B,gBAAgB,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,UAAU,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC5I,CAAC,CAAC,iCAAiC,CAAC;QAEtC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,gCAAgC,MAAM,iCAAiC;YAC3F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,8FAA8F;gBAC9F,qGAAqG;gBACrG,8CAA8C;SACjD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/** Optional tuning knobs accepted by `freshnessSignalsRule`. */
export interface FreshnessOptions {
|
|
3
|
+
/** Flag pages with dateModified older than this many days. Default: 180. */
|
|
4
|
+
maxStaleDays?: number;
|
|
5
|
+
/** Clock override for deterministic testing; should return epoch milliseconds like its default, Date.now(). */
|
|
6
|
+
now?: () => number;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Audits pages for freshness signals (rule id "aeo/freshness-signals").
 *
 * Per the compiled implementation shipped with this declaration: a page with no JSON-LD
 * `dateModified`, no modification meta tag or `<time datetime>` element, and no visible
 * "last updated"-style text gets a "warning". Otherwise, when the best parseable date is
 * more than `maxStaleDays` days before `now`, the page gets an "info".
 *
 * @param pages Parsed pages to inspect.
 * @param options Optional staleness threshold and clock override.
 * @returns At most one result per page; empty when nothing is flagged.
 */
export declare function freshnessSignalsRule(pages: ParsedPage[], options?: FreshnessOptions): RuleResult[];
|
|
9
|
+
//# sourceMappingURL=freshness-signals.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"freshness-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/freshness-signals.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,gBAAgB;IAC/B,4EAA4E;IAC5E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qEAAqE;IACrE,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;CACpB;AAkED,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,OAAO,CAAC,EAAE,gBAAgB,GACzB,UAAU,EAAE,CAsDd"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
/** Milliseconds in one day (24 * 60 * 60 * 1000); divides a timestamp difference to get an age in days. */
const DAY_MS = 86_400_000;
|
|
3
|
+
/**
 * Walk every JSON-LD node (depth-first, any nesting) and collect two date strings:
 * `modified` from `dateModified`, and `published` from `datePublished` or, when that
 * is absent, `dateCreated`. The two are reported separately so a caller can insist on
 * a real modification date instead of treating an old publication date as freshness.
 *
 * Only non-empty strings count, and for each field the first value met during the
 * walk wins. Either field is `null` when nothing suitable was found.
 */
function findDatesInJsonLd(entries) {
    const found = { modified: null, published: null };
    const pending = [...entries];
    while (pending.length !== 0) {
        const current = pending.pop();
        if (current === null || typeof current !== "object") {
            continue;
        }
        const modifiedCandidate = current["dateModified"];
        if (found.modified === null && typeof modifiedCandidate === "string" && modifiedCandidate !== "") {
            found.modified = modifiedCandidate;
        }
        const publishedCandidate = current["datePublished"] ?? current["dateCreated"];
        if (found.published === null && typeof publishedCandidate === "string" && publishedCandidate !== "") {
            found.published = publishedCandidate;
        }
        // Queue children: array members one by one, nested objects as they are.
        for (const child of Object.values(current)) {
            if (Array.isArray(child)) {
                for (let i = 0; i < child.length; i++) {
                    pending.push(child[i]);
                }
            }
            else if (child !== null && typeof child === "object") {
                pending.push(child);
            }
        }
    }
    return found;
}
|
|
35
|
+
/**
 * Look for a modification date in the page markup. The HTML is parsed with cheerio, so
 * the order of attributes inside a tag is irrelevant (`<meta content="..." property="...">`
 * and `<meta property="..." content="...">` are equivalent).
 *
 * Sources are tried in priority order: the `article:modified_time` meta property, the
 * `last-modified` and `dc.date.modified` meta names, then the `datetime` attribute of a
 * `<time>` element. Returns the first non-empty value, or `null` when none is present.
 */
function findDateModifiedInHtml(html) {
    const $ = load(html);
    // [selector, attribute to read], highest priority first.
    const lookups = [
        ['meta[property="article:modified_time"]', "content"],
        ['meta[name="last-modified"]', "content"],
        ['meta[name="dc.date.modified"]', "content"],
        ["time[datetime]", "datetime"],
    ];
    for (const [selector, attribute] of lookups) {
        const found = $(selector).attr(attribute);
        if (typeof found === "string" && found.length > 0) {
            return found;
        }
    }
    return null;
}
|
|
54
|
+
/**
 * Tell whether the page text contains a human-visible update marker: the whole-word
 * phrases "last updated", "updated on", "revised" or "last modified", in any letter case.
 */
function hasVisibleUpdateSignal(contentText) {
    const updatePhrase = /\b(last\s+updated|updated\s+on|revised|last\s+modified)\b/i;
    return updatePhrase.exec(contentText) !== null;
}
|
|
57
|
+
/**
 * Convert a date string to a `Date` without throwing.
 *
 * @param value Date text; may be empty, `null` or `undefined`.
 * @returns The parsed `Date`, or `null` when the value is missing or `Date.parse` rejects it.
 */
function parseDateSafe(value) {
    if (!value) {
        return null;
    }
    const epochMs = Date.parse(value);
    if (!Number.isFinite(epochMs)) {
        return null;
    }
    return new Date(epochMs);
}
|
|
63
|
+
/**
 * Check each page for evidence that it has been kept up to date.
 *
 * - No modification signal at all (JSON-LD `dateModified`, a modification date found in
 *   the markup, or visible "last updated"-style text) produces a "warning".
 *   `datePublished` on its own does not count as a modification signal.
 * - Otherwise the first parseable date among JSON-LD modified, markup modified, JSON-LD
 *   published and `page.publishedDate` is aged against the clock; more than
 *   `maxStaleDays` (default 180) whole days produces an "info".
 *
 * @param pages Parsed pages to inspect.
 * @param options Optional `maxStaleDays` threshold and `now` clock override.
 * @returns The findings, in page order.
 */
export function freshnessSignalsRule(pages, options) {
    const staleAfterDays = options?.maxStaleDays ?? 180;
    const nowMs = options?.now ? options.now() : Date.now();
    const results = [];
    for (const page of pages) {
        const jsonLdDates = findDatesInJsonLd(page.jsonLd);
        const markupModified = findDateModifiedInHtml(page.html);
        const mentionsUpdate = hasVisibleUpdateSignal(page.contentText);
        const hasSignal = Boolean(jsonLdDates.modified || markupModified || mentionsUpdate);
        if (!hasSignal) {
            results.push({
                ruleId: "aeo/freshness-signals",
                severity: "warning",
                message: `${page.url} has no dateModified signal (no JSON-LD dateModified, no modification meta tag, no visible "Last updated").`,
                pageUrl: page.url,
                fix: `Add a freshness signal so AI engines know the page is current. Three recommended places: ` +
                    `(1) dateModified in your JSON-LD schema, ` +
                    `(2) a visible "Last updated: YYYY-MM-DD" line in the page content, ` +
                    `(3) accurate <lastmod> in your sitemap. ` +
                    `For pSEO templates, automate dateModified to update when your underlying data source changes.`,
            });
            continue;
        }
        // Most specific date first; publication dates are only a fallback for ageing.
        const reference = parseDateSafe(jsonLdDates.modified) ??
            parseDateSafe(markupModified) ??
            parseDateSafe(jsonLdDates.published) ??
            parseDateSafe(page.publishedDate);
        if (!reference) {
            continue;
        }
        const ageDays = Math.floor((nowMs - reference.getTime()) / DAY_MS);
        if (!(ageDays > staleAfterDays)) {
            continue;
        }
        results.push({
            ruleId: "aeo/freshness-signals",
            severity: "info",
            message: `${page.url} was last updated ${ageDays} days ago (threshold: ${staleAfterDays}).`,
            pageUrl: page.url,
            fix: `AI engines prioritize fresh content for citation. If the page is still accurate, ` +
                `refresh the visible date and bump dateModified. If information has changed, update the body accordingly.`,
        });
    }
    return results;
}
|
|
109
|
+
//# sourceMappingURL=freshness-signals.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"freshness-signals.js","sourceRoot":"","sources":["../../../src/rules/aeo/freshness-signals.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAU/B,MAAM,MAAM,GAAG,UAAU,CAAC;AAO1B;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,OAAkB;IAC3C,IAAI,QAAQ,GAAkB,IAAI,CAAC;IACnC,IAAI,SAAS,GAAkB,IAAI,CAAC;IACpC,MAAM,KAAK,GAAc,CAAC,GAAG,OAAO,CAAC,CAAC;IACtC,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC;QACzB,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,SAAS;QACxD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,CAAC,GAAG,GAAG,CAAC,cAAc,CAAC,CAAC;QAC9B,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,QAAQ,GAAG,CAAC,CAAC;QACrE,MAAM,CAAC,GAAG,GAAG,CAAC,eAAe,CAAC,IAAI,GAAG,CAAC,aAAa,CAAC,CAAC;QACrD,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS,GAAG,CAAC,CAAC;QACvE,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YACvC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,MAAM,IAAI,IAAI,KAAK;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,CAAC;iBAAM,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACvD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH,SAAS,sBAAsB,CAAC,IAAY;IAC1C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,UAAU,GAAG;QACjB,CAAC,CAAC,wCAAwC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC3D,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC/C,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAClD,CAAC,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC;KACrC,CAAC;IACF,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;IAClE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,sBAAsB,CAAC,WAAmB;IACjD,OAAO,4DAA4D,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACxF,CAAC;AAED,SAAS,aAAa,CAAC,KAAgC;IACrD,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC5B,OAAO,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC
,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,KAAmB,EACnB,OAA0B;IAE1B,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,GAAG,CAAC;IAClD,MAAM,GAAG,GAAG,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;IACtD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,eAAe,EAAE,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChG,MAAM,YAAY,GAAG,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,aAAa,GAAG,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAE/D,oFAAoF;QACpF,qFAAqF;QACrF,wEAAwE;QACxE,MAAM,qBAAqB,GAAG,OAAO,CAAC,cAAc,IAAI,YAAY,IAAI,aAAa,CAAC,CAAC;QAEvF,IAAI,CAAC,qBAAqB,EAAE,CAAC;YAC3B,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,uBAAuB;gBAC/B,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,6GAA6G;gBACjI,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EACD,2FAA2F;oBAC3F,2CAA2C;oBAC3C,qEAAqE;oBACrE,0CAA0C;oBAC1C,+FAA+F;aAClG,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GACR,aAAa,CAAC,cAAc,CAAC;YAC7B,aAAa,CAAC,YAAY,CAAC;YAC3B,aAAa,CAAC,eAAe,CAAC;YAC9B,aAAa,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAEpC,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,GAAG,MAAM,CAAC,CAAC;YAC5D,IAAI,OAAO,GAAG,YAAY,EAAE,CAAC;gBAC3B,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,uBAAuB;oBAC/B,QAAQ,EAAE,MAAM;oBAChB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,qBAAqB,OAAO,yBAAyB,YAAY,IAAI;oBACzF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EACD,mFAAmF;wBACnF,0GAA0G;iBAC7G,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { RuleResult } from "../../types.js";
|
|
2
|
+
/**
 * Callable that is handed a URL and resolves to a string or `null`.
 * NOTE(review): presumably the string is the fetched document body and `null` means it
 * could not be retrieved — confirm against the implementation in llms-txt.js.
 */
export interface LlmsTxtFetcher {
|
|
3
|
+
(url: string): Promise<string | null>;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Minimal shape check for the emerging llms.txt convention:
|
|
7
|
+
* - First non-empty line is an `# H1` title
|
|
8
|
+
* - At least one `## ` section heading
|
|
9
|
+
* - At least one markdown link line under a section
|
|
10
|
+
* Deliberately lenient: the spec is still evolving, so only reject obvious garbage.
|
|
11
|
+
*
 * @param content Text of the llms.txt document to check.
 * @returns An object with a `valid` flag and an optional `reason` string.
 *   NOTE(review): `reason` presumably explains a failed check — confirm against the implementation.
*/
|
|
12
|
+
export declare function validateLlmsTxt(content: string): {
|
|
13
|
+
valid: boolean;
|
|
14
|
+
reason?: string;
|
|
15
|
+
};
|
|
16
|
+
/**
|
|
17
|
+
* Check for /llms.txt at the origin. Site-level rule — runs once, not per page.
|
|
18
|
+
*
 * NOTE(review): this description reads as documentation for the rule function rather than
 * for the options interface it is attached to; the interface below only carries the
 * timeout option. Confirm the comment placement in src/rules/aeo/llms-txt.ts.
*/
|
|
19
|
+
export interface LlmsTxtRuleOptions {
|
|
20
|
+
/** Timeout in ms for the /llms.txt fetch. Default: 10 000. */
|
|
21
|
+
timeoutMs?: number;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Asynchronous rule entry point for the llms.txt check.
 *
 * @param source NOTE(review): presumably the site URL or origin being audited — confirm against the implementation.
 * @param fetcherOrOptions Either a custom `LlmsTxtFetcher` or an `LlmsTxtRuleOptions` object; optional.
 * @returns A promise resolving to the rule results.
 */
export declare function llmsTxtRule(source: string, fetcherOrOptions?: LlmsTxtFetcher | LlmsTxtRuleOptions): Promise<RuleResult[]>;
|
|
24
|
+
//# sourceMappingURL=llms-txt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llms-txt.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/llms-txt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,MAAM,WAAW,cAAc;IAC7B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CACvC;AAkBD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,CAgCpF;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,8DAA8D;IAC9D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,wBAAsB,WAAW,CAC/B,MAAM,EAAE,MAAM,EACd,gBAAgB,GAAE,cAAc,GAAG,kBAAuB,GACzD,OAAO,CAAC,UAAU,EAAE,CAAC,CAqCvB"}
|